Re: [PATCH v3 3/3] PCI/bwctrl: Replace legacy speed conversion with shared macro

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2025/8/18 13:21, Manivannan Sadhasivam wrote:
EXTERNAL EMAIL

On Sun, Aug 17, 2025 at 11:02:10PM GMT, Hans Zhang wrote:


On 2025/8/17 04:13, Lukas Wunner wrote:
On Sat, Aug 16, 2025 at 11:46:33PM +0800, Hans Zhang wrote:
Remove obsolete pci_bus_speed2lnkctl2() function and utilize the common
PCIE_SPEED2LNKCTL2_TLS() macro instead.
[...]
+++ b/drivers/pci/pcie/bwctrl.c
@@ -53,23 +53,6 @@ static bool pcie_valid_speed(enum pci_bus_speed speed)
           return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
   }
-static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
-{
- static const u8 speed_conv[] = {
-         [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
-         [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
-         [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
-         [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
-         [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
-         [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
- };
-
- if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
-         return 0;
-
- return speed_conv[speed];
-}
-
   static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
   {
           return __fls(supported_speeds);
@@ -91,7 +74,7 @@ static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed spe
           u8 desired_speeds, supported_speeds;
           struct pci_dev *dev;
- desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
+ desired_speeds = GENMASK(PCIE_SPEED2LNKCTL2_TLS(speed_req),
                                    __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));

No, that's not good.  The function you're removing above,
pci_bus_speed2lnkctl2(), uses an array to look up the speed.
That's an O(1) operation, it doesn't get any more efficient
than that.  It was a deliberate design decision to do this
when the bandwidth controller was created.

Whereas the function you're using instead uses a series
of ternary operators.  That's no longer an O(1) operation,
the compiler translates it into a series of conditional
branches, so essentially an O(n) lookup (where n is the
number of speeds).  So it's less efficient and less elegant.

Please come up with an approach that doesn't make this
worse than before.


Dear Lukas,

Thank you very much for your reply.

I think the original static array will waste some memory space. Originally,
we only needed a size of 6 bytes, but in reality, the size of this array is
26 bytes.


This is just one time allocation as the array is a 'static const', which is not
a big deal.

static const u8 speed_conv[] = {
       [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
       [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
       [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
       [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
       [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
       [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
};

[...]

drivers/pci/pci.h
#define PCIE_LNKCAP_SLS2SPEED(lnkcap)                                 \
({                                                                    \
       u32 lnkcap_sls = (lnkcap) & PCI_EXP_LNKCAP_SLS;                 \
                                                                       \
       (lnkcap_sls == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT :  \
        lnkcap_sls == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT :  \
        lnkcap_sls == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT :  \
        lnkcap_sls == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT :    \
        lnkcap_sls == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT :    \
        lnkcap_sls == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT :    \
        PCI_SPEED_UNKNOWN);                                            \
})

/* PCIe link information from Link Capabilities 2 */
#define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \
       ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \
        (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \
        (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \
        (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \
        (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \
        (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \
        PCI_SPEED_UNKNOWN)

#define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \
({                                                                    \
       u16 lnkctl2_tls = (lnkctl2) & PCI_EXP_LNKCTL2_TLS;              \
                                                                       \
       (lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT :        \
        lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT :        \
        lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT :        \
        lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT :  \
        lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT :  \
        lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT :  \
        PCI_SPEED_UNKNOWN);                                            \
})

No, these macros are terrible. They generate more assembly code than needed for
a simple array based lookup. So in the end, they increase the binary size and
also doesn't provide any improvement other than the unification in the textual
form.

I have to take my Acked-by back as I sort of overlooked these factors. As Lukas
rightly said, the pci_bus_speed2lnkctl2() does lookup in O(1), which is what we
want here.

Code refactoring shouldn't come at the expense of the runtime overhead.


Dear Mani,

Thank you very much for your reply.


Could you please share your views on modifying PCIE_SPEED2LNKCTL2_TLS to the pcie_speed_to_lnkctl2_tls inline function? Or simply put the pci_bus_speed2lnkctl2 in bwctrl.c into drivers/pci/pci.h?

Previous version modifications:

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 12215ee72afb..b5a3ce6c239b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -419,6 +419,15 @@ void pci_bus_put(struct pci_bus *bus);
 	 (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \
 	 PCI_SPEED_UNKNOWN)

+#define PCIE_SPEED2LNKCTL2_TLS(speed) \
+	((speed) == PCIE_SPEED_2_5GT ? PCI_EXP_LNKCTL2_TLS_2_5GT : \
+	 (speed) == PCIE_SPEED_5_0GT ? PCI_EXP_LNKCTL2_TLS_5_0GT : \
+	 (speed) == PCIE_SPEED_8_0GT ? PCI_EXP_LNKCTL2_TLS_8_0GT : \
+	 (speed) == PCIE_SPEED_16_0GT ? PCI_EXP_LNKCTL2_TLS_16_0GT : \
+	 (speed) == PCIE_SPEED_32_0GT ? PCI_EXP_LNKCTL2_TLS_32_0GT : \
+	 (speed) == PCIE_SPEED_64_0GT ? PCI_EXP_LNKCTL2_TLS_64_0GT : \
+	 0)
+
 /* PCIe speed to Mb/s reduced by encoding overhead */
 #define PCIE_SPEED2MBS_ENC(speed) \
 	((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \




Current modifications:

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 34f65d69662e..d6c3333315a0 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -422,6 +422,28 @@ void pci_bus_put(struct pci_bus *bus);
          PCI_SPEED_UNKNOWN);                                            \
  })

+static inline u16 pcie_speed_to_lnkctl2_tls(enum pci_bus_speed speed)
+{
+       /*
+        * Convert PCIe speed enum to LNKCTL2_TLS value using
+        * direct arithmetic:
+        *
+        * Speed enum:  0x14 (2.5GT) -> TLS = 0x1
+        *              0x15 (5.0GT) -> TLS = 0x2
+        *              0x16 (8.0GT) -> TLS = 0x3
+        *              0x17 (16.0GT)-> TLS = 0x4
+        *              0x18 (32.0GT)-> TLS = 0x5
+        *              0x19 (64.0GT)-> TLS = 0x6
+        *
+        * Formula: TLS = (speed - PCIE_SPEED_2_5GT) + 1
+        */
+       if (!WARN_ON_ONCE(speed >= PCIE_SPEED_2_5GT ||
+                         speed <= PCIE_SPEED_64_0GT))
+               return 0;
+
+       return (speed - PCIE_SPEED_2_5GT) + PCI_EXP_LNKCTL2_TLS_2_5GT;
+}
+
  /* PCIe speed to Mb/s reduced by encoding overhead */
  #define PCIE_SPEED2MBS_ENC(speed) \
         ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \




In the future, I will try to find a good way to modify these macro definitions.


Best regards,
Hans







[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux