On Sun, Aug 17, 2025 at 11:02:10PM GMT, Hans Zhang wrote: > > > On 2025/8/17 04:13, Lukas Wunner wrote: > > On Sat, Aug 16, 2025 at 11:46:33PM +0800, Hans Zhang wrote: > > > Remove obsolete pci_bus_speed2lnkctl2() function and utilize the common > > > PCIE_SPEED2LNKCTL2_TLS() macro instead. > > [...] > > > +++ b/drivers/pci/pcie/bwctrl.c > > > @@ -53,23 +53,6 @@ static bool pcie_valid_speed(enum pci_bus_speed speed) > > > return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); > > > } > > > -static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) > > > -{ > > > - static const u8 speed_conv[] = { > > > - [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, > > > - [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, > > > - [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, > > > - [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, > > > - [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, > > > - [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, > > > - }; > > > - > > > - if (WARN_ON_ONCE(!pcie_valid_speed(speed))) > > > - return 0; > > > - > > > - return speed_conv[speed]; > > > -} > > > - > > > static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) > > > { > > > return __fls(supported_speeds); > > > @@ -91,7 +74,7 @@ static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed spe > > > u8 desired_speeds, supported_speeds; > > > struct pci_dev *dev; > > > - desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), > > > + desired_speeds = GENMASK(PCIE_SPEED2LNKCTL2_TLS(speed_req), > > > __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); > > > > No, that's not good. The function you're removing above, > > pci_bus_speed2lnkctl2(), uses an array to look up the speed. > > That's an O(1) operation, it doesn't get any more efficient > > than that. It was a deliberate design decision to do this > > when the bandwidth controller was created. > > > > Whereas the function you're using instead uses a series > > of ternary operators. That's no longer an O(1) operation, > > the compiler translates it into a series of conditional > > branches, so essentially an O(n) lookup (where n is the > > number of speeds). So it's less efficient and less elegant. > > > > Please come up with an approach that doesn't make this > > worse than before. > > > Dear Lukas, > > Thank you very much for your reply. > > I think the original static array will waste some memory space. Originally, > we only needed a size of 6 bytes, but in reality, the size of this array is > 26 bytes. > This is just one time allocation as the array is a 'static const', which is not a big deal. > static const u8 speed_conv[] = { > [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, > [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, > [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, > [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, > [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, > [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, > }; [...] > drivers/pci/pci.h > #define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ > ({ \ > u32 lnkcap_sls = (lnkcap) & PCI_EXP_LNKCAP_SLS; \ > \ > (lnkcap_sls == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ > lnkcap_sls == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ > lnkcap_sls == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ > lnkcap_sls == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ > lnkcap_sls == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ > lnkcap_sls == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ > PCI_SPEED_UNKNOWN); \ > }) > > /* PCIe link information from Link Capabilities 2 */ > #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ > ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ > (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ > (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ > (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ > (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ > (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ > PCI_SPEED_UNKNOWN) > > #define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ > ({ \ > u16 lnkctl2_tls = (lnkctl2) & PCI_EXP_LNKCTL2_TLS; \ > \ > (lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ > lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ > lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ > lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ > lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ > lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ > PCI_SPEED_UNKNOWN); \ > }) No, these macros are terrible. They generate more assembly code than needed for a simple array based lookup. So in the end, they increase the binary size and also doesn't provide any improvement other than the unification in the textual form. I have to take my Acked-by back as I sort of overlooked these factors. As Lukas rightly said, the pci_bus_speed2lnkctl2() does lookup in O(1), which is what we want here. Code refactoring shouldn't come at the expense of the runtime overhead. - Mani -- மணிவண்ணன் சதாசிவம்