From: Dan Williams <dan.j.williams@xxxxxxxxx> Sent: Wednesday, July 16, 2025 9:09 AM > > The ability to emulate a host bridge is useful not only for hardware PCI > controllers like CONFIG_VMD, or virtual PCI controllers like > CONFIG_PCI_HYPERV, but also for test and development scenarios like > CONFIG_SAMPLES_DEVSEC [1]. > > One stumbling block for defining CONFIG_SAMPLES_DEVSEC, a sample > implementation of a platform TSM for PCI Device Security, is the need to > accommodate PCI_DOMAINS_GENERIC architectures alongside x86 [2]. > > In support of supplementing the existing CONFIG_PCI_BRIDGE_EMUL > infrastructure for host bridges: > > * Introduce pci_bus_find_emul_domain_nr() as a common way to find a free > PCI domain number whether that is to reuse the existing dynamic > allocation code in the !ACPI case, or to assign an unused domain above > the last ACPI segment. > > * Convert pci-hyperv to the new allocator so that the PCI core can > unconditionally assume that bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET > is the dynamically allocated case. > > A follow on patch can also convert vmd to the new scheme. Currently vmd > is limited to CONFIG_PCI_DOMAINS_GENERIC=n (x86) so, unlike pci-hyperv, > it does not immediately conflict with this new > pci_bus_find_emul_domain_nr() mechanism. > > Link: https://lore.kernel.org/all/174107249038.1288555.12362100502109498455.stgit@xxxxxxxxxxxxxxxxxxxxxxxxx/ [1] > Reported-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx> > Closes: https://lore.kernel.org/all/20250311144601.145736-3-suzuki.poulose@xxxxxxx/ > Cc: Lorenzo Pieralisi <lpieralisi@xxxxxxxxxx> > Cc: Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx> > Cc: Rob Herring <robh@xxxxxxxxxx> > Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> > Cc: "K. Y. Srinivasan" <kys@xxxxxxxxxxxxx> > Cc: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > Cc: Wei Liu <wei.liu@xxxxxxxxxx> > Cc: Dexuan Cui <decui@xxxxxxxxxxxxx> > Tested-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx> > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > --- > drivers/pci/controller/pci-hyperv.c | 53 ++--------------------------- > drivers/pci/pci.c | 43 ++++++++++++++++++++++- > drivers/pci/probe.c | 8 ++++- > include/linux/pci.h | 4 +++ > 4 files changed, 56 insertions(+), 52 deletions(-) > > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index ef5d655a0052..cfe9806bdbe4 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -3630,48 +3630,6 @@ static int hv_send_resources_released(struct hv_device *hdev) > return 0; > } > > -#define HVPCI_DOM_MAP_SIZE (64 * 1024) > -static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); > - > -/* > - * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0 > - * as invalid for passthrough PCI devices of this driver. > - */ > -#define HVPCI_DOM_INVALID 0 > - > -/** > - * hv_get_dom_num() - Get a valid PCI domain number > - * Check if the PCI domain number is in use, and return another number if > - * it is in use. > - * > - * @dom: Requested domain number > - * > - * return: domain number on success, HVPCI_DOM_INVALID on failure > - */ > -static u16 hv_get_dom_num(u16 dom) > -{ > - unsigned int i; > - > - if (test_and_set_bit(dom, hvpci_dom_map) == 0) > - return dom; > - > - for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) { > - if (test_and_set_bit(i, hvpci_dom_map) == 0) > - return i; > - } > - > - return HVPCI_DOM_INVALID; > -} > - > -/** > - * hv_put_dom_num() - Mark the PCI domain number as free > - * @dom: Domain number to be freed > - */ > -static void hv_put_dom_num(u16 dom) > -{ > - clear_bit(dom, hvpci_dom_map); > -} > - > /** > * hv_pci_probe() - New VMBus channel probe, for a root PCI bus > * @hdev: VMBus's tracking struct for this root PCI bus > @@ -3715,9 +3673,9 @@ static int hv_pci_probe(struct hv_device *hdev, > * collisions) in the same VM. > */ > dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4]; > - dom = hv_get_dom_num(dom_req); > + dom = pci_bus_find_emul_domain_nr(dom_req); > > - if (dom == HVPCI_DOM_INVALID) { > + if (dom < 0) { > dev_err(&hdev->device, > "Unable to use dom# 0x%x or other numbers", dom_req); > ret = -EINVAL; > @@ -3851,7 +3809,7 @@ static int hv_pci_probe(struct hv_device *hdev, > destroy_wq: > destroy_workqueue(hbus->wq); > free_dom: > - hv_put_dom_num(hbus->bridge->domain_nr); > + pci_bus_release_emul_domain_nr(hbus->bridge->domain_nr); > free_bus: > kfree(hbus); > return ret; > @@ -3976,8 +3934,6 @@ static void hv_pci_remove(struct hv_device *hdev) > irq_domain_remove(hbus->irq_domain); > irq_domain_free_fwnode(hbus->fwnode); > > - hv_put_dom_num(hbus->bridge->domain_nr); > - > kfree(hbus); > } > > @@ -4148,9 +4104,6 @@ static int __init init_hv_pci_drv(void) > if (ret) > return ret; > > - /* Set the invalid domain number's bit, so it will not be used */ > - set_bit(HVPCI_DOM_INVALID, hvpci_dom_map); > - > /* Initialize PCI block r/w interface */ > hvpci_block_ops.read_block = hv_read_config_block; > hvpci_block_ops.write_block = hv_write_config_block; > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index e9448d55113b..833ebf2d5213 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -6692,9 +6692,50 @@ static void pci_no_domains(void) > #endif > } > > +#ifdef CONFIG_PCI_DOMAINS > +static DEFINE_IDA(pci_domain_nr_dynamic_ida); > + > +/* > + * Find a free domain_nr either allocated by pci_domain_nr_dynamic_ida or > + * fallback to the first free domain number above the last ACPI segment number. > + * Caller may have a specific domain number in mind, in which case try to > + * reserve it. > + * > + * Note that this allocation is freed by pci_release_host_bridge_dev(). > + */ > +int pci_bus_find_emul_domain_nr(int hint) > +{ > + if (hint >= 0) { > + hint = ida_alloc_range(&pci_domain_nr_dynamic_ida, hint, hint, > + GFP_KERNEL); This almost preserves the existing functionality in pci-hyperv.c. But if the "hint" passed in is zero, current code in pci-hyperv.c treats that as a collision and allocates some other value. The special treatment of zero is necessary per the comment with the definition of HVPCI_DOM_INVALID. I don't have an opinion on whether the code here should treat a "hint" of zero as invalid, or whether that should be handled in pci-hyperv.c. > + > + if (hint >= 0) > + return hint; > + } > + > + if (acpi_disabled) > + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL); > + > + /* > + * Emulated domains start at 0x10000 to not clash with ACPI _SEG > + * domains. Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of > + * which the lower 16 bits are the PCI Segment Group (domain) number. > + * Other bits are currently reserved. > + */ Back in 2018 and 2019, the Microsoft Linux team encountered problems with PCI domain IDs that exceeded 0xFFFF. User space code, such as the Xorg X server, assumed PCI domain IDs were at most 16 bits, and retained only the low 16 bits if the value was larger. My memory of the details is vague, but I believe some or all of this behavior was tied to libpciaccess. As a result of these user space limitations, the pci-hyperv.c code made sure to not create any domain IDs larger than 0xFFFF. The problem was not just theoretical -- Microsoft had customers reporting issues due to the "32-bit domain ID problem" and the pci-hyperv.c code was updated to avoid it. I don't have information on whether user space code has been fixed, or the extent to which such a fix has propagated into distro versions. At the least, a VM with old user space code might break if the kernel is upgraded to one with this patch. How do people see the risks now that it is 6 years later? I don't have enough data to make an assessment. Michael > + return ida_alloc_range(&pci_domain_nr_dynamic_ida, 0x10000, INT_MAX, > + GFP_KERNEL); > +} > +EXPORT_SYMBOL_GPL(pci_bus_find_emul_domain_nr); > + > +void pci_bus_release_emul_domain_nr(int domain_nr) > +{ > + ida_free(&pci_domain_nr_dynamic_ida, domain_nr); > +} > +EXPORT_SYMBOL_GPL(pci_bus_release_emul_domain_nr); > +#endif > + > #ifdef CONFIG_PCI_DOMAINS_GENERIC > static DEFINE_IDA(pci_domain_nr_static_ida); > -static DEFINE_IDA(pci_domain_nr_dynamic_ida); > > static void of_pci_reserve_static_domain_nr(void) > { > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c > index 4b8693ec9e4c..e94978c3be3d 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -632,6 +632,11 @@ static void pci_release_host_bridge_dev(struct device *dev) > > pci_free_resource_list(&bridge->windows); > pci_free_resource_list(&bridge->dma_ranges); > + > + /* Host bridges only have domain_nr set in the emulation case */ > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > + pci_bus_release_emul_domain_nr(bridge->domain_nr); > + > kfree(bridge); > } > > @@ -1112,7 +1117,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) > device_del(&bridge->dev); > free: > #ifdef CONFIG_PCI_DOMAINS_GENERIC > - pci_bus_release_domain_nr(parent, bus->domain_nr); > + if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) > + pci_bus_release_domain_nr(parent, bus->domain_nr); > #endif > if (bus_registered) > put_device(&bus->dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 05e68f35f392..f6a713da5c49 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1934,10 +1934,14 @@ DEFINE_GUARD(pci_dev, struct pci_dev *, > pci_dev_lock(_T), pci_dev_unlock(_T)) > */ > #ifdef CONFIG_PCI_DOMAINS > extern int pci_domains_supported; > +int pci_bus_find_emul_domain_nr(int hint); > +void pci_bus_release_emul_domain_nr(int domain_nr); > #else > enum { pci_domains_supported = 0 }; > static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } > static inline int pci_proc_domain(struct pci_bus *bus) { return 0; } > +static inline int pci_bus_find_emul_domain_nr(int hint) { return 0; } > +static inline void pci_bus_release_emul_domain_nr(int domain_nr) { } > #endif /* CONFIG_PCI_DOMAINS */ > > /* > -- > 2.50.1 >