From: Nam Cao <namcao@xxxxxxxxxxxxx> Sent: Thursday, June 26, 2025 7:48 AM > > Move away from the legacy MSI domain setup, switch to use > msi_create_parent_irq_domain(). With the additional tweak to this patch that you supplied separately, everything in my testing on both x86 and arm64 seems to work OK. So that's all good. On arm64, I did notice the following IRQ domain information from /sys/kernel/debug/irq/domains: # cat HV-PCI-MSIX-1e03\:00\:00.0-12 name: HV-PCI-MSIX-1e03:00:00.0-12 size: 0 mapped: 7 flags: 0x00000213 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED IRQ_DOMAIN_FLAG_MSI IRQ_DOMAIN_FLAG_MSI_DEVICE parent: 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3 name: 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3 size: 0 mapped: 7 flags: 0x00000103 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED IRQ_DOMAIN_FLAG_MSI_PARENT parent: hv_vpci_arm64 name: hv_vpci_arm64 size: 956 mapped: 31 flags: 0x00000003 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED parent: irqchip@0x00000000ffff0000-1 name: irqchip@0x00000000ffff0000-1 size: 0 mapped: 47 flags: 0x00000003 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED The 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3 domain has IRQ_DOMAIN_FLAG_MSI_PARENT set. But the hv_vpci_arm64 and irqchip@... domains do not. Is that a problem? On x86, the output is this, with IRQ_DOMAIN_FLAG_MSI_PARENT set in the next level up VECTOR domain: # cat HV-PCI-MSIX-6b71\:00\:02.0-12 name: HV-PCI-MSIX-6b71:00:02.0-12 size: 0 mapped: 17 flags: 0x00000213 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED IRQ_DOMAIN_FLAG_MSI IRQ_DOMAIN_FLAG_MSI_DEVICE parent: 8564CB14-6B71-477C-B189-F175118E6FF0-3 name: 8564CB14-6B71-477C-B189-F175118E6FF0-3 size: 0 mapped: 17 flags: 0x00000103 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED IRQ_DOMAIN_FLAG_MSI_PARENT parent: VECTOR name: VECTOR size: 0 mapped: 67 flags: 0x00000103 IRQ_DOMAIN_FLAG_HIERARCHY IRQ_DOMAIN_NAME_ALLOCATED IRQ_DOMAIN_FLAG_MSI_PARENT Finally, I've noted a couple of code review comments below. These comments may reflect my lack of fully understanding the MSI IRQ handling, in which case, please set me straight. Thanks, Michael > > Signed-off-by: Nam Cao <namcao@xxxxxxxxxxxxx> > --- > Cc: K. Y. Srinivasan <kys@xxxxxxxxxxxxx> > Cc: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > Cc: Wei Liu <wei.liu@xxxxxxxxxx> > Cc: Dexuan Cui <decui@xxxxxxxxxxxxx> > Cc: linux-hyperv@xxxxxxxxxxxxxxx > --- > drivers/pci/Kconfig | 1 + > drivers/pci/controller/pci-hyperv.c | 98 +++++++++++++++++++++++------ > 2 files changed, 80 insertions(+), 19 deletions(-) > > diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig > index 9c0e4aaf4e8cb..9a249c65aedcd 100644 > --- a/drivers/pci/Kconfig > +++ b/drivers/pci/Kconfig > @@ -223,6 +223,7 @@ config PCI_HYPERV > tristate "Hyper-V PCI Frontend" > depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS > select PCI_HYPERV_INTERFACE > + select IRQ_MSI_LIB > help > The PCI device frontend driver allows the kernel to import arbitrary > PCI devices from a PCI backend to support PCI driver domains. > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index ef5d655a0052c..3a24fadddb83b 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -44,6 +44,7 @@ > #include <linux/delay.h> > #include <linux/semaphore.h> > #include <linux/irq.h> > +#include <linux/irqchip/irq-msi-lib.h> > #include <linux/msi.h> > #include <linux/hyperv.h> > #include <linux/refcount.h> > @@ -508,7 +509,6 @@ struct hv_pcibus_device { > struct list_head children; > struct list_head dr_list; > > - struct msi_domain_info msi_info; > struct irq_domain *irq_domain; > > struct workqueue_struct *wq; > @@ -1687,7 +1687,7 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info, > struct msi_desc *msi = irq_data_get_msi_desc(irq_data); > > pdev = msi_desc_to_pci_dev(msi); > - hbus = info->data; > + hbus = domain->host_data; > int_desc = irq_data_get_irq_chip_data(irq_data); > if (!int_desc) > return; > @@ -1705,7 +1705,6 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info, > > static void hv_irq_mask(struct irq_data *data) > { > - pci_msi_mask_irq(data); > if (data->parent_data->chip->irq_mask) > irq_chip_mask_parent(data); > } > @@ -1716,7 +1715,6 @@ static void hv_irq_unmask(struct irq_data *data) > > if (data->parent_data->chip->irq_unmask) > irq_chip_unmask_parent(data); > - pci_msi_unmask_irq(data); > } > > struct compose_comp_ctxt { > @@ -2101,6 +2099,44 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) > msg->data = 0; > } > > +static bool hv_pcie_init_dev_msi_info(struct device *dev, struct irq_domain *domain, > + struct irq_domain *real_parent, struct msi_domain_info *info) > +{ > + struct irq_chip *chip = info->chip; > + > + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) > + return false; > + > + info->ops->msi_prepare = hv_msi_prepare; > + > + chip->irq_set_affinity = irq_chip_set_affinity_parent; > + > + if (IS_ENABLED(CONFIG_X86)) > + chip->flags |= IRQCHIP_MOVE_DEFERRED; > + > + return true; > +} > + > +#define HV_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ > + MSI_FLAG_USE_DEF_CHIP_OPS | \ > + MSI_FLAG_PCI_MSI_MASK_PARENT) > +#define HV_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI | \ > + MSI_FLAG_PCI_MSIX | \ > + MSI_GENERIC_FLAGS_MASK) > + > +static const struct msi_parent_ops hv_pcie_msi_parent_ops = { > + .required_flags = HV_PCIE_MSI_FLAGS_REQUIRED, > + .supported_flags = HV_PCIE_MSI_FLAGS_SUPPORTED, > + .bus_select_token = DOMAIN_BUS_PCI_MSI, > +#ifdef CONFIG_X86 > + .chip_flags = MSI_CHIP_FLAG_SET_ACK, > +#elif defined(CONFIG_ARM64) > + .chip_flags = MSI_CHIP_FLAG_SET_EOI, > +#endif > + .prefix = "HV-", > + .init_dev_msi_info = hv_pcie_init_dev_msi_info, > +}; > + > /* HW Interrupt Chip Descriptor */ > static struct irq_chip hv_msi_irq_chip = { > .name = "Hyper-V PCIe MSI", > @@ -2108,7 +2144,6 @@ static struct irq_chip hv_msi_irq_chip = { > .irq_set_affinity = irq_chip_set_affinity_parent, > #ifdef CONFIG_X86 > .irq_ack = irq_chip_ack_parent, > - .flags = IRQCHIP_MOVE_DEFERRED, > #elif defined(CONFIG_ARM64) > .irq_eoi = irq_chip_eoi_parent, > #endif Would it work to drop the #ifdef's and always set both .irq_ack and .irq_eoi on x86 and on ARM64? Is which one gets called controlled by the child HV-PCI-MSIX- ... domain, based on the .chip_flags? I'm trying to reduce the #ifdef clutter. I tested without the #ifdefs on both x86 and arm64, and everything works, but I know that doesn't prove that it's OK. If the #ifdefs can go away, then I'd like to see a tweak to the way .chip_flags is set. Rather than do an #ifdef inline for struct msi_parent_ops hv_pcie_msi_parent_ops, add a #define HV_MSI_CHIP_FLAGS in the existing #ifdef X86 and #ifdef ARM64 sections respectively near the top of this source file, and then use HV_MSI_CHIP_FLAGS in struct msi_parent_ops hv_pcie_msi_parent_ops. As much as is reasonable, I'd like to not clutter the code with #ifdef X86 #elseif ARM64, but instead group all the differences under the existing #ifdefs near the top. There are some places where this isn't practical, but this seems like a place that is practical. > @@ -2116,9 +2151,37 @@ static struct irq_chip hv_msi_irq_chip = { > .irq_unmask = hv_irq_unmask, > }; > > -static struct msi_domain_ops hv_msi_ops = { > - .msi_prepare = hv_msi_prepare, > - .msi_free = hv_msi_free, > +static int hv_pcie_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, > + void *arg) > +{ > + /* TODO: move the content of hv_compose_msi_msg() in here */ Could you elaborate on this TODO? Is the idea to loop through all the IRQs and generate the MSI message for each one? What is the advantage to doing it here? I noticed in Patch 3 of the series, the Aardvark controller has advk_msi_irq_compose_msi_msg(), but you had not moved it into the domain allocation path. Also, is there some point in the time in the future where the "TODO" is likely to become a "MUST DO"? > + int ret; > + > + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg); > + if (ret < 0) > + return ret; > + > + for (int i = 0; i < nr_irqs; i++) { > + irq_domain_set_info(d, virq + i, 0, &hv_msi_irq_chip, NULL, FLOW_HANDLER, NULL, > + FLOW_NAME); > + } > + > + return 0; > +} > + > +static void hv_pcie_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs) > +{ > + struct msi_domain_info *info = d->host_data; > + > + for (int i = 0; i < nr_irqs; i++) > + hv_msi_free(d, info, virq + i); > + > + irq_domain_free_irqs_top(d, virq, nr_irqs); > +} > + > +static const struct irq_domain_ops hv_pcie_domain_ops = { > + .alloc = hv_pcie_domain_alloc, > + .free = hv_pcie_domain_free, > }; > > /** > @@ -2136,17 +2199,14 @@ static struct msi_domain_ops hv_msi_ops = { > */ > static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus) > { > - hbus->msi_info.chip = &hv_msi_irq_chip; > - hbus->msi_info.ops = &hv_msi_ops; > - hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS | > - MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI | > - MSI_FLAG_PCI_MSIX); > - hbus->msi_info.handler = FLOW_HANDLER; > - hbus->msi_info.handler_name = FLOW_NAME; > - hbus->msi_info.data = hbus; > - hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode, > - &hbus->msi_info, > - hv_pci_get_root_domain()); > + struct irq_domain_info info = { > + .fwnode = hbus->fwnode, > + .ops = &hv_pcie_domain_ops, > + .host_data = hbus, > + .parent = hv_pci_get_root_domain(), > + }; > + > + hbus->irq_domain = msi_create_parent_irq_domain(&info, &hv_pcie_msi_parent_ops); > if (!hbus->irq_domain) { > dev_err(&hbus->hdev->device, > "Failed to build an MSI IRQ domain\n"); > -- > 2.39.5 >