On 6/3/25 8:54 AM, Fabio M. De Francesco wrote: > I/O Machine Check Architecture events may signal failing PCIe components > or links. The AER event contains details on what was happening on the wire > when the error was signaled. > > Trace the CPER PCIe Error section (UEFI v2.10, Appendix N.2.7) reported > by the I/O MCA. > > Cc: Dan Williams <dan.j.williams@xxxxxxxxx> > Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@xxxxxxxxxxxxxxx> Reviewed-by: Dave Jiang <dave.jiang@xxxxxxxxx> > --- > drivers/acpi/Kconfig | 1 + > drivers/acpi/acpi_extlog.c | 32 ++++++++++++++++++++++++++++++++ > drivers/pci/pcie/aer.c | 2 +- > include/linux/aer.h | 8 ++++++-- > 4 files changed, 40 insertions(+), 3 deletions(-) > > diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig > index 7bc40c2735ac0..2bbd9e4868ad7 100644 > --- a/drivers/acpi/Kconfig > +++ b/drivers/acpi/Kconfig > @@ -493,6 +493,7 @@ config ACPI_EXTLOG > tristate "Extended Error Log support" > depends on X86_MCE && X86_LOCAL_APIC && EDAC > select UEFI_CPER > + select ACPI_APEI_PCIEAER > help > Certain usages such as Predictive Failure Analysis (PFA) require > more information about the error than what can be described in > diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c > index 47d11cb5c9120..b2928ff297eda 100644 > --- a/drivers/acpi/acpi_extlog.c > +++ b/drivers/acpi/acpi_extlog.c > @@ -132,6 +132,34 @@ static int print_extlog_rcd(const char *pfx, > return 1; > } > > +static void extlog_print_pcie(struct cper_sec_pcie *pcie_err, > + int severity) > +{ > + struct aer_capability_regs *aer; > + struct pci_dev *pdev; > + unsigned int devfn; > + unsigned int bus; > + int aer_severity; > + int domain; > + > + if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID || > + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO)) > + return; > + > + aer_severity = cper_severity_to_aer(severity); > + aer = (struct aer_capability_regs *)pcie_err->aer_info; > + domain = pcie_err->device_id.segment; > + bus = pcie_err->device_id.bus; > + devfn = PCI_DEVFN(pcie_err->device_id.device, > + pcie_err->device_id.function); > + pdev = pci_get_domain_bus_and_slot(domain, bus, devfn); > + if (!pdev) > + return; > + > + pci_print_aer(KERN_DEBUG, pdev, aer_severity, aer); > + pci_dev_put(pdev); > +} > + > static int extlog_print(struct notifier_block *nb, unsigned long val, > void *data) > { > @@ -183,6 +211,10 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, > if (gdata->error_data_length >= sizeof(*mem)) > trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, > (u8)gdata->error_severity); > + } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { > + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); > + > + extlog_print_pcie(pcie_err, gdata->error_severity); > } else { > void *err = acpi_hest_get_payload(gdata); > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index d0ebf7c15afa9..627fcf4346983 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -801,7 +801,7 @@ void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity, > trace_aer_event(dev_name(&dev->dev), (status & ~mask), > aer_severity, tlp_header_valid, &aer->header_log); > } > -EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); > +EXPORT_SYMBOL_GPL(pci_print_aer); > > /** > * add_error_device - list device to be handled > diff --git a/include/linux/aer.h b/include/linux/aer.h > index 45d0fb2e2e759..6ce433cee4625 100644 > --- a/include/linux/aer.h > +++ b/include/linux/aer.h > @@ -56,16 +56,20 @@ struct aer_capability_regs { > #if defined(CONFIG_PCIEAER) > int pci_aer_clear_nonfatal_status(struct pci_dev *dev); > int pcie_aer_is_native(struct pci_dev *dev); > +void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity, > + struct aer_capability_regs *aer); > #else > static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) > { > return -EINVAL; > } > static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } > +static inline void pci_print_aer(char *level, struct pci_dev *dev, > + int aer_severity, > + struct aer_capability_regs *aer) > +{ } > #endif > > -void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity, > - struct aer_capability_regs *aer); > int cper_severity_to_aer(int cper_severity); > void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, > int severity, struct aer_capability_regs *aer_regs);