Currently, the CXL and GHES features use the pci_print_aer() function to log AER errors. Its implementation is pretty similar to aer_print_error(), duplicating the way native PCIe devices report errors. We shouldn't log messages differently only because they are coming from a different code path. Make CXL devices and GHES call aer_print_error() when reporting AER errors. Add a wrapper, aer_print_platform_error(), that translates aer_capability_regs to aer_err_info so we can use the aer_print_error() function. Signed-off-by: Karolina Stolarek <karolina.stolarek@xxxxxxxxxx> --- v2: - Don't expose aer_err_info to the world; as aer_recover_queue() is tightly connected to the ghes code, introduce a wrapper for aer_print_error() - Move aer_err_info memset to the wrapper, don't expect the caller to clean it for us I'm still working on the logs; in the meantime, I think we can continue reviewing the patch. drivers/cxl/core/pci.c | 2 +- drivers/pci/pcie/aer.c | 64 ++++++++++++++++++++---------------------- include/linux/aer.h | 4 +-- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 013b869b66cb..9ba711365388 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -885,7 +885,7 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) return; - pci_print_aer(pdev, severity, &aer_regs); + aer_print_platform_error(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) cxl_handle_rdport_cor_ras(cxlds, dport); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index a1cf8c7ef628..ec34bc9b2332 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -760,47 +760,42 @@ int cper_severity_to_aer(int cper_severity) EXPORT_SYMBOL_GPL(cper_severity_to_aer); #endif -void pci_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer) +static void populate_aer_err_info(struct aer_err_info *info, int 
severity, + struct aer_capability_regs *aer_regs) { - int layer, agent, tlp_header_valid = 0; - u32 status, mask; - struct aer_err_info info; - - if (aer_severity == AER_CORRECTABLE) { - status = aer->cor_status; - mask = aer->cor_mask; - } else { - status = aer->uncor_status; - mask = aer->uncor_mask; - tlp_header_valid = status & AER_LOG_TLP_MASKS; - } - - layer = AER_GET_LAYER_ERROR(aer_severity, status); - agent = AER_GET_AGENT(aer_severity, status); + int tlp_header_valid; memset(&info, 0, sizeof(info)); - info.severity = aer_severity; - info.status = status; - info.mask = mask; - info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); - pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); - __aer_print_error(dev, &info); - pci_err(dev, "aer_layer=%s, aer_agent=%s\n", - aer_error_layer[layer], aer_agent_string[agent]); + info->severity = severity; + info->first_error = PCI_ERR_CAP_FEP(aer_regs->cap_control); - if (aer_severity != AER_CORRECTABLE) - pci_err(dev, "aer_uncor_severity: 0x%08x\n", - aer->uncor_severity); + if (severity == AER_CORRECTABLE) { + info->id = aer_regs->cor_err_source; + info->status = aer_regs->cor_status; + info->mask = aer_regs->cor_mask; + } else { + info->id = aer_regs->uncor_err_source; + info->status = aer_regs->uncor_status; + info->mask = aer_regs->uncor_mask; + tlp_header_valid = info->status & AER_LOG_TLP_MASKS; + + if (tlp_header_valid) { + info->tlp_header_valid = tlp_header_valid; + info->tlp = aer_regs->header_log; + } + } +} - if (tlp_header_valid) - pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); +void aer_print_platform_error(struct pci_dev *pdev, int severity, + struct aer_capability_regs *aer_regs) +{ + struct aer_err_info info; - trace_aer_event(dev_name(&dev->dev), (status & ~mask), - aer_severity, tlp_header_valid, &aer->header_log); + populate_aer_err_info(&info, severity, aer_regs); + aer_print_error(pdev, &info); } -EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); 
+EXPORT_SYMBOL_NS_GPL(aer_print_platform_error, "CXL"); /** * add_error_device - list device to be handled @@ -1146,7 +1141,8 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } - pci_print_aer(pdev, entry.severity, entry.regs); + + aer_print_platform_error(pdev, entry.severity, entry.regs); /* * Memory for aer_capability_regs(entry.regs) is being diff --git a/include/linux/aer.h b/include/linux/aer.h index 02940be66324..5593352dfb51 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -64,8 +64,8 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif -void pci_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer); +void aer_print_platform_error(struct pci_dev *pdev, int severity, + struct aer_capability_regs *aer_regs); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, int severity, struct aer_capability_regs *aer_regs); -- 2.43.5