On 6/26/2025 6:42 PM, Sathyanarayanan Kuppuswamy wrote: > On 6/26/25 3:42 PM, Terry Bowman wrote: >> The CXL AER error handling logic currently resides in the AER driver file, >> drivers/pci/pcie/aer.c. CXL specific changes are conditionally compiled >> using #ifdefs. >> >> Improve the AER driver maintainability by separating the CXL specific logic >> from the AER driver's core functionality and removing the #ifdefs. >> Introduce drivers/pci/pcie/cxl_aer.c and move the CXL AER logic into the >> new file. >> >> Update the makefile to conditionally compile the CXL file using the >> existing CONFIG_PCIEAER_CXL Kconfig. >> >> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> >> --- > After moving the code , you seem to have updated it with your own > changes. May be you split it into two patches. Yes, I'll update to make a copy and add the changes in the following patch(es). -Terry >> drivers/pci/pci.h | 8 +++ >> drivers/pci/pcie/Makefile | 1 + >> drivers/pci/pcie/aer.c | 138 ------------------------------------- >> drivers/pci/pcie/cxl_aer.c | 138 +++++++++++++++++++++++++++++++++++++ >> include/linux/pci_ids.h | 2 + >> 5 files changed, 149 insertions(+), 138 deletions(-) >> create mode 100644 drivers/pci/pcie/cxl_aer.c >> >> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h >> index a0d1e59b5666..91b583cf18eb 100644 >> --- a/drivers/pci/pci.h >> +++ b/drivers/pci/pci.h >> @@ -1029,6 +1029,14 @@ static inline void pci_save_aer_state(struct pci_dev *dev) { } >> static inline void pci_restore_aer_state(struct pci_dev *dev) { } >> #endif >> >> +#ifdef CONFIG_PCIEAER_CXL >> +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); >> +void cxl_rch_enable_rcec(struct pci_dev *rcec); >> +#else >> +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } >> +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } >> +#endif >> + >> #ifdef CONFIG_ACPI >> bool pci_acpi_preserve_config(struct pci_host_bridge *bridge); >> int pci_acpi_program_hp_params(struct pci_dev *dev); >> diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile >> index 173829aa02e6..cd2cb925dbd5 100644 >> --- a/drivers/pci/pcie/Makefile >> +++ b/drivers/pci/pcie/Makefile >> @@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o >> >> obj-y += aspm.o >> obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o >> +obj-$(CONFIG_PCIEAER_CXL) += cxl_aer.o >> obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o >> obj-$(CONFIG_PCIE_PME) += pme.o >> obj-$(CONFIG_PCIE_DPC) += dpc.o >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c >> index a2df9456595a..0b4d721980ef 100644 >> --- a/drivers/pci/pcie/aer.c >> +++ b/drivers/pci/pcie/aer.c >> @@ -1094,144 +1094,6 @@ static bool find_source_device(struct pci_dev *parent, >> return true; >> } >> >> -#ifdef CONFIG_PCIEAER_CXL >> - >> -/** >> - * pci_aer_unmask_internal_errors - unmask internal errors >> - * @dev: pointer to the pci_dev data structure >> - * >> - * Unmask internal errors in the Uncorrectable and Correctable Error >> - * Mask registers. >> - * >> - * Note: AER must be enabled and supported by the device which must be >> - * checked in advance, e.g. with pcie_aer_is_native(). >> - */ >> -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) >> -{ >> - int aer = dev->aer_cap; >> - u32 mask; >> - >> - pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); >> - mask &= ~PCI_ERR_UNC_INTN; >> - pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); >> - >> - pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); >> - mask &= ~PCI_ERR_COR_INTERNAL; >> - pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); >> -} >> - >> -static bool is_cxl_mem_dev(struct pci_dev *dev) >> -{ >> - /* >> - * The capability, status, and control fields in Device 0, >> - * Function 0 DVSEC control the CXL functionality of the >> - * entire device (CXL 3.0, 8.1.3). >> - */ >> - if (dev->devfn != PCI_DEVFN(0, 0)) >> - return false; >> - >> - /* >> - * CXL Memory Devices must have the 502h class code set (CXL >> - * 3.0, 8.1.12.1). >> - */ >> - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) >> - return false; >> - >> - return true; >> -} >> - >> -static bool cxl_error_is_native(struct pci_dev *dev) >> -{ >> - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); >> - >> - return (pcie_ports_native || host->native_aer); >> -} >> - >> -static bool is_internal_error(struct aer_err_info *info) >> -{ >> - if (info->severity == AER_CORRECTABLE) >> - return info->status & PCI_ERR_COR_INTERNAL; >> - >> - return info->status & PCI_ERR_UNC_INTN; >> -} >> - >> -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) >> -{ >> - struct aer_err_info *info = (struct aer_err_info *)data; >> - const struct pci_error_handlers *err_handler; >> - >> - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) >> - return 0; >> - >> - /* Protect dev->driver */ >> - device_lock(&dev->dev); >> - >> - err_handler = dev->driver ? dev->driver->err_handler : NULL; >> - if (!err_handler) >> - goto out; >> - >> - if (info->severity == AER_CORRECTABLE) { >> - if (err_handler->cor_error_detected) >> - err_handler->cor_error_detected(dev); >> - } else if (err_handler->error_detected) { >> - if (info->severity == AER_NONFATAL) >> - err_handler->error_detected(dev, pci_channel_io_normal); >> - else if (info->severity == AER_FATAL) >> - err_handler->error_detected(dev, pci_channel_io_frozen); >> - } >> -out: >> - device_unlock(&dev->dev); >> - return 0; >> -} >> - >> -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) >> -{ >> - /* >> - * Internal errors of an RCEC indicate an AER error in an >> - * RCH's downstream port. Check and handle them in the CXL.mem >> - * device driver. >> - */ >> - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && >> - is_internal_error(info)) >> - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); >> -} >> - >> -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) >> -{ >> - bool *handles_cxl = data; >> - >> - if (!*handles_cxl) >> - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); >> - >> - /* Non-zero terminates iteration */ >> - return *handles_cxl; >> -} >> - >> -static bool handles_cxl_errors(struct pci_dev *rcec) >> -{ >> - bool handles_cxl = false; >> - >> - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && >> - pcie_aer_is_native(rcec)) >> - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); >> - >> - return handles_cxl; >> -} >> - >> -static void cxl_rch_enable_rcec(struct pci_dev *rcec) >> -{ >> - if (!handles_cxl_errors(rcec)) >> - return; >> - >> - pci_aer_unmask_internal_errors(rcec); >> - pci_info(rcec, "CXL: Internal errors unmasked"); >> -} >> - >> -#else >> -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } >> -static inline void cxl_rch_handle_error(struct pci_dev *dev, >> - struct aer_err_info *info) { } >> -#endif >> >> /** >> * pci_aer_handle_error - handle logging error into an event log >> diff --git a/drivers/pci/pcie/cxl_aer.c b/drivers/pci/pcie/cxl_aer.c >> new file mode 100644 >> index 000000000000..b2ea14f70055 >> --- /dev/null >> +++ b/drivers/pci/pcie/cxl_aer.c >> @@ -0,0 +1,138 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ >> + >> +#include <linux/pci.h> >> +#include <linux/aer.h> >> +#include "../pci.h" >> + >> +/** >> + * pci_aer_unmask_internal_errors - unmask internal errors >> + * @dev: pointer to the pci_dev data structure >> + * >> + * Unmask internal errors in the Uncorrectable and Correctable Error >> + * Mask registers. >> + * >> + * Note: AER must be enabled and supported by the device which must be >> + * checked in advance, e.g. with pcie_aer_is_native(). >> + */ >> +static void pci_aer_unmask_internal_errors(struct pci_dev *dev) >> +{ >> + int aer = dev->aer_cap; >> + u32 mask; >> + >> + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); >> + mask &= ~PCI_ERR_UNC_INTN; >> + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); >> + >> + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); >> + mask &= ~PCI_ERR_COR_INTERNAL; >> + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); >> +} >> + >> +static bool is_cxl_mem_dev(struct pci_dev *dev) >> +{ >> + /* >> + * The capability, status, and control fields in Device 0, >> + * Function 0 DVSEC control the CXL functionality of the >> + * entire device (CXL 3.2, 8.1.3). >> + */ >> + if (dev->devfn != PCI_DEVFN(0, 0)) >> + return false; >> + >> + /* >> + * CXL Memory Devices must have the 502h class code set (CXL >> + * 3.2, 8.1.12.1). >> + */ >> + if (FIELD_GET(PCI_CLASS_CODE_MASK, dev->class) != PCI_CLASS_MEMORY_CXL) >> + return false; >> + >> + return true; >> +} >> + >> +static bool cxl_error_is_native(struct pci_dev *dev) >> +{ >> + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); >> + >> + return (pcie_ports_native || host->native_aer); >> +} >> + >> +static bool is_internal_error(struct aer_err_info *info) >> +{ >> + if (info->severity == AER_CORRECTABLE) >> + return info->status & PCI_ERR_COR_INTERNAL; >> + >> + return info->status & PCI_ERR_UNC_INTN; >> +} >> + >> +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) >> +{ >> + struct aer_err_info *info = (struct aer_err_info *)data; >> + const struct pci_error_handlers *err_handler; >> + >> + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) >> + return 0; >> + >> + /* Protect dev->driver */ >> + device_lock(&dev->dev); >> + >> + err_handler = dev->driver ? dev->driver->err_handler : NULL; >> + if (!err_handler) >> + goto out; >> + >> + if (info->severity == AER_CORRECTABLE) { >> + if (err_handler->cor_error_detected) >> + err_handler->cor_error_detected(dev); >> + } else if (err_handler->error_detected) { >> + if (info->severity == AER_NONFATAL) >> + err_handler->error_detected(dev, pci_channel_io_normal); >> + else if (info->severity == AER_FATAL) >> + err_handler->error_detected(dev, pci_channel_io_frozen); >> + } >> +out: >> + device_unlock(&dev->dev); >> + return 0; >> +} >> + >> +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) >> +{ >> + /* >> + * Internal errors of an RCEC indicate an AER error in an >> + * RCH's downstream port. Check and handle them in the CXL.mem >> + * device driver. >> + */ >> + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && >> + is_internal_error(info)) >> + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); >> +} >> + >> +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) >> +{ >> + bool *handles_cxl = data; >> + >> + if (!*handles_cxl) >> + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); >> + >> + /* Non-zero terminates iteration */ >> + return *handles_cxl; >> +} >> + >> +static bool handles_cxl_errors(struct pci_dev *rcec) >> +{ >> + bool handles_cxl = false; >> + >> + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && >> + pcie_aer_is_native(rcec)) >> + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); >> + >> + return handles_cxl; >> +} >> + >> +void cxl_rch_enable_rcec(struct pci_dev *rcec) >> +{ >> + if (!handles_cxl_errors(rcec)) >> + return; >> + >> + pci_aer_unmask_internal_errors(rcec); >> + pci_info(rcec, "CXL: Internal errors unmasked"); >> +} >> + >> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h >> index e2d71b6fdd84..31b3935bf189 100644 >> --- a/include/linux/pci_ids.h >> +++ b/include/linux/pci_ids.h >> @@ -12,6 +12,8 @@ >> >> /* Device classes and subclasses */ >> >> +#define PCI_CLASS_CODE_MASK 0xFFFF00 >> + >> #define PCI_CLASS_NOT_DEFINED 0x0000 >> #define PCI_CLASS_NOT_DEFINED_VGA 0x0001 >>