The CXL AER error handling logic currently resides in the AER driver file, drivers/pci/pcie/aer.c. CXL specific changes are conditionally compiled using #ifdefs. Improve the AER driver maintainability by separating the CXL specific logic from the AER driver's core functionality and removing the #ifdefs. Introduce drivers/pci/pcie/cxl_aer.c and move the CXL AER logic into the new file. Update the makefile to conditionally compile the CXL file using the existing CONFIG_PCIEAER_CXL Kconfig. Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> --- drivers/pci/pci.h | 8 +++ drivers/pci/pcie/Makefile | 1 + drivers/pci/pcie/aer.c | 138 ------------------------------------- drivers/pci/pcie/cxl_aer.c | 138 +++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 2 + 5 files changed, 149 insertions(+), 138 deletions(-) create mode 100644 drivers/pci/pcie/cxl_aer.c diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a0d1e59b5666..91b583cf18eb 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1029,6 +1029,14 @@ static inline void pci_save_aer_state(struct pci_dev *dev) { } static inline void pci_restore_aer_state(struct pci_dev *dev) { } #endif +#ifdef CONFIG_PCIEAER_CXL +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); +void cxl_rch_enable_rcec(struct pci_dev *rcec); +#else +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } +#endif + #ifdef CONFIG_ACPI bool pci_acpi_preserve_config(struct pci_host_bridge *bridge); int pci_acpi_program_hp_params(struct pci_dev *dev); diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 173829aa02e6..cd2cb925dbd5 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o +obj-$(CONFIG_PCIEAER_CXL) += cxl_aer.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index a2df9456595a..0b4d721980ef 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1094,144 +1094,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -#ifdef CONFIG_PCIEAER_CXL - -/** - * pci_aer_unmask_internal_errors - unmask internal errors - * @dev: pointer to the pci_dev data structure - * - * Unmask internal errors in the Uncorrectable and Correctable Error - * Mask registers. - * - * Note: AER must be enabled and supported by the device which must be - * checked in advance, e.g. with pcie_aer_is_native(). - */ -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) -{ - int aer = dev->aer_cap; - u32 mask; - - pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); - mask &= ~PCI_ERR_UNC_INTN; - pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); - - pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); - mask &= ~PCI_ERR_COR_INTERNAL; - pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); -} - -static bool is_cxl_mem_dev(struct pci_dev *dev) -{ - /* - * The capability, status, and control fields in Device 0, - * Function 0 DVSEC control the CXL functionality of the - * entire device (CXL 3.0, 8.1.3). - */ - if (dev->devfn != PCI_DEVFN(0, 0)) - return false; - - /* - * CXL Memory Devices must have the 502h class code set (CXL - * 3.0, 8.1.12.1). - */ - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) - return false; - - return true; -} - -static bool cxl_error_is_native(struct pci_dev *dev) -{ - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - - return (pcie_ports_native || host->native_aer); -} - -static bool is_internal_error(struct aer_err_info *info) -{ - if (info->severity == AER_CORRECTABLE) - return info->status & PCI_ERR_COR_INTERNAL; - - return info->status & PCI_ERR_UNC_INTN; -} - -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *info = (struct aer_err_info *)data; - const struct pci_error_handlers *err_handler; - - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) - return 0; - - /* Protect dev->driver */ - device_lock(&dev->dev); - - err_handler = dev->driver ? dev->driver->err_handler : NULL; - if (!err_handler) - goto out; - - if (info->severity == AER_CORRECTABLE) { - if (err_handler->cor_error_detected) - err_handler->cor_error_detected(dev); - } else if (err_handler->error_detected) { - if (info->severity == AER_NONFATAL) - err_handler->error_detected(dev, pci_channel_io_normal); - else if (info->severity == AER_FATAL) - err_handler->error_detected(dev, pci_channel_io_frozen); - } -out: - device_unlock(&dev->dev); - return 0; -} - -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) -{ - /* - * Internal errors of an RCEC indicate an AER error in an - * RCH's downstream port. Check and handle them in the CXL.mem - * device driver. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_internal_error(info)) - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); -} - -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) -{ - bool *handles_cxl = data; - - if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); - - /* Non-zero terminates iteration */ - return *handles_cxl; -} - -static bool handles_cxl_errors(struct pci_dev *rcec) -{ - bool handles_cxl = false; - - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && - pcie_aer_is_native(rcec)) - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); - - return handles_cxl; -} - -static void cxl_rch_enable_rcec(struct pci_dev *rcec) -{ - if (!handles_cxl_errors(rcec)) - return; - - pci_aer_unmask_internal_errors(rcec); - pci_info(rcec, "CXL: Internal errors unmasked"); -} - -#else -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } -static inline void cxl_rch_handle_error(struct pci_dev *dev, - struct aer_err_info *info) { } -#endif /** * pci_aer_handle_error - handle logging error into an event log diff --git a/drivers/pci/pcie/cxl_aer.c b/drivers/pci/pcie/cxl_aer.c new file mode 100644 index 000000000000..b2ea14f70055 --- /dev/null +++ b/drivers/pci/pcie/cxl_aer.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include <linux/pci.h> +#include <linux/aer.h> +#include "../pci.h" + +/** + * pci_aer_unmask_internal_errors - unmask internal errors + * @dev: pointer to the pci_dev data structure + * + * Unmask internal errors in the Uncorrectable and Correctable Error + * Mask registers. + * + * Note: AER must be enabled and supported by the device which must be + * checked in advance, e.g. with pcie_aer_is_native(). + */ +static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 mask; + + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); + mask &= ~PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); + + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); + mask &= ~PCI_ERR_COR_INTERNAL; + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); +} + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.2, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.2, 8.1.12.1). + */ + if (FIELD_GET(PCI_CLASS_CODE_MASK, dev->class) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} + +static bool is_internal_error(struct aer_err_info *info) +{ + if (info->severity == AER_CORRECTABLE) + return info->status & PCI_ERR_COR_INTERNAL; + + return info->status & PCI_ERR_UNC_INTN; +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + /* Protect dev->driver */ + device_lock(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + goto out; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } +out: + device_unlock(&dev->dev); + return 0; +} + +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. + */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} + diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e2d71b6fdd84..31b3935bf189 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -12,6 +12,8 @@ /* Device classes and subclasses */ +#define PCI_CLASS_CODE_MASK 0xFFFF00 + #define PCI_CLASS_NOT_DEFINED 0x0000 #define PCI_CLASS_NOT_DEFINED_VGA 0x0001 -- 2.34.1