On 6/5/2025 7:52 PM, Sathyanarayanan Kuppuswamy wrote: > On 6/3/25 10:22 AM, Terry Bowman wrote: >> During CXL device cleanup the CXL PCIe Port device interrupts remain >> enabled. This potentially allows unnecessary interrupt processing on >> behalf of the CXL errors while the device is destroyed. >> >> Disable CXL protocol errors by setting the CXL devices' AER mask register. >> >> Introduce pci_aer_mask_internal_errors() similar to pci_aer_unmask_internal_errors(). >> >> Introduce cxl_mask_prot_interrupts() to call pci_aer_mask_internal_errors(). >> Add calls to cxl_mask_prot_interrupts() within CXL Port teardown for CXL >> Root Ports, CXL Downstream Switch Ports, CXL Upstream Switch Ports, and CXL >> Endpoints. Follow the same "bottom-up" approach used during CXL Port >> teardown. >> >> Implement cxl_mask_prot_interrupts() in a header file to avoid introducing >> Kconfig ifdefs in cxl/core/port.c. >> >> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> >> --- > Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx> Kuppuswamy, thank you for reviewing the patch series and for the Reviewed-by tags. 
Regards, Terry >> drivers/cxl/core/port.c | 6 ++++++ >> drivers/cxl/cxl.h | 8 ++++++++ >> drivers/pci/pcie/aer.c | 21 +++++++++++++++++++++ >> include/linux/aer.h | 1 + >> 4 files changed, 36 insertions(+) >> >> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c >> index 07b9bb0f601f..6aaaad002a7f 100644 >> --- a/drivers/cxl/core/port.c >> +++ b/drivers/cxl/core/port.c >> @@ -1433,6 +1433,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); >> */ >> static void delete_switch_port(struct cxl_port *port) >> { >> + cxl_mask_prot_interrupts(port->uport_dev); >> + cxl_mask_prot_interrupts(port->parent_dport->dport_dev); >> + >> devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port); >> devm_release_action(port->dev.parent, cxl_unlink_uport, port); >> devm_release_action(port->dev.parent, unregister_port, port); >> @@ -1446,6 +1449,7 @@ static void reap_dports(struct cxl_port *port) >> device_lock_assert(&port->dev); >> >> xa_for_each(&port->dports, index, dport) { >> + cxl_mask_prot_interrupts(dport->dport_dev); >> devm_release_action(&port->dev, cxl_dport_unlink, dport); >> devm_release_action(&port->dev, cxl_dport_remove, dport); >> devm_kfree(&port->dev, dport); >> @@ -1476,6 +1480,8 @@ static void cxl_detach_ep(void *data) >> { >> struct cxl_memdev *cxlmd = data; >> >> + cxl_mask_prot_interrupts(cxlmd->cxlds->dev); >> + >> for (int i = cxlmd->depth - 1; i >= 1; i--) { >> struct cxl_port *port, *parent_port; >> struct detach_ctx ctx = { >> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h >> index 2c1c00466a25..2753db3d473e 100644 >> --- a/drivers/cxl/cxl.h >> +++ b/drivers/cxl/cxl.h >> @@ -12,6 +12,7 @@ >> #include <linux/node.h> >> #include <linux/io.h> >> #include <linux/pci.h> >> +#include <linux/aer.h> >> >> extern const struct nvdimm_security_ops *cxl_security_ops; >> >> @@ -771,9 +772,16 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, >> #ifdef CONFIG_PCIEAER_CXL >> void cxl_setup_parent_dport(struct device 
*host, struct cxl_dport *dport); >> void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); >> +static inline void cxl_mask_prot_interrupts(struct device *dev) >> +{ >> + struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev)); >> + >> + pci_aer_mask_internal_errors(pdev); >> +} >> #else >> static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, >> struct device *host) { } >> +static inline void cxl_mask_prot_interrupts(struct device *dev) { } >> #endif >> >> struct cxl_decoder *to_cxl_decoder(struct device *dev); >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c >> index 2d202ad1453a..69230cf87d79 100644 >> --- a/drivers/pci/pcie/aer.c >> +++ b/drivers/pci/pcie/aer.c >> @@ -979,6 +979,27 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev) >> } >> EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL"); >> >> +/** >> + * pci_aer_mask_internal_errors - mask internal errors >> + * @dev: pointer to the pcie_dev data structure >> + * >> + * Masks internal errors in the Uncorrectable and Correctable Error >> + * Mask registers. >> + * >> + * Note: AER must be enabled and supported by the device which must be >> + * checked in advance, e.g. with pcie_aer_is_native(). 
>> + */ >> +void pci_aer_mask_internal_errors(struct pci_dev *dev) >> +{ >> + int aer = dev->aer_cap; >> + >> + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, >> + 0, PCI_ERR_UNC_INTN); >> + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_COR_MASK, >> + 0, PCI_ERR_COR_INTERNAL); >> +} >> +EXPORT_SYMBOL_NS_GPL(pci_aer_mask_internal_errors, "CXL"); >> + >> static bool is_cxl_mem_dev(struct pci_dev *dev) >> { >> /* >> diff --git a/include/linux/aer.h b/include/linux/aer.h >> index 74600e75705f..41167ad3797a 100644 >> --- a/include/linux/aer.h >> +++ b/include/linux/aer.h >> @@ -108,5 +108,6 @@ int cper_severity_to_aer(int cper_severity); >> void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, >> int severity, struct aer_capability_regs *aer_regs); >> void pci_aer_unmask_internal_errors(struct pci_dev *dev); >> +void pci_aer_mask_internal_errors(struct pci_dev *dev); >> #endif //_AER_H_ >>