CXL protocol errors are not enabled for all CXL devices after boot. These must be enabled in order to process CXL protocol errors. Introduce cxl_unmask_proto_interrupts() to call pci_aer_unmask_internal_errors(). pci_aer_unmask_internal_errors() expects pdev->aer_cap to be initialized, but pdev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL Downstream Switch Ports. Initialize pdev->aer_cap if necessary. Enable AER correctable internal errors and uncorrectable internal errors for all CXL devices. Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx> --- Changes in v10->v11: - Added check for valid PCI devices in is_cxl_error() (Terry) - Removed check for RCiEP in cxl_handle_proto_err() and cxl_report_error_detected() (Terry) --- drivers/cxl/core/ras.c | 26 +++++++++++++++++++++++++- drivers/pci/pci.h | 2 -- include/linux/aer.h | 2 ++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 3da675f72616..90ea0dfb942f 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -122,6 +122,21 @@ static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); static pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base); static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base); +static void cxl_unmask_proto_interrupts(struct device *dev) +{ + struct pci_dev *pdev __free(pci_dev_put) = + pci_dev_get(to_pci_dev(dev)); + + if (!pdev->aer_cap) { + pdev->aer_cap = pci_find_ext_capability(pdev, + PCI_EXT_CAP_ID_ERR); + if (!pdev->aer_cap) + return; + } + + pci_aer_unmask_internal_errors(pdev); +} + #ifdef CONFIG_CXL_RCH_RAS static void cxl_dport_map_rch_aer(struct cxl_dport *dport) { @@ -418,7 +433,10 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device 
*host) cxl_dport_map_rch_aer(dport); cxl_disable_rch_root_ints(dport); + return; } + + cxl_unmask_proto_interrupts(dport->dport_dev); } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); @@ -429,8 +447,12 @@ static void cxl_uport_init_ras_reporting(struct cxl_port *port, map->host = host; if (cxl_map_component_regs(map, &port->uport_regs, - BIT(CXL_CM_CAP_CAP_ID_RAS))) + BIT(CXL_CM_CAP_CAP_ID_RAS))) { dev_dbg(&port->dev, "Failed to map RAS capability\n"); + return; + } + + cxl_unmask_proto_interrupts(port->uport_dev); } void cxl_switch_port_init_ras(struct cxl_port *port) @@ -466,6 +488,8 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep) } cxl_dport_init_ras_reporting(parent_dport, cxlmd->cxlds->dev); + + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL"); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0c4f73dd645f..090b52a26862 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1169,12 +1169,10 @@ static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } #endif #ifdef CONFIG_CXL_RAS -void pci_aer_unmask_internal_errors(struct pci_dev *dev); bool is_internal_error(struct aer_err_info *info); bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info); void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info); #else -static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } static inline bool is_internal_error(struct aer_err_info *info) { return false; } static inline bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) { return false; } static inline void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) { } diff --git a/include/linux/aer.h b/include/linux/aer.h index 751a026fea73..4e2fc55f2497 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -82,11 +82,13 @@ int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd); void cxl_register_proto_err_work(struct work_struct *work); void 
cxl_unregister_proto_err_work(void); bool cxl_error_is_native(struct pci_dev *dev); +void pci_aer_unmask_internal_errors(struct pci_dev *dev); #else static inline int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) { return 0; } static inline void cxl_register_proto_err_work(struct work_struct *work) { } static inline void cxl_unregister_proto_err_work(void) { } static inline bool cxl_error_is_native(struct pci_dev *dev) { return false; } +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, -- 2.51.0.rc2.21.ge5ab6b3e5a