On 8/26/25 6:35 PM, Terry Bowman wrote: > CXL protocol errors are not enabled for all CXL devices after boot. These > must be enabled in order to process CXL protocol errors. > > Introduce cxl_unmask_proto_interrupts() to call pci_aer_unmask_internal_errors(). > pci_aer_unmask_internal_errors() expects pdev->aer_cap to be initialized. > However, pdev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL > Downstream Switch Ports. Initialize pdev->aer_cap if necessary. Enable AER > correctable internal errors and uncorrectable internal errors for all CXL > devices. > > Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> > Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx> Reviewed-by: Dave Jiang <dave.jiang@xxxxxxxxx> > > --- > Changes in v10->v11: > - Added check for valid PCI devices in is_cxl_error() (Terry) > - Removed check for RCiEP in cxl_handle_proto_err() and > cxl_report_error_detected() (Terry) > --- > drivers/cxl/core/ras.c | 26 +++++++++++++++++++++++++- > drivers/pci/pci.h | 2 -- > include/linux/aer.h | 2 ++ > 3 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c > index 3da675f72616..90ea0dfb942f 100644 > --- a/drivers/cxl/core/ras.c > +++ b/drivers/cxl/core/ras.c > @@ -122,6 +122,21 @@ static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); > static pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base); > static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base); > > +static void cxl_unmask_proto_interrupts(struct device *dev) > +{ > + struct pci_dev *pdev __free(pci_dev_put) = > + pci_dev_get(to_pci_dev(dev)); > + > + if (!pdev->aer_cap) { > + pdev->aer_cap = pci_find_ext_capability(pdev, > + PCI_EXT_CAP_ID_ERR); > + if (!pdev->aer_cap) > + return; > + } > + > + pci_aer_unmask_internal_errors(pdev); > +} > + 
> #ifdef CONFIG_CXL_RCH_RAS > static void cxl_dport_map_rch_aer(struct cxl_dport *dport) > { > @@ -418,7 +433,10 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) > > cxl_dport_map_rch_aer(dport); > cxl_disable_rch_root_ints(dport); > + return; > } > + > + cxl_unmask_proto_interrupts(dport->dport_dev); > } > EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); > > @@ -429,8 +447,12 @@ static void cxl_uport_init_ras_reporting(struct cxl_port *port, > > map->host = host; > if (cxl_map_component_regs(map, &port->uport_regs, > - BIT(CXL_CM_CAP_CAP_ID_RAS))) > + BIT(CXL_CM_CAP_CAP_ID_RAS))) { > dev_dbg(&port->dev, "Failed to map RAS capability\n"); > + return; > + } > + > + cxl_unmask_proto_interrupts(port->uport_dev); > } > > void cxl_switch_port_init_ras(struct cxl_port *port) > @@ -466,6 +488,8 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep) > } > > cxl_dport_init_ras_reporting(parent_dport, cxlmd->cxlds->dev); > + > + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); > } > EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL"); > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 0c4f73dd645f..090b52a26862 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -1169,12 +1169,10 @@ static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } > #endif > > #ifdef CONFIG_CXL_RAS > -void pci_aer_unmask_internal_errors(struct pci_dev *dev); > bool is_internal_error(struct aer_err_info *info); > bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info); > void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info); > #else > -static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } > static inline bool is_internal_error(struct aer_err_info *info) { return false; } > static inline bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) { return false; } > static inline void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) { } > diff --git 
a/include/linux/aer.h b/include/linux/aer.h > index 751a026fea73..4e2fc55f2497 100644 > --- a/include/linux/aer.h > +++ b/include/linux/aer.h > @@ -82,11 +82,13 @@ int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd); > void cxl_register_proto_err_work(struct work_struct *work); > void cxl_unregister_proto_err_work(void); > bool cxl_error_is_native(struct pci_dev *dev); > +void pci_aer_unmask_internal_errors(struct pci_dev *dev); > #else > static inline int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) { return 0; } > static inline void cxl_register_proto_err_work(struct work_struct *work) { } > static inline void cxl_unregister_proto_err_work(void) { } > static inline bool cxl_error_is_native(struct pci_dev *dev) { return false; } > +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } > #endif > > void pci_print_aer(struct pci_dev *dev, int aer_severity,