On Wed, 26 Mar 2025 20:47:13 -0500 Terry Bowman <terry.bowman@xxxxxxx> wrote: > Introduce CXL error handlers for CXL Port devices. These are needed > to handle and log CXL protocol errors. > > Update cxl_create_prot_err_info() with support for CXL Root Ports (RP), CXL > Upstream Switch Ports (USP) and CXL Downstream Switch Ports (DSP). > > Add functions cxl_port_error_detected() and cxl_port_cor_error_detected(). > > Add cxl_assign_error_handlers() and use it to assign the CXL Port error > handlers for CXL RP, CXL USP, and CXL DSP. Make the assignments in > cxl_uport_init_ras() and cxl_dport_init_ras() after mapping RAS registers. > > Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> > --- > drivers/cxl/core/core.h | 2 ++ > drivers/cxl/core/pci.c | 23 +++++++++++++ > drivers/cxl/core/port.c | 4 +-- > drivers/cxl/core/ras.c | 76 +++++++++++++++++++++++++++++++++-------- > drivers/cxl/cxl.h | 5 +++ > drivers/cxl/port.c | 29 ++++++++++++++-- > 6 files changed, 120 insertions(+), 19 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 15699299dc11..5ce7269e5f13 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -122,6 +122,8 @@ void cxl_ras_exit(void); > int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); > int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, > int nid, resource_size_t *size); > +struct cxl_port *find_cxl_port(struct device *dport_dev, > + struct cxl_dport **dport); > > #ifdef CONFIG_CXL_FEATURES > size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index 10b2abfb0e64..9ed6f700e132 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -739,6 +739,29 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) > > #ifdef CONFIG_PCIEAER_CXL > > + > +void cxl_port_cor_error_detected(struct device *cxl_dev, > + struct cxl_prot_error_info 
*err_info) > +{ > + void __iomem *ras_base = err_info->ras_base; > + struct device *pci_dev = &err_info->pdev->dev; > + u64 serial = 0; > + > + __cxl_handle_cor_ras(cxl_dev, pci_dev, serial, ras_base); > +} > +EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL"); > + > +pci_ers_result_t cxl_port_error_detected(struct device *cxl_dev, > + struct cxl_prot_error_info *err_info) > +{ > + void __iomem *ras_base = err_info->ras_base; > + struct device *pci_dev = &err_info->pdev->dev; > + u64 serial = 0; Maybe just put that directly in the call? Or is it useful to have it here as a form of documentation? > + > + return __cxl_handle_ras(cxl_dev, pci_dev, serial, ras_base); > +} > +EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL"); > + > static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, > struct cxl_dport *dport) > { > diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c > index f18cb568eabd..fe38e76f2d1a 100644 > --- a/drivers/cxl/core/ras.c > +++ b/drivers/cxl/core/ras.c > @@ -110,34 +110,80 @@ static void cxl_cper_prot_err_work_fn(struct work_struct *work) > } > static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); > > +static int match_uport(struct device *dev, const void *data) > +{ > + const struct device *uport_dev = data; > + struct cxl_port *port; > + > + if (!is_cxl_port(dev)) > + return 0; > + > + port = to_cxl_port(dev); > + > + return port->uport_dev == uport_dev; > +} > + > int cxl_create_prot_err_info(struct pci_dev *_pdev, int severity, > struct cxl_prot_error_info *err_info) > { > struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(_pdev); > - struct cxl_dev_state *cxlds; > > if (!pdev || !err_info) { > pr_warn_once("Error: parameter is NULL"); > return -ENODEV; > } > > - if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) && > - (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_END)) { > + *err_info = (struct cxl_prot_error_info){ 0 }; > + err_info->severity = severity; > + err_info->pdev = pdev; Can maybe carry 
forward earlier suggestion for at least these two fields. *err_info = (struct cxl_prot_error_info) { .severity = ... }; > + > + switch (pci_pcie_type(pdev)) { > + case PCI_EXP_TYPE_ROOT_PORT: > + case PCI_EXP_TYPE_DOWNSTREAM: > + { > + struct cxl_dport *dport = NULL; > + struct cxl_port *port __free(put_cxl_port) = > + find_cxl_port(&pdev->dev, &dport); > + > + if (!port || !is_cxl_port(&port->dev)) > + return -ENODEV; > + > + err_info->ras_base = dport ? dport->regs.ras : NULL; > + err_info->dev = &port->dev; > + break; > + } > + case PCI_EXP_TYPE_UPSTREAM: > + { > + struct cxl_port *port; > + struct device *port_dev __free(put_device) = > + bus_find_device(&cxl_bus_type, NULL, &pdev->dev, > + match_uport); > + > + if (!port_dev || !is_cxl_port(port_dev)) > + return -ENODEV; > + > + port = to_cxl_port(port_dev); > + err_info->ras_base = port ? port->uport_regs.ras : NULL; > + err_info->dev = port_dev; > + break; > + } > + case PCI_EXP_TYPE_ENDPOINT: > + case PCI_EXP_TYPE_RC_END: > + { > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + struct cxl_memdev *cxlmd = cxlds->cxlmd; > + struct device *dev __free(put_device) = get_device(&cxlmd->dev); > + > + err_info->ras_base = cxlds->regs.ras; > + err_info->dev = &cxlds->cxlmd->dev; > + break; > + } > + default: > + { > pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev)); > return -ENODEV; > } > - > - cxlds = pci_get_drvdata(pdev); > - struct device *dev __free(put_device) = get_device(&cxlds->cxlmd->dev); > - > - if (!dev) > - return -ENODEV; > - > - *err_info = (struct cxl_prot_error_info){ 0 }; > - err_info->ras_base = cxlds->regs.ras; > - err_info->severity = severity; > - err_info->pdev = pdev; > - err_info->dev = dev; > + } > > return 0; > }