>-----Original Message----- >From: Terry Bowman <terry.bowman@xxxxxxx> >Sent: 26 June 2025 23:43 >To: dave@xxxxxxxxxxxx; Jonathan Cameron <jonathan.cameron@xxxxxxxxxx>; >dave.jiang@xxxxxxxxx; alison.schofield@xxxxxxxxx; dan.j.williams@xxxxxxxxx; >bhelgaas@xxxxxxxxxx; Shiju Jose <shiju.jose@xxxxxxxxxx>; >ming.li@xxxxxxxxxxxx; Smita.KoralahalliChannabasappa@xxxxxxx; >rrichter@xxxxxxx; dan.carpenter@xxxxxxxxxx; >PradeepVineshReddy.Kodamati@xxxxxxx; lukas@xxxxxxxxx; >Benjamin.Cheatham@xxxxxxx; >sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx; terry.bowman@xxxxxxx; >linux-cxl@xxxxxxxxxxxxxxx >Cc: linux-kernel@xxxxxxxxxxxxxxx; linux-pci@xxxxxxxxxxxxxxx >Subject: [PATCH v10 14/17] cxl/pci: Introduce CXL Endpoint protocol error >handlers > >CXL Endpoint protocol errors are currently handled using PCI error handlers. The >CXL Endpoint requires CXL specific handling in the case of uncorrectable error >(UCE) handling not provided by the PCI handlers. > >Add CXL specific handlers for CXL Endpoints. Rename the existing >cxl_error_handlers to be pci_error_handlers to more correctly indicate the >error type and follow naming consistency. > >The PCI handlers will be called if the CXL device is not trained for alternate >protocol (CXL). Update the CXL Endpoint PCI handlers to call the CXL UCE >handlers. > >The existing EP UCE handler includes checks for various results. These are no >longer needed because CXL UCE recovery will not be attempted. Implement >cxl_handle_ras() to return PCI_ERS_RESULT_NONE or PCI_ERS_RESULT_PANIC. >The CXL UCE handler is called by cxl_do_recovery() that acts on the return >value. In the case of the PCI handler path, call panic() if the result is >PCI_ERS_RESULT_PANIC. 
> >Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> >Reviewed-by: Kuppuswamy Sathyanarayanan ><sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx> >--- > drivers/cxl/core/native_ras.c | 15 ++++--- > drivers/cxl/core/pci.c | 77 ++++++++++++++++++----------------- > drivers/cxl/cxl.h | 4 ++ > drivers/cxl/cxlpci.h | 6 +-- > drivers/cxl/pci.c | 8 ++-- > 5 files changed, 59 insertions(+), 51 deletions(-) > [...] >diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index >887b54cf3395..7209ffb5c2fe 100644 >--- a/drivers/cxl/core/pci.c >+++ b/drivers/cxl/core/pci.c >@@ -705,8 +705,8 @@ static void header_log_copy(void __iomem *ras_base, >u32 *log) > * Log the state of the RAS status registers and prepare them to log the > * next error status. Return 1 if reset needed. > */ >-static bool cxl_handle_ras(struct device *dev, u64 serial, >- void __iomem *ras_base) >+static pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, >+ void __iomem *ras_base) > { > u32 hl[CXL_HEADERLOG_SIZE_U32]; > void __iomem *addr; >@@ -715,13 +715,13 @@ static bool cxl_handle_ras(struct device *dev, u64 >serial, > > if (!ras_base) { > dev_warn_once(dev, "CXL RAS register block is not mapped"); >- return false; >+ return PCI_ERS_RESULT_NONE; > } > > addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; > status = readl(addr); > if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) >- return false; >+ return PCI_ERS_RESULT_NONE; > > /* If multiple errors, log header points to first error from ctrl reg */ > if (hweight32(status) > 1) { >@@ -738,7 +738,7 @@ static bool cxl_handle_ras(struct device *dev, u64 serial, > trace_cxl_aer_uncorrectable_error(dev, serial, status, fe, hl); > writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); > >- return true; >+ return PCI_ERS_RESULT_PANIC; > } > > #ifdef CONFIG_PCIEAER_CXL >@@ -833,13 +833,14 @@ static void cxl_handle_rdport_errors(struct >cxl_dev_state *cxlds) static void cxl_handle_rdport_errors(struct cxl_dev_state >*cxlds) { } #endif > 
>-void cxl_cor_error_detected(struct pci_dev *pdev)
>+void cxl_cor_error_detected(struct device *dev)
> {
>+ struct pci_dev *pdev = to_pci_dev(dev);
> struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>- struct device *dev = &cxlds->cxlmd->dev;
>+ struct device *cxlmd_dev = &cxlds->cxlmd->dev;
>
>- scoped_guard(device, dev) {
>- if (!dev->driver) {
>+ scoped_guard(device, cxlmd_dev) {
>+ if (!cxlmd_dev->driver) {
> dev_warn(&pdev->dev,
> "%s: memdev disabled, abort error
>handling\n",
> dev_name(dev));
>@@ -854,20 +855,26 @@ void cxl_cor_error_detected(struct pci_dev *pdev) }
>EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
>
>-pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>- pci_channel_state_t state)
>+void pci_cor_error_detected(struct pci_dev *pdev)
> {
>- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>- struct cxl_memdev *cxlmd = cxlds->cxlmd;
>- struct device *dev = &cxlmd->dev;
>- bool ue;
>+ cxl_cor_error_detected(&pdev->dev);
>+}
>+EXPORT_SYMBOL_NS_GPL(pci_cor_error_detected, "CXL");
>
>- scoped_guard(device, dev) {
>- if (!dev->driver) {
>+pci_ers_result_t cxl_error_detected(struct device *dev) {
>+ struct pci_dev *pdev = to_pci_dev(dev);
>+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>+ struct device *cxlmd_dev = &cxlds->cxlmd->dev;
>+ pci_ers_result_t ue;
>+
>+ scoped_guard(device, cxlmd_dev) {
>+

Please remove the extra blank line.

>+ if (!cxlmd_dev->driver) {
> dev_warn(&pdev->dev,
> "%s: memdev disabled, abort error
>handling\n",
> dev_name(dev));

Thanks,
Shiju