CXL currently has separate trace routines for CXL Port errors and CXL Endpoint errors. This is inconvenient for the user because they must enable 2 sets of trace routines. Make updates to the trace logging such that a single trace routine logs both CXL Endpoint and CXL Port protocol errors. Keep the trace log fields 'memdev' and 'host'. While these are not accurate for non-Endpoints, the fields will remain as-is to prevent breaking userspace RAS trace consumers. Add a serial number parameter to the trace logging. The serial number is used for Endpoints (EPs), and 0 is provided for CXL port devices without a serial number. Below is the output of correctable and uncorrectable protocol error logging. CXL Root Port and CXL Endpoint examples are included below. Root Port: cxl_aer_correctable_error: memdev=0000:0c:00.0 host=pci0000:0c serial=0 status='CRC Threshold Hit' cxl_aer_uncorrectable_error: memdev=0000:0c:00.0 host=pci0000:0c serial=0 status='Cache Byte Enable Parity Error' first_error='Cache Byte Enable Parity Error' Endpoint: cxl_aer_correctable_error: memdev=mem3 host=0000:0f:00.0 serial=0 status='CRC Threshold Hit' cxl_aer_uncorrectable_error: memdev=mem3 host=0000:0f:00.0 serial=0 status='Cache Byte Enable Parity Error' first_error='Cache Byte Enable Parity Error' Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> --- drivers/cxl/core/pci.c | 19 ++++----- drivers/cxl/core/ras.c | 14 ++++--- drivers/cxl/core/trace.h | 84 +++++++++------------------------------- 3 files changed, 37 insertions(+), 80 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c9a4b528e0b8..156ce094a8b9 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -664,8 +664,8 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void cxl_handle_cor_ras(struct device *dev, - void __iomem *ras_base) +static void cxl_handle_cor_ras(struct device *dev, u64 serial, + void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -679,7
+679,7 @@ static void cxl_handle_cor_ras(struct device *dev, status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); + trace_cxl_aer_correctable_error(dev, serial, status); } } @@ -704,7 +704,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +static bool cxl_handle_ras(struct device *dev, u64 serial, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -733,7 +734,7 @@ static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) } header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); + trace_cxl_aer_uncorrectable_error(dev, serial, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; @@ -744,13 +745,13 @@ static bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras); } static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); + return cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras); } /* @@ -847,7 +848,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -876,7 +877,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev 
*pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 962dc94fed8c..9588b39faabd 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -13,7 +13,7 @@ static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - trace_cxl_port_aer_correctable_error(&pdev->dev, status); + trace_cxl_aer_correctable_error(&pdev->dev, 0, status); } static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, @@ -28,8 +28,8 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, else fe = status; - trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&pdev->dev, 0, status, fe, + ras_cap.header_log); } static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, @@ -42,7 +42,8 @@ static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, if (!cxlds) return; - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(&cxlds->cxlmd->dev, cxlds->serial, + status); } static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, @@ -62,8 +63,9 @@ static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, else fe = status; - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&cxlds->cxlmd->dev, + cxlds->serial, status, + fe, ras_cap.header_log); } static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 25ebfbc1616c..494d6db461a7 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -48,49 +48,22 @@ { 
CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ ) -TRACE_EVENT(cxl_port_aer_uncorrectable_error, - TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - __field(u32, first_error) - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - __entry->first_error = fe; - /* - * Embed the 512B headerlog data for user app retrieval and - * parsing, but no need to print this in the trace buffer. - */ - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); - ), - TP_printk("device=%s host=%s status: '%s' first_error: '%s'", - __get_str(device), __get_str(host), - show_uc_errs(__entry->status), - show_uc_errs(__entry->first_error) - ) -); - TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), - TP_ARGS(cxlmd, status, fe, hl), + TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe, + u32 *hl), + TP_ARGS(dev, serial, status, fe, hl), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(name, dev_name(dev)) + __string(parent, dev_name(dev->parent)) __field(u64, serial) __field(u32, status) __field(u32, first_error) __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) ), TP_fast_assign( - __assign_str(memdev); - __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __assign_str(name); + __assign_str(parent); + __entry->serial = serial; __entry->status = status; __entry->first_error = fe; /* @@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, */ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); ), - TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'", - __get_str(memdev), __get_str(host), __entry->serial, + TP_printk("memdev=%s host=%s serial=%lld status='%s' first_error='%s'", 
+ __get_str(name), __get_str(parent), __entry->serial, show_uc_errs(__entry->status), show_uc_errs(__entry->first_error) ) @@ -124,42 +97,23 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ ) -TRACE_EVENT(cxl_port_aer_correctable_error, - TP_PROTO(struct device *dev, u32 status), - TP_ARGS(dev, status), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - ), - TP_printk("device=%s host=%s status='%s'", - __get_str(device), __get_str(host), - show_ce_errs(__entry->status) - ) -); - TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), - TP_ARGS(cxlmd, status), + TP_PROTO(struct device *dev, u64 serial, u32 status), + TP_ARGS(dev, serial, status), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(name, dev_name(dev)) + __string(parent, dev_name(dev->parent)) __field(u64, serial) __field(u32, status) ), TP_fast_assign( - __assign_str(memdev); - __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __assign_str(name); + __assign_str(parent); + __entry->serial = serial; __entry->status = status; ), - TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", - __get_str(memdev), __get_str(host), __entry->serial, + TP_printk("memdev=%s host=%s serial=%lld status='%s'", + __get_str(name), __get_str(parent), __entry->serial, show_ce_errs(__entry->status) ) ); -- 2.34.1