[PATCH v9 15/16] CXL/PCI: Enable CXL protocol errors during CXL Port probe
From: Terry Bowman
Date: Tue Jun 03 2025 - 13:29:44 EST
CXL protocol errors are not enabled for all CXL devices after boot. These
must be enabled in order to process CXL protocol errors.
Export the AER service driver's pci_aer_unmask_internal_errors().
Introduce cxl_unmask_prot_interrupts() to call pci_aer_unmask_internal_errors().
pci_aer_unmask_internal_errors() expects pdev->aer_cap to be initialized.
However, pdev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL
Downstream Switch Ports. Initialize pdev->aer_cap if necessary. Enable AER
correctable internal errors and uncorrectable internal errors for all CXL
devices.
Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
---
drivers/cxl/port.c | 29 +++++++++++++++++++++++++++--
drivers/pci/pcie/aer.c | 3 ++-
include/linux/aer.h | 1 +
3 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 0f7c4010ba58..3687848ae772 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -3,6 +3,7 @@
#include <linux/device.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/pci.h>
#include "cxlmem.h"
#include "cxlpci.h"
@@ -60,6 +61,21 @@ static int discover_region(struct device *dev, void *unused)
#ifdef CONFIG_PCIEAER_CXL
+/*
+ * cxl_unmask_prot_interrupts - unmask AER internal errors for a CXL device
+ * @dev: device whose AER correctable/uncorrectable internal error masks
+ *       should be cleared
+ *
+ * Looks up the AER extended capability if pdev->aer_cap has not been set
+ * yet, then calls pci_aer_unmask_internal_errors(). Nothing is done when
+ * @dev is not a PCI device, when the device has no AER capability, or when
+ * AER is not natively owned by the OS.
+ */
+static void cxl_unmask_prot_interrupts(struct device *dev)
+{
+ /* to_pci_dev() is only meaningful for devices on the PCI bus. */
+ if (!dev_is_pci(dev))
+ return;
+
+ struct pci_dev *pdev __free(pci_dev_put) =
+ pci_dev_get(to_pci_dev(dev));
+
+ if (!pdev->aer_cap) {
+ pdev->aer_cap = pci_find_ext_capability(pdev,
+ PCI_EXT_CAP_ID_ERR);
+ if (!pdev->aer_cap)
+ return;
+ }
+
+ /*
+  * pci_aer_unmask_internal_errors() requires the caller to have checked
+  * that AER is enabled and supported, e.g. with pcie_aer_is_native().
+  * Do not touch the AER mask registers when firmware owns AER.
+  */
+ if (!pcie_aer_is_native(pdev))
+ return;
+
+ pci_aer_unmask_internal_errors(pdev);
+}
+
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
resource_size_t aer_phys;
@@ -118,8 +134,12 @@ static void cxl_uport_init_ras_reporting(struct cxl_port *port,
map->host = host;
if (cxl_map_component_regs(map, &port->uport_regs,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ BIT(CXL_CM_CAP_CAP_ID_RAS))) {
dev_dbg(&port->dev, "Failed to map RAS capability\n");
+ return;
+ }
+
+ cxl_unmask_prot_interrupts(port->uport_dev);
}
/**
@@ -144,9 +164,12 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
}
if (cxl_map_component_regs(&dport->reg_map, &dport->regs.component,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ BIT(CXL_CM_CAP_CAP_ID_RAS))) {
dev_dbg(dport->dport_dev, "Failed to map RAS capability\n");
+ return;
+ }
+ cxl_unmask_prot_interrupts(dport->dport_dev);
}
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
@@ -177,6 +200,8 @@ static void cxl_endpoint_port_init_ras(struct cxl_port *port)
}
cxl_dport_init_ras_reporting(dport, &cxlmd->dev);
+
+ cxl_unmask_prot_interrupts(cxlmd->cxlds->dev);
}
#else
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 5efe5a718960..2d202ad1453a 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -964,7 +964,7 @@ static bool find_source_device(struct pci_dev *parent,
* Note: AER must be enabled and supported by the device which must be
* checked in advance, e.g. with pcie_aer_is_native().
*/
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
{
int aer = dev->aer_cap;
u32 mask;
@@ -977,6 +977,7 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
mask &= ~PCI_ERR_COR_INTERNAL;
pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
}
+EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL");
static bool is_cxl_mem_dev(struct pci_dev *dev)
{
diff --git a/include/linux/aer.h b/include/linux/aer.h
index c9a18eca16f8..74600e75705f 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -107,5 +107,6 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity, struct aer_capability_regs *aer_regs);
+void pci_aer_unmask_internal_errors(struct pci_dev *dev);
#endif //_AER_H_
--
2.34.1