[PATCH v9 16/16] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup
From: Terry Bowman
Date: Tue Jun 03 2025 - 13:32:37 EST
During CXL device cleanup, the CXL PCIe Port devices' protocol error
interrupts remain enabled. This can result in unnecessary interrupt
processing for CXL errors while the device is being destroyed.
Disable CXL protocol error reporting by setting the internal error bits in
the CXL devices' AER Uncorrectable and Correctable Error Mask registers.
Introduce pci_aer_mask_internal_errors() similar to pci_aer_unmask_internal_errors().
Introduce cxl_mask_prot_interrupts() to call pci_aer_mask_internal_errors().
Add calls to cxl_mask_prot_interrupts() within CXL Port teardown for CXL
Root Ports, CXL Downstream Switch Ports, CXL Upstream Switch Ports, and CXL
Endpoints. Follow the same "bottom-up" approach used during CXL Port
teardown.
Implement cxl_mask_prot_interrupts() in a header file to avoid introducing
Kconfig ifdefs in cxl/core/port.c.
Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
---
drivers/cxl/core/port.c | 6 ++++++
drivers/cxl/cxl.h | 8 ++++++++
drivers/pci/pcie/aer.c | 21 +++++++++++++++++++++
include/linux/aer.h | 1 +
4 files changed, 36 insertions(+)
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 07b9bb0f601f..6aaaad002a7f 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1433,6 +1433,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL");
*/
static void delete_switch_port(struct cxl_port *port)
{
+ cxl_mask_prot_interrupts(port->uport_dev);
+ cxl_mask_prot_interrupts(port->parent_dport->dport_dev);
+
devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port);
devm_release_action(port->dev.parent, cxl_unlink_uport, port);
devm_release_action(port->dev.parent, unregister_port, port);
@@ -1446,6 +1449,7 @@ static void reap_dports(struct cxl_port *port)
device_lock_assert(&port->dev);
xa_for_each(&port->dports, index, dport) {
+ cxl_mask_prot_interrupts(dport->dport_dev);
devm_release_action(&port->dev, cxl_dport_unlink, dport);
devm_release_action(&port->dev, cxl_dport_remove, dport);
devm_kfree(&port->dev, dport);
@@ -1476,6 +1480,8 @@ static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
+ cxl_mask_prot_interrupts(cxlmd->cxlds->dev);
+
for (int i = cxlmd->depth - 1; i >= 1; i--) {
struct cxl_port *port, *parent_port;
struct detach_ctx ctx = {
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 2c1c00466a25..2753db3d473e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -12,6 +12,7 @@
#include <linux/node.h>
#include <linux/io.h>
#include <linux/pci.h>
+#include <linux/aer.h>
extern const struct nvdimm_security_ops *cxl_security_ops;
@@ -771,9 +772,16 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
#ifdef CONFIG_PCIEAER_CXL
void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
+/*
+ * cxl_mask_prot_interrupts - mask CXL protocol error reporting for @dev
+ * @dev: device backing a CXL port, dport or endpoint
+ *
+ * Masks AER internal errors on @dev via pci_aer_mask_internal_errors().
+ * Does nothing when @dev is not a PCI device or when
+ * pcie_aer_is_native() reports that AER is not natively handled for it.
+ */
+static inline void cxl_mask_prot_interrupts(struct device *dev)
+{
+	/*
+	 * Callers pass generic struct device pointers; to_pci_dev() is only
+	 * valid when the device actually sits on the PCI bus.
+	 */
+	if (!dev_is_pci(dev))
+		return;
+
+	struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev));
+
+	/* pci_aer_mask_internal_errors() requires AER to be checked first */
+	if (!pcie_aer_is_native(pdev))
+		return;
+
+	pci_aer_mask_internal_errors(pdev);
+}
#else
static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
struct device *host) { }
+static inline void cxl_mask_prot_interrupts(struct device *dev) { } /* !CONFIG_PCIEAER_CXL stub */
#endif
struct cxl_decoder *to_cxl_decoder(struct device *dev);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 2d202ad1453a..69230cf87d79 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -979,6 +979,27 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev)
}
EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL");
+/**
+ * pci_aer_mask_internal_errors - mask internal errors
+ * @dev: pointer to the pci_dev data structure
+ *
+ * Masks internal errors in the Uncorrectable and Correctable Error
+ * Mask registers. Returns without touching config space when @dev has
+ * a zero AER capability offset.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+void pci_aer_mask_internal_errors(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+
+	/*
+	 * A zero offset would make the writes below resolve to
+	 * PCI_ERR_UNCOR_MASK/PCI_ERR_COR_MASK bytes into the standard
+	 * configuration header rather than into an AER capability.
+	 */
+	if (!aer)
+		return;
+
+	/* clear nothing, set only the internal-error mask bit in each register */
+	pci_clear_and_set_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
+				       0, PCI_ERR_UNC_INTN);
+	pci_clear_and_set_config_dword(dev, aer + PCI_ERR_COR_MASK,
+				       0, PCI_ERR_COR_INTERNAL);
+}
+EXPORT_SYMBOL_NS_GPL(pci_aer_mask_internal_errors, "CXL");
+
static bool is_cxl_mem_dev(struct pci_dev *dev)
{
/*
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 74600e75705f..41167ad3797a 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -108,5 +108,6 @@ int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity, struct aer_capability_regs *aer_regs);
void pci_aer_unmask_internal_errors(struct pci_dev *dev);
+void pci_aer_mask_internal_errors(struct pci_dev *dev);
#endif //_AER_H_
--
2.34.1