[PATCH] pci: pcie: AER: Fix logging of Correctable errors

From: Matt Jolly
Date: Thu Jun 18 2020 - 11:56:50 EST


The AER documentation indicates that correctable (severity=Corrected)
errors should be output as a warning so that users can filter these
errors if they choose to; This functionality does not appear to have been implemented.

This patch modifies the functions aer_print_error and __aer_print_error
to send correctable errors as a warning (pci_warn), rather than as an error (pci_err). It
partially addresses several bugs in relation to kernel message buffer
spam for misbehaving devices - the root cause (possibly device firmware?) isn't
addressed, but the dmesg output is less alarming for end users, and can
be filtered separately from uncorrectable errors. This should hopefully
reduce the need for users to disable AER to suppress corrected errors.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183

Signed-off-by: Matt Jolly <Kangie@xxxxxxxxxxxxxx>
---
drivers/pci/pcie/aer.c | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3acf56683915..131ecc0df2cb 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -662,12 +662,18 @@ static void __aer_print_error(struct pci_dev *dev,
errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
aer_uncorrectable_error_string[i] : NULL;

- if (errmsg)
- pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
- info->first_error == i ? " (First)" : "");
- else
+ if (errmsg) {
+ if (info->severity == AER_CORRECTABLE) {
+ pci_warn(dev, " [%2d] %-22s%s\n", i, errmsg,
+ info->first_error == i ? " (First)" : "");
+ } else {
+ pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
+ info->first_error == i ? " (First)" : "");
+ }
+ } else {
pci_err(dev, " [%2d] Unknown Error Bit%s\n",
i, info->first_error == i ? " (First)" : "");
+ }
}
pci_dev_aer_stats_incr(dev, info);
}
@@ -686,13 +692,23 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);

- pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
- aer_error_severity_string[info->severity],
- aer_error_layer[layer], aer_agent_string[agent]);
+ if (info->severity == AER_CORRECTABLE) {
+ pci_warn(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+ aer_error_severity_string[info->severity],
+ aer_error_layer[layer], aer_agent_string[agent]);

- pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
- dev->vendor, dev->device,
- info->status, info->mask);
+ pci_warn(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
+ dev->vendor, dev->device,
+ info->status, info->mask);
+ } else {
+ pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+ aer_error_severity_string[info->severity],
+ aer_error_layer[layer], aer_agent_string[agent]);
+
+ pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
+ dev->vendor, dev->device,
+ info->status, info->mask);
+ }

__aer_print_error(dev, info);

--
2.26.2