[PATCH v4 1/3] aerdrv: Trace Event for AER

From: Lance Ortiz
Date: Mon Dec 03 2012 - 13:38:04 EST


This header file will define a new trace event that will be triggered when
a AER event occurs. The following data will be provided to the trace
event.

char * dev_name - The name of the slot where the device resides
([domain:]bus:device.function).

u32 status - Either the correctable or uncorrectable register
indicating what error or errors have been see.

u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED

The trace event will also provide a trace string that may look like:

"0000:05:00.0 PCIe Bus Error:severity=Uncorrected (Non-Fatal), Poisoned
TLP"

v1-v2 Move header from include/ras/aer_event.h to
include/trace/events/ras.h
v3-v4 Cleaned up comments and commit header
Signed-off-by: Lance Ortiz <lance.ortiz@xxxxxx>
---

include/trace/events/ras.h | 78 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 78 insertions(+), 0 deletions(-)
create mode 100644 include/trace/events/ras.h

diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
new file mode 100644
index 0000000..774303d
--- /dev/null
+++ b/include/trace/events/ras.h
@@ -0,0 +1,78 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM aer_event
+#define TRACE_INCLUDE_FILE ras
+
+#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_AER_H
+
+#include <linux/tracepoint.h>
+#include <linux/edac.h>
+
+
+/*
+ * PCIe Advanced Error Reporting (AER) PCIE Report Error
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a pci express device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define correctable_error_string \
+ {BIT(0), "Receiver Error"}, \
+ {BIT(6), "Bad TLP"}, \
+ {BIT(7), "Bad DLLP"}, \
+ {BIT(8), "RELAY_NUM Rollover"}, \
+ {BIT(12), "Replay Timer Timeout"}, \
+ {BIT(13), "Advisory Non-Fatal"}
+
+#define uncorrectable_error_string \
+ {BIT(4), "Data Link Protocol"}, \
+ {BIT(12), "Poisoned TLP"}, \
+ {BIT(13), "Flow Control Protocol"}, \
+ {BIT(14), "Completion Timeout"}, \
+ {BIT(15), "Completer Abort"}, \
+ {BIT(16), "Unexpected Completion"}, \
+ {BIT(17), "Receiver Overflow"}, \
+ {BIT(18), "Malformed TLP"}, \
+ {BIT(19), "ECRC"}, \
+ {BIT(20), "Unsupported Request"}
+
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity),
+
+ TP_ARGS(dev_name, status, severity),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+ __get_str(dev_name),
+ (__entry->severity == HW_EVENT_ERR_CORRECTED) ? "Corrected" :
+ ((__entry->severity == HW_EVENT_ERR_FATAL) ?
+ "Fatal" : "Uncorrected"),
+ __entry->severity == HW_EVENT_ERR_CORRECTED ?
+ __print_flags(__entry->status, "|", correctable_error_string) :
+ __print_flags(__entry->status, "|", uncorrectable_error_string))
+);
+
+#endif /* _TRACE_AER_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/