Hello Boris,Subject: [PATCH V14 02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1Use a verb in your patch subjects: "Add support for ..." or so.
Will change to hardware error.
On Tue, Mar 28, 2017 at 01:30:32PM -0600, Tyler Baicar wrote:
Currently when a RAS error is reported it is not timestamped.What is a RAS error? You mean a hardware error?
I'll change the wording to describe it better.
The ACPI 6.1 spec adds the timestamp field to the generic errorSo what this patch does doesn't have a lot to to do with the Subject?
data entry v3 structure. The timestamp of when the firmware
generated the error is now being reported.
Please state what the patch does in the Subject.
I'll break this up into two patches as you suggest below so that this only adds the timestamp and the new patch adds the helper defines and usage.
Also, your commit message talks about adding timestamp but the patch
does more. You need to state that too and explain what this patch does
actually.
Will do.Signed-off-by: Tyler Baicar <tbaicar@xxxxxxxxxxxxxx>Yuck, acpi_hest_generic_data_v300. Can we make those struct names smaller pls?
CC: Jonathan (Zhixiong) Zhang <zjzhang@xxxxxxxxxxxxxx>
Reviewed-by: James Morse <james.morse@xxxxxxx>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
drivers/acpi/apei/ghes.c | 9 ++++---
drivers/firmware/efi/cper.c | 63 +++++++++++++++++++++++++++++++++++----------
include/acpi/ghes.h | 22 ++++++++++++++++
3 files changed, 77 insertions(+), 17 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0241e36..9ddbb93 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -421,7 +421,8 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
- mem_err = (struct cper_sec_mem_err *)(gdata + 1);
+
+ mem_err = acpi_hest_generic_data_payload(gdata);
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
@@ -458,7 +459,8 @@ static void ghes_do_proc(struct ghes *ghes,
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
- mem_err = (struct cper_sec_mem_err *)(gdata+1);
+
+ mem_err = acpi_hest_generic_data_payload(gdata);
ghes_edac_report_mem_error(ghes, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
@@ -468,7 +470,8 @@ static void ghes_do_proc(struct ghes *ghes,
else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err;
- pcie_err = (struct cper_sec_pcie *)(gdata+1);
+
+ pcie_err = acpi_hest_generic_data_payload(gdata);
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index d425374..8fa4e23 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -32,6 +32,9 @@
#include <linux/acpi.h>
#include <linux/pci.h>
#include <linux/aer.h>
+#include <linux/printk.h>
+#include <linux/bcd.h>
+#include <acpi/ghes.h>
#define INDENT_SP " "
@@ -386,13 +389,37 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
}
+static void cper_estatus_print_section_v300(const char *pfx,
+ const struct acpi_hest_generic_data_v300 *gdata)
And v300 is just silly.
And then align args at opening brace.
Will do.
+{Align those vertically on the = sign.
+ __u8 hour, min, sec, day, mon, year, century, *timestamp;
+
+ if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
+ timestamp = (__u8 *)&(gdata->time_stamp);
+ sec = bcd2bin(timestamp[0]);
+ min = bcd2bin(timestamp[1]);
+ hour = bcd2bin(timestamp[2]);
+ day = bcd2bin(timestamp[4]);
+ mon = bcd2bin(timestamp[5]);
+ year = bcd2bin(timestamp[6]);
+ century = bcd2bin(timestamp[7]);
Will do.
+ printk("%stime: %7s %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,Move that test in a separate if-statement - that printk is unreadable as
+ 0x01 & *(timestamp + 3) ? "precise" : "",
it is. Also, the test bit always comes second.
All I did here was remove the const, but will do.century,static void
+ year, mon, day, hour, min, sec);
+ }
+}
+
static void cper_estatus_print_section(
- const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+ const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
int sec_no)
looks much better.
As Joe mentioned, acpi_hest_generic<foo> is already used throughout this code base. I do not see the value in varying from the preexisting naming style.{Jeez, that macro name is like a one-lined book!
uuid_le *sec_type = (uuid_le *)gdata->section_type;
__u16 severity;
char newpfx[64];
+ if (acpi_hest_generic_data_version(gdata) >= 3)
Let's make that "hest_gdata_ver()" or something else shorter.
I'll break this into two patches.
+ cper_estatus_print_section_v300(pfx,This looks like an unrelated change. The payload function addition and the
+ (const struct acpi_hest_generic_data_v300 *)gdata);
+
severity = gdata->error_severity;
printk("%s""Error %d, type: %s\n", pfx, sec_no,
cper_severity_str(severity));
@@ -403,14 +430,18 @@ static void cper_estatus_print_section(
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
- struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
+ struct cper_sec_proc_generic *proc_err;
+
+ proc_err = acpi_hest_generic_data_payload(gdata);
conversion of the code to use it should be a separate patch. And shorten that
function name too pls.
I do not agree with this. The struct being passed to this function is already named acpi_hest_generic_data in the existing code and all over this code is named gdata not just d.
printk("%s""section_type: general processor error\n", newpfx);This is one unreadable pile of too long names with a clearly redundant
if (gdata->error_data_length >= sizeof(*proc_err))
cper_print_proc_generic(newpfx, proc_err);
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
- struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+ struct cper_sec_mem_err *mem_err;
+
+ mem_err = acpi_hest_generic_data_payload(gdata);
printk("%s""section_type: memory error\n", newpfx);
if (gdata->error_data_length >=
sizeof(struct cper_sec_mem_err_old))
@@ -419,7 +450,9 @@ static void cper_estatus_print_section(
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
- struct cper_sec_pcie *pcie = (void *)(gdata + 1);
+ struct cper_sec_pcie *pcie;
+
+ pcie = acpi_hest_generic_data_payload(gdata);
printk("%s""section_type: PCIe error\n", newpfx);
if (gdata->error_data_length >= sizeof(*pcie))
cper_print_pcie(newpfx, pcie, gdata);
@@ -438,7 +471,7 @@ void cper_estatus_print(const char *pfx,
const struct acpi_hest_generic_status *estatus)
{
struct acpi_hest_generic_data *gdata;
- unsigned int data_len, gedata_len;
+ unsigned int data_len;
int sec_no = 0;
char newpfx[64];
__u16 severity;
@@ -451,12 +484,13 @@ void cper_estatus_print(const char *pfx,
printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
- while (data_len >= sizeof(*gdata)) {
- gedata_len = gdata->error_data_length;
+
+ while (data_len >= acpi_hest_generic_data_size(gdata)) {
cper_estatus_print_section(newpfx, gdata, sec_no);
- data_len -= gedata_len + sizeof(*gdata);
- gdata = (void *)(gdata + 1) + gedata_len;
+ data_len -= acpi_hest_generic_data_record_size(gdata);
+ gdata = acpi_hest_generic_data_next(gdata);
sec_no++;
}
}
@@ -486,12 +520,13 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
return rc;
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
- while (data_len >= sizeof(*gdata)) {
- gedata_len = gdata->error_data_length;
- if (gedata_len > data_len - sizeof(*gdata))
+
+ while (data_len >= acpi_hest_generic_data_size(gdata)) {
+ gedata_len = acpi_hest_generic_data_error_length(gdata);
+ if (gedata_len > data_len - acpi_hest_generic_data_size(gdata))
return -EINVAL;
- data_len -= gedata_len + sizeof(*gdata);
- gdata = (void *)(gdata + 1) + gedata_len;
+ data_len -= gedata_len + acpi_hest_generic_data_size(gdata);
+ gdata = acpi_hest_generic_data_next(gdata);
}
if (data_len)
return -EINVAL;
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 68f088a..6ae318b 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -12,6 +12,18 @@
#define GHES_TO_CLEAR 0x0001
#define GHES_EXITING 0x0002
+#define acpi_hest_generic_data_error_length(gdata) \
+ (((struct acpi_hest_generic_data *)(gdata))->error_data_length)
+#define acpi_hest_generic_data_size(gdata) \
+ ((acpi_hest_generic_data_version(gdata) >= 3) ? \
+ sizeof(struct acpi_hest_generic_data_v300) : \
+ sizeof(struct acpi_hest_generic_data))
+#define acpi_hest_generic_data_record_size(gdata) \
+ (acpi_hest_generic_data_size(gdata) + \
+ acpi_hest_generic_data_error_length(gdata))
+#define acpi_hest_generic_data_next(gdata) \
+ ((void *)(gdata) + acpi_hest_generic_data_record_size(gdata))
and too long prefix. Please shorten it all.
+Lemme try to shorten it:
struct ghes {
union {
struct acpi_hest_generic *generic;
@@ -73,3 +85,13 @@ static inline void ghes_edac_unregister(struct ghes *ghes)
{
}
#endif
+
+#define acpi_hest_generic_data_version(gdata) \
+ (gdata->revision >> 8)
+
+static inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
static inline void *acpi_hest_get_payload(struct acpi_hest_gdata *d)
{
if (hest_gdata_ver(d) >= 3)
return (void *)(((struct acpi_hest_gdata_v3 *)d) + 1);
else
return d + 1;
}
Now this is much more readable IMO. You can actually see what's going
on. And you still know what the struct names are.
So let's drop all that unnecessary too long prefixing and make the
code readable. That cper thing needs a lot more scrubbing, of course,
but some other day.