Re: [PATCH v4 1/2] edac: Add an API for edac device to report for multiple errors

From: Borislav Petkov
Date: Mon Sep 30 2019 - 10:50:50 EST


On Mon, Sep 23, 2019 at 08:17:40PM +0100, Hanna Hawa wrote:
> +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> + int inst_nr, int block_nr, const char *msg)
> +{
> + __edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
> +}
> +EXPORT_SYMBOL_GPL(edac_device_handle_ce);

Eww, I don't like that: exporting the function *and* the __ counterpart.
The user will get confused and that is unnecessary.

See below for a better version. This way you solve the whole deal with a
single patch.

---
From: Hanna Hawa <hhhawa@xxxxxxxxxx>
Date: Mon, 23 Sep 2019 20:17:40 +0100
Subject: [PATCH] EDAC/device: Rework error logging API

Make the main workhorse the "count" functions which can log a @count
of errors. Have the current APIs edac_device_handle_{ce,ue}() call
the _count() variants and this way keep the exported symbols number
unchanged.

[ bp: Rewrite. ]

Signed-off-by: Hanna Hawa <hhhawa@xxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Cc: benh@xxxxxxxxxx
Cc: dwmw@xxxxxxxxxxxx
Cc: hanochu@xxxxxxxxxx
Cc: James Morse <james.morse@xxxxxxx>
Cc: jonnyc@xxxxxxxxxx
Cc: linux-edac <linux-edac@xxxxxxxxxxxxxxx>
Cc: Mauro Carvalho Chehab <mchehab@xxxxxxxxxx>
Cc: ronenk@xxxxxxxxxx
Cc: talel@xxxxxxxxxx
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Link: https://lkml.kernel.org/r/20190923191741.29322-2-hhhawa@xxxxxxxxxx
---
drivers/edac/edac_device.c | 44 ++++++++++++++++---------------
drivers/edac/edac_device.h | 54 ++++++++++++++++++++++++++++++--------
2 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 65cf2b9355c4..d4d8bed5b55d 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -555,8 +555,9 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
return edac_dev->panic_on_ue;
}

-void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -582,23 +583,24 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ce_count++;
+ block->counters.ce_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ce_count++;
- edac_dev->counters.ce_count++;
+ instance->counters.ce_count += count;
+ edac_dev->counters.ce_count += count;

if (edac_device_get_log_ce(edac_dev))
edac_device_printk(edac_dev, KERN_WARNING,
- "CE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "CE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);

-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
@@ -624,22 +626,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,

if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
- block->counters.ue_count++;
+ block->counters.ue_count += count;
}

/* Propagate the count up the 'totals' tree */
- instance->counters.ue_count++;
- edac_dev->counters.ue_count++;
+ instance->counters.ue_count += count;
+ edac_dev->counters.ue_count += count;

if (edac_device_get_log_ue(edac_dev))
edac_device_printk(edac_dev, KERN_EMERG,
- "UE: %s instance: %s block: %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ "UE: %s instance: %s block: %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);

if (edac_device_get_panic_on_ue(edac_dev))
- panic("EDAC %s: UE instance: %s block %s '%s'\n",
- edac_dev->ctl_name, instance->name,
- block ? block->name : "N/A", msg);
+ panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+ edac_dev->ctl_name, instance->name,
+ block ? block->name : "N/A", count, msg);
}
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
+EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..c4c0e0bdce14 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);

/**
- * edac_device_handle_ue():
- * perform a common output and handling of an 'edac_dev' UE event
+ * Log correctable errors.
*
* @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
+/**
+ * Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+ unsigned int count, int inst_nr, int block_nr,
+ const char *msg);
+
/**
- * edac_device_handle_ce():
- * perform a common output and handling of an 'edac_dev' CE event
+ * edac_device_handle_ce(): Log a single correctable error
*
* @edac_dev: pointer to struct &edac_device_ctl_info
* @inst_nr: number of the instance where the CE error happened
* @block_nr: number of the block where the CE error happened
* @msg: message to be printed
*/
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
- int inst_nr, int block_nr, const char *msg);
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+ int block_nr, const char *msg)
+{
+ edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}

/**
* edac_device_alloc_index: Allocate a unique device index number
@@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
*/
extern int edac_device_alloc_index(void);
extern const char *edac_layer_name[];
-
#endif
--
2.21.0

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette