Re: [PATCH v2] ACPI: APEI: fix missing erst record id

From: Rafael J. Wysocki
Date: Tue Apr 05 2022 - 20:36:06 EST


APEI reviewers, your input is needed here.

On Tue, Apr 5, 2022 at 8:14 AM Liu Xinpeng <liuxp11@xxxxxxxxxxxxxxx> wrote:
>
> If a record_id is in the erst_record_id_cache but not in storage,
> erst_read() will return -ENOENT and the code will go to retry_next,
> and then erst_get_record_id_next() skips a record_id. This can result
> in printing only the records that are in the cache.
>
> A reproducer of the problem (retry many times):
>
> [root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
> [root@localhost erst-inject]# ./erst-inject -p
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00012
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00013
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00014
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
> [root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
> [root@localhost erst-inject]# ./erst-inject -p
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00012
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00013
> rc: 273
> rcd sig: CPER
> rcd id: 0xaaaaa00014
> [root@localhost erst-inject]# ./erst-inject -n
> total error record count: 6
>
> Signed-off-by: Liu Xinpeng <liuxp11@xxxxxxxxxxxxxxx>
> ---
> drivers/acpi/apei/erst-dbg.c | 4 +++-
> drivers/acpi/apei/erst.c | 34 +++++++++++++++++++++++++++++++---
> include/acpi/apei.h | 1 +
> 3 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
> index c740f0faad39..5b8164280a17 100644
> --- a/drivers/acpi/apei/erst-dbg.c
> +++ b/drivers/acpi/apei/erst-dbg.c
> @@ -113,8 +113,10 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
> retry:
> rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
> /* The record may be cleared by others, try read next record */
> - if (rc == -ENOENT)
> + if (rc == -ENOENT) {
> + erst_clear_cache(id);
> goto retry_next;
> + }
> if (rc < 0)
> goto out;
> if (len > ERST_DBG_RECORD_LEN_MAX) {
> diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
> index 698d67cee052..07d69dc7fd62 100644
> --- a/drivers/acpi/apei/erst.c
> +++ b/drivers/acpi/apei/erst.c
> @@ -856,6 +856,31 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
> }
> EXPORT_SYMBOL_GPL(erst_read);
>
> +int erst_clear_cache(u64 record_id)
> +{
> + int rc, i;
> + u64 *entries;
> +
> + if (erst_disable)
> + return -ENODEV;
> +
> + rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
> + if (rc)
> + return rc;
> +
> + entries = erst_record_id_cache.entries;
> + for (i = 0; i < erst_record_id_cache.len; i++) {
> + if (entries[i] == record_id)
> + entries[i] = APEI_ERST_INVALID_RECORD_ID;
> + }
> + __erst_record_id_cache_compact();
> +
> + mutex_unlock(&erst_record_id_cache.lock);
> +
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(erst_clear_cache);
> +
> int erst_clear(u64 record_id)
> {
> int rc, i;
> @@ -998,14 +1023,17 @@ static ssize_t erst_reader(struct pstore_record *record)
>
> len = erst_read(record_id, &rcd->hdr, rcd_len);
> /* The record may be cleared by others, try read next record */
> - if (len == -ENOENT)
> + if (len == -ENOENT) {
> + erst_clear_cache(record_id);
> goto skip;
> - else if (len < 0 || len < sizeof(*rcd)) {
> + } else if (len < 0 || len < sizeof(*rcd)) {
> rc = -EIO;
> goto out;
> }
> - if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
> + if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) {
> + erst_clear_cache(record_id);
> goto skip;
> + }
>
> record->buf = kmalloc(len, GFP_KERNEL);
> if (record->buf == NULL) {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index afaca3a075e8..f8c11ff4115a 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -47,6 +47,7 @@ void erst_get_record_id_end(void);
> ssize_t erst_read(u64 record_id, struct cper_record_header *record,
> size_t buflen);
> int erst_clear(u64 record_id);
> +int erst_clear_cache(u64 record_id);
>
> int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
> void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
> --
> 2.23.0
>