Re: [PATCH] x86/mce: /dev/mcelog: Dynamically allocate space for machine check records

From: Borislav Petkov
Date: Mon Feb 17 2020 - 15:48:05 EST


On Fri, Feb 07, 2020 at 04:05:51PM -0800, Tony Luck wrote:
> We have had a hard coded limit of 32 machine check records since the
> dawn of time. But as numbers of cores increase, it is possible for
> more than 32 errors to be reported before a user process reads from
> /dev/mcelog. In this case the additional errors are lost.
>
> Keep 32 as the minimum. But tune the maximum value up based on the
> number of processors.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> arch/x86/include/asm/mce.h | 6 ++--
> arch/x86/kernel/cpu/mce/dev-mcelog.c | 46 ++++++++++++++++------------
> 2 files changed, 29 insertions(+), 23 deletions(-)

...

> @@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
>
> /* Only supports full reads right now */
> err = -EINVAL;
> - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
> + if (*off != 0 || usize < mcelog->len*sizeof(struct mce))

Add spaces around that *

> goto out;
>
> - next = mcelog.next;
> + next = mcelog->next;
> err = 0;
>
> for (i = 0; i < next; i++) {
> - struct mce *m = &mcelog.entry[i];
> + struct mce *m = &mcelog->entry[i];
>
> err |= copy_to_user(buf, m, sizeof(*m));
> buf += sizeof(*m);
> }
>
> - memset(mcelog.entry, 0, next * sizeof(struct mce));
> - mcelog.next = 0;
> + memset(mcelog->entry, 0, next * sizeof(struct mce));
> + mcelog->next = 0;
>
> if (err)
> err = -EFAULT;

...

> @@ -340,6 +336,15 @@ static struct miscdevice mce_chrdev_device = {
> static __init int dev_mcelog_init_device(void)
> {
> int err;
> + int mce_log_len;

Please sort function-local variable declarations in reverse Christmas
tree order:

<type A> longest_variable_name;
<type B> shorter_var_name;
<type C> even_shorter;
<type D> i;

> +
> + mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());

arch/x86/kernel/cpu/mce/dev-mcelog.c: In function ‘dev_mcelog_init_device’:
./include/linux/kernel.h:835:29: warning: comparison of distinct pointer types lacks a cast
835 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
| ^~
./include/linux/kernel.h:849:4: note: in expansion of macro ‘__typecheck’
849 | (__typecheck(x, y) && __no_side_effects(x, y))
| ^~~~~~~~~~~
./include/linux/kernel.h:859:24: note: in expansion of macro ‘__safe_cmp’
859 | __builtin_choose_expr(__safe_cmp(x, y), \
| ^~~~~~~~~~
./include/linux/kernel.h:875:19: note: in expansion of macro ‘__careful_cmp’
875 | #define max(x, y) __careful_cmp(x, y, >)
| ^~~~~~~~~~~~~
arch/x86/kernel/cpu/mce/dev-mcelog.c:341:16: note: in expansion of macro ‘max’
341 | mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
| ^~~

That MCE_LOG_MIN_LEN wants to be 32U.

> + mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
> + if (!mcelog)
> + return -ENOMEM;

<---- newline here.

> + strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
> + mcelog->len = mce_log_len;
> + mcelog->recordlen = sizeof(struct mce);
>
> /* register character device /dev/mcelog */
> err = misc_register(&mce_chrdev_device);

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette