Re: [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user

From: Borislav Petkov
Date: Mon Oct 05 2020 - 12:31:42 EST


On Wed, Sep 30, 2020 at 04:26:11PM -0700, Tony Luck wrote:
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 9713825e6745..60bacf6e0501 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1236,14 +1236,19 @@ static void kill_me_maybe(struct callback_head *cb)
> if (!p->mce_ripv)
> flags |= MF_MUST_KILL;
>
> - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
> + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
> + !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
> set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
> sync_core();
> return;
> }
>
> - pr_err("Memory error not recovered");
> - kill_me_now(cb);
> + if (p->mce_vaddr != (void __user *)~0ul) {

As previously pointed out, please test against -1L, even though it is
the same value, so that it is obvious this is the error value coming
from insn_get_addr_ref().

> + force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
> + } else {
> + pr_err("Memory error not recovered");
> + kill_me_now(cb);
> + }
> }
>
> /*
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index 8517cbf7b184..6e8b38cf52d9 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -10,6 +10,9 @@
> #include <linux/init.h>
> #include <linux/debugfs.h>
> #include <asm/mce.h>
> +#include <asm/traps.h>
> +#include <asm/insn.h>
> +#include <asm/insn-eval.h>
> #include <linux/uaccess.h>
>
> #include "internal.h"
> @@ -198,6 +201,45 @@ static struct severity {
> #define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
> (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
>
> +static bool is_copy_from_user(struct pt_regs *regs)
> +{
> + u8 insn_buf[MAX_INSN_SIZE];
> + struct insn insn;
> + unsigned long addr;
> +
> + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
> + return false;
> +
> + kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
> + insn_get_opcode(&insn);
> + if (!insn.opcode.got)
> + return false;
> +
> + switch (insn.opcode.value) {
> + /* MOV mem,reg */
> + case 0x8A: case 0x8B:
> + /* MOVZ mem,reg */
> + case 0xB60F: case 0xB70F:
> + insn_get_modrm(&insn);
> + insn_get_sib(&insn);

You need to test here that both decoding steps succeeded, i.e., that

insn.modrm.got == 1

and
insn.sib.got == 1

I know, this is weird - those functions should return an error value
instead of being void and I've asked Masami in the past but no reply.

Who knows, one fine day I might convert the crap to do that instead.

> + addr = (unsigned long)insn_get_addr_ref(&insn, regs);
> + break;
> + /* REP MOVS */
> + case 0xA4: case 0xA5:
> + addr = regs->si;
> + break;
> + default:
> + return false;
> + }
> +
> + if (fault_in_kernel_space(addr))
> + return false;
> +
> + current->mce_vaddr = (void __user *)addr;
> +
> + return true;
> +}
> +
> /*
> * If mcgstatus indicated that ip/cs on the stack were
> * no good, then "m->cs" will be zero and we will have

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette