[v2 08/13] x86/mce: Warn if a microcode update is in progress when MCE arrives

From: Ashok Raj
Date: Thu Nov 03 2022 - 14:00:33 EST


Due to the nature of microcode updates to long flow instructions, an MCE
taken while a microcode update is in progress could be dangerous. There is
nothing the kernel can do to mitigate this safely.

Drop some bread crumbs to note that an MCE happened while a microcode update
is also in progress.

Suggested-by: Boris Petkov <bp@xxxxxxxxx>
Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
---
arch/x86/include/asm/microcode.h | 2 ++
arch/x86/kernel/cpu/mce/core.c | 5 +++++
arch/x86/kernel/cpu/microcode/core.c | 9 +++++++++
3 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ffb46f2b0354..f16973fb7330 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -167,8 +167,10 @@ static inline void microcode_bsp_resume(void) { }
#endif

#ifdef CONFIG_MICROCODE_LATE_LOADING
+extern int ucode_update_in_progress(void);
extern void hold_sibling_in_nmi(void);
#else
+static inline int ucode_update_in_progress(void) { return 0; }
static inline void hold_sibling_in_nmi(void) { }
#endif

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c8ec5c71712..67669686fab4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -46,6 +46,7 @@
#include <linux/hardirq.h>

#include <asm/intel-family.h>
+#include <asm/microcode.h>
#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
@@ -1425,6 +1426,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
else if (unlikely(!mca_cfg.initialized))
return unexpected_machine_check(regs);

+ instrumentation_begin();
+ if (ucode_update_in_progress())
+ pr_warn("MCE triggered while microcode update is in progress\n");
+ instrumentation_end();
if (mce_flags.skx_repmov_quirk && quirk_skylake_repmov())
goto clear;

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6084a87ea8f3..6f59ffdf2881 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -327,6 +327,8 @@ void reload_early_microcode(void)
static struct platform_device *microcode_pdev;

#ifdef CONFIG_MICROCODE_LATE_LOADING
+static int ucode_updating;
+
/*
* Late loading dance. Why the heavy-handed stomp_machine effort?
*
@@ -556,6 +558,11 @@ static void microcode_check(struct cpuinfo_x86 *orig)
pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}

+int ucode_update_in_progress(void)
+{
+ return ucode_updating;
+}
+
/*
* Reload microcode late on all CPUs. Wait for a sec until they
* all gather together.
@@ -578,7 +585,9 @@ static int microcode_reload_late(void)
}

copy_cpu_caps(&info);
+ ucode_updating = 1;
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ ucode_updating = 0;
if (ret == 0)
microcode_check(&info);

--
2.34.1