[PATCH v2] x86/cpu: Fix migration safety with X86_BUG_NULL_SEG
From: Jane Malalane
Date: Wed Oct 13 2021 - 10:30:54 EST
Currently, Linux probes for X86_BUG_NULL_SEG unconditionally which
makes it unsafe to migrate in a virtualised environment as the
properties across the migration pool might differ.
To be specific, the case which goes wrong is:
1. Zen1 (or earlier) and Zen2 (or later) in a migration pool
2. Linux boots on Zen2, probes and finds the absence of X86_BUG_NULL_SEG
3. Linux is then migrated to Zen1
Linux is now running on a X86_BUG_NULL_SEG-impacted CPU while believing
that the bug is fixed.
The only way to address the problem is to fully trust the "no longer
affected" CPUID bit when virtualised, because in the above case it would
be cleared deliberately to indicate the fact "you might migrate to
somewhere which has this behaviour".
Zen3 adds the NullSelectorClearsBase bit to indicate that loading
a NULL segment selector zeroes the base and limit fields, not just the
attributes. Zen2 also has this behaviour but doesn't have the
NSCB bit.
Signed-off-by: Jane Malalane <jane.malalane@xxxxxxxxxx>
---
CC: <x86@xxxxxxxxxx>
CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Borislav Petkov <bp@xxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
CC: Pu Wen <puwen@xxxxxxxx>
CC: Paolo Bonzini <pbonzini@xxxxxxxxxx>
CC: Sean Christopherson <seanjc@xxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Yazen Ghannam <Yazen.Ghannam@xxxxxxx>
CC: Brijesh Singh <brijesh.singh@xxxxxxx>
CC: Huang Rui <ray.huang@xxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxx>
CC: Kim Phillips <kim.phillips@xxxxxxx>
CC: <stable@xxxxxxxxxxxxxxx>
v2:
* Deliberately not __init. early_init_*() are not __init functions
* Fixed whitespace error flagged by scripts/checkpatch.pl
---
arch/x86/kernel/cpu/amd.c | 22 ++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 8 +++-----
arch/x86/kernel/cpu/cpu.h | 1 +
arch/x86/kernel/cpu/hygon.c | 22 ++++++++++++++++++++++
4 files changed, 48 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2131af9f2fa2..1abfb0ae1f74 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -625,6 +625,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
{
u64 value;
u32 dummy;
+ bool nscb = false;
early_init_amd_mc(c);
@@ -650,6 +651,27 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & BIT(14))
set_cpu_cap(c, X86_FEATURE_RAPL);
+ /*
+ * Zen1 and earlier CPUs don't clear segment base/limits when
+ * loading a NULL selector. This has been designated
+ * X86_BUG_NULL_SEG.
+ *
+ * Zen3 CPUs advertise Null Selector Clears Base in CPUID.
+ * Zen2 CPUs also have this behaviour, but no CPUID bit.
+ *
+ * A hypervisor may synthesize the bit, but may also hide it
+ * for migration safety, so we must not probe for model
+ * specific behaviour when virtualised.
+ */
+ if (c->extended_cpuid_level >= 0x80000021 && cpuid_eax(0x80000021) & BIT(6))
+ nscb = true;
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !nscb && c->x86 == 0x17)
+ nscb = check_null_seg_clears_base(c);
+
+ if (!nscb)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0f8885949e8c..2ca4afb97247 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1395,7 +1395,7 @@ void __init early_cpu_init(void)
early_identify_cpu(&boot_cpu_data);
}
-static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+bool check_null_seg_clears_base(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
/*
@@ -1418,10 +1418,10 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
wrmsrl(MSR_FS_BASE, 1);
loadsegment(fs, 0);
rdmsrl(MSR_FS_BASE, tmp);
- if (tmp != 0)
- set_cpu_bug(c, X86_BUG_NULL_SEG);
wrmsrl(MSR_FS_BASE, old_base);
+ return tmp == 0;
#endif
+ return true;
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -1457,8 +1457,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
- detect_null_seg_behavior(c);
-
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 95521302630d..ad88bce508fa 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -75,6 +75,7 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
extern int detect_extended_topology(struct cpuinfo_x86 *c);
extern int detect_ht_early(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
+extern bool check_null_seg_clears_base(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 6d50136f7ab9..49bdb55efe52 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -240,6 +240,7 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
static void early_init_hygon(struct cpuinfo_x86 *c)
{
u32 dummy;
+ bool nscb = false;
early_init_hygon_mc(c);
@@ -264,6 +265,27 @@ static void early_init_hygon(struct cpuinfo_x86 *c)
if (c->x86_power & BIT(14))
set_cpu_cap(c, X86_FEATURE_RAPL);
+ /*
+ * Zen1 and earlier CPUs don't clear segment base/limits when
+ * loading a NULL selector. This has been designated
+ * X86_BUG_NULL_SEG.
+ *
+ * Zen3 CPUs advertise Null Selector Clears Base in CPUID.
+ * Zen2 CPUs also have this behaviour, but no CPUID bit.
+ *
+ * A hypervisor may synthesize the bit, but may also hide it
+ * for migration safety, so we must not probe for model
+ * specific behaviour when virtualised.
+ */
+ if (c->extended_cpuid_level >= 0x80000021 && cpuid_eax(0x80000021) & BIT(6))
+ nscb = true;
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !nscb && c->x86 == 0x18)
+ nscb = check_null_seg_clears_base(c);
+
+ if (!nscb)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#endif
--
2.11.0