Re: [PATCH v8 4/5] x86/xsave: Make XSAVE check the base CPUID features before enabling

From: Vegard Nossum
Date: Sat Jun 29 2019 - 11:24:53 EST



On 10/5/17 11:52 PM, Andi Kleen wrote:
From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Before enabling XSAVE, not only check the XSAVE specific CPUID bits,
but also the base CPUID features of the respective XSAVE feature.
This allows disabling individual XSAVE states using the existing
clearcpuid= option, which can be useful for performance testing
and debugging, and in general also avoids inconsistencies.

v2:
Add curly brackets (Thomas Gleixner)
Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..924bd895b5ee 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>
#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
"unknown xstate feature" ,
};
+static short xsave_cpuid_features[] = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -726,6 +740,7 @@ void __init fpu__init_system_xstate(void)
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -759,6 +774,14 @@ void __init fpu__init_system_xstate(void)
goto out_disable;
}
+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */


Hi,

The commit for this patch in mainline
(ccb18db2ab9d923df07e7495123fe5fb02329713) causes the kernel to hang on
boot when passing the "nofxsr" option:

$ kvm -cpu host -kernel arch/x86/boot/bzImage -append "console=ttyS0 nofxsr earlyprintk=ttyS0" -serial stdio -display none -smp 2
early console in extract_kernel
input_data: 0x0000000001dea276
input_len: 0x0000000000500704
output: 0x0000000001000000
output_len: 0x00000000012c79b4
kernel_total_size: 0x0000000000f24000
booted via startup_32()
Physical KASLR using RDRAND RDTSC...
Virtual KASLR using RDRAND RDTSC...

Decompressing Linux... Parsing ELF... Performing relocations... done.
Booting the kernel.
[..hang..]

If I revert it from Linus's tree (~5.2-rc6) then it boots again:

early console in extract_kernel
input_data: 0x00000000024192e9
input_len: 0x00000000005d8ea1
output: 0x0000000001000000
output_len: 0x00000000019c7fa4
kernel_total_size: 0x000000000162c000
trampoline_32bit: 0x000000000009d000
booted via startup_32()
Physical KASLR using RDRAND RDTSC...
Virtual KASLR using RDRAND RDTSC...

Decompressing Linux... Parsing ELF... Performing relocations... done.
Booting the kernel.
Linux version 5.2.0-rc6+ (vegard@t460) (gcc version 5.5.0 20171010 (Ubuntu 5.5.0-12ubuntu1~16.04)) #98 SMP PREEMPT Sat Jun 29 17:13:31 CEST 2019
Command line: console=ttyS0 nofxsr earlyprintk=ttyS0
[..normal boot..]

/proc/cpuinfo inside the VM is:

processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 78
model name : Intel(R) Core(TM) i5-6300U CPU @ 2.40GHz
stepping : 3
microcode : 0x1
cpu MHz : 2496.000
cache size : 4096 KB
physical id : 0
siblings : 1
core id : 0
cpu cores : 1
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 13
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl cpuid tsc_known_freq pni pclmulqdq vmx ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch cpuid_fault invpcid_single pti ssbd ibrs ibpb tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx rdseed adx smap clflushopt xsaveopt xsavec xgetbv1 xsaves arat
bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds
bogomips : 4992.00
clflush size : 64
cache_alignment : 64
address sizes : 40 bits physical, 48 bits virtual
power management:


Vegard