Re: [PATCH v10 10/27] KVM: x86: Refine xsave-managed guest register/MSR reset handling

From: Sean Christopherson
Date: Wed May 01 2024 - 16:40:39 EST


The shortlog is a bit stale now that it only deals with XSTATE. This?

KVM: x86: Zero XSTATE components on INIT by iterating over supported features

On Sun, Feb 18, 2024, Yang Weijiang wrote:
> Tweak the code a bit to facilitate resetting more xstate components in
> the future, e.g., CET's xstate-managed MSRs.
>
> No functional change intended.
>
> Suggested-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
> arch/x86/kvm/x86.c | 30 +++++++++++++++++++++++++++---
> 1 file changed, 27 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 10847e1cc413..5a9c07751c0e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12217,11 +12217,27 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> static_branch_dec(&kvm_has_noapic_vcpu);
> }
>
> +#define XSTATE_NEED_RESET_MASK (XFEATURE_MASK_BNDREGS | \
> + XFEATURE_MASK_BNDCSR)
> +
> +static bool kvm_vcpu_has_xstate(unsigned long xfeature)
> +{
> + switch (xfeature) {
> + case XFEATURE_MASK_BNDREGS:
> + case XFEATURE_MASK_BNDCSR:
> + return kvm_cpu_cap_has(X86_FEATURE_MPX);
> + default:
> + return false;
> + }
> +}
> +
> void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> struct kvm_cpuid_entry2 *cpuid_0x1;
> unsigned long old_cr0 = kvm_read_cr0(vcpu);
> + DECLARE_BITMAP(reset_mask, 64);

I vote to use a u64 instead of a bitmask. The result cast isn't exactly pretty,
but it's not all that uncommon, and it's easy enough to make it "safe" by adding
BUILD_BUG_ON().

On the flip side, using the bitmap_*() APIs for super simple bitwise-OR/AND/TEST
operations makes the code harder to read.

> unsigned long new_cr0;
> + unsigned int i;
>
> /*
> * Several of the "set" flows, e.g. ->set_cr0(), read other registers
> @@ -12274,7 +12290,12 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> kvm_async_pf_hash_reset(vcpu);
> vcpu->arch.apf.halted = false;
>
> - if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
> + bitmap_from_u64(reset_mask, (kvm_caps.supported_xcr0 |
> + kvm_caps.supported_xss) &
> + XSTATE_NEED_RESET_MASK);
> +
> + if (vcpu->arch.guest_fpu.fpstate &&
> + !bitmap_empty(reset_mask, XFEATURE_MAX)) {
> struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
>
> /*
> @@ -12284,8 +12305,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> if (init_event)
> kvm_put_guest_fpu(vcpu);
>
> - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
> - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
> + for_each_set_bit(i, reset_mask, XFEATURE_MAX) {
> + if (!kvm_vcpu_has_xstate(i))
> + continue;
> + fpstate_clear_xstate_component(fpstate, i);
> + }

A few intertwined thoughts:

1. fpstate is zero allocated, and KVM absolutely relies on that, e.g. KVM doesn't
manually zero out the XSAVE fields that are preserved on INIT, but zeroed on
RESET.

2. That means there is no need to manually clear XSTATE components during RESET,
as KVM doesn't support standalone RESET, i.e. it's only cleared during vCPU
creation, when guest FPU state is guaranteed to be '0'.

3. That makes XSTATE_NEED_RESET_MASK a misnomer, as it's literally the !RESET
path that is relevant. E.g. it should be XSTATE_CLEAR_ON_INIT_MASK or so.

4. If we add a helper, then XSTATE_NEED_RESET_MASK is probably unneeded.

So, what if we slot in the below (compile tested only) patch as prep work? Then
this patch becomes:

---
arch/x86/kvm/x86.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b441bf61b541..b00730353a28 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12220,6 +12220,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
+ u64 xfeatures_mask;
+ int i;

/*
* Guest FPU state is zero allocated and so doesn't need to be manually
@@ -12233,16 +12235,20 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
* are unchanged. Currently, the only components that are zeroed and
* supported by KVM are MPX related.
*/
- if (!kvm_mpx_supported())
+ xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
+ (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+ if (!xfeatures_mask)
return;

+ BUILD_BUG_ON(XFEATURE_MAX >= sizeof(xfeatures_mask));
+
/*
* All paths that lead to INIT are required to load the guest's FPU
* state (because most paths are buried in KVM_RUN).
*/
kvm_put_guest_fpu(vcpu);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
+ for_each_set_bit(i, xfeatures_mask, XFEATURE_MAX)
+ fpstate_clear_xstate_component(fpstate, i);
kvm_load_guest_fpu(vcpu);
}


base-commit: efca8b27900dfec160b6ba90820fa2ced81de904
--


and the final code looks like:


static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
u64 xfeatures_mask;
int i;

/*
* Guest FPU state is zero allocated and so doesn't need to be manually
* cleared on RESET, i.e. during vCPU creation.
*/
if (!init_event || !fpstate)
return;

/*
* On INIT, only select XSTATE components are zeroed, most compoments
* are unchanged. Currently, the only components that are zeroed and
* supported by KVM are MPX and CET related.
*/
xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR |
XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL);
if (!xfeatures_mask)
return;

BUILD_BUG_ON(XFEATURE_MAX >= sizeof(xfeatures_mask) * BITS_PER_BYTE);

/*
* All paths that lead to INIT are required to load the guest's FPU
* state (because most paths are buried in KVM_RUN).
*/
kvm_put_guest_fpu(vcpu);
for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
fpstate_clear_xstate_component(fpstate, i);
kvm_load_guest_fpu(vcpu);
}


--
From: Sean Christopherson <seanjc@xxxxxxxxxx>
Date: Wed, 1 May 2024 12:12:31 -0700
Subject: [PATCH] KVM: x86: Manually clear MPX state only on INIT

Don't manually clear/zero MPX state on RESET, as the guest FPU state is
zero allocated and KVM only does RESET during vCPU creation, i.e. the
relevant state is guaranteed to be all zeroes.

Opportunistically move the relevant code into a helper in anticipation of
adding support for CET shadow stacks, which also has state that is zeroed
on INIT.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++++++++++----------------
1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10847e1cc413..b441bf61b541 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12217,6 +12217,35 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
static_branch_dec(&kvm_has_noapic_vcpu);
}

+static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
+
+ /*
+ * Guest FPU state is zero allocated and so doesn't need to be manually
+ * cleared on RESET, i.e. during vCPU creation.
+ */
+ if (!init_event || !fpstate)
+ return;
+
+ /*
+ * On INIT, only select XSTATE components are zeroed, most compoments
+ * are unchanged. Currently, the only components that are zeroed and
+ * supported by KVM are MPX related.
+ */
+ if (!kvm_mpx_supported())
+ return;
+
+ /*
+ * All paths that lead to INIT are required to load the guest's FPU
+ * state (because most paths are buried in KVM_RUN).
+ */
+ kvm_put_guest_fpu(vcpu);
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
+ kvm_load_guest_fpu(vcpu);
+}
+
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_cpuid_entry2 *cpuid_0x1;
@@ -12274,22 +12303,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;

- if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
- struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
-
- /*
- * All paths that lead to INIT are required to load the guest's
- * FPU state (because most paths are buried in KVM_RUN).
- */
- if (init_event)
- kvm_put_guest_fpu(vcpu);
-
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
-
- if (init_event)
- kvm_load_guest_fpu(vcpu);
- }
+ kvm_xstate_reset(vcpu, init_event);

if (!init_event) {
vcpu->arch.smbase = 0x30000;

base-commit: 1a89965fa9dae1ae04f44679860ef6bc008c2003
--