Re: [PATCH] x86/kvm/mmu: fix switch between root and guest MMUs

From: Paolo Bonzini
Date: Fri Feb 22 2019 - 12:12:20 EST


On 22/02/19 17:45, Vitaly Kuznetsov wrote:
> Commit 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu") brought one subtle
> change: previously, when switching back from L2 to L1, we were resetting
> MMU hooks (like mmu->get_cr3()) in kvm_init_mmu() called from
> nested_vmx_load_cr3() and now we do that in nested_ept_uninit_mmu_context()
> when we re-target vcpu->arch.mmu pointer.
> The change itself looks logical: if nested_ept_init_mmu_context() changes
> something than nested_ept_uninit_mmu_context() restores it back. There is,
> however, one thing: the following call chain:
>
> nested_vmx_load_cr3()
> kvm_mmu_new_cr3()
> __kvm_mmu_new_cr3()
> fast_cr3_switch()
> cached_root_available()
>
> now happens with MMU hooks pointing to the new MMU (root MMU in our case)
> while previously it was happening with the old one. cached_root_available()
> tries to stash current root but it is incorrect to read current CR3 with
> mmu->get_cr3(), we need to use old_mmu->get_cr3() which in case we're
> switching from L2 to L1 is guest_mmu. (BTW, in shadow page tables case this
> is a non-issue because we don't switch MMU).
>
> While we could've tried to guess that we're switching between MMUs and call
> the right ->get_cr3() from cached_root_available() this seems to be overly
> complicated. Instead, just stash the corresponding CR3 when setting
> root_hpa and make cached_root_available() use the stashed value.
>
> Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu")
> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>

Is the bug latent until the other patch? Or are they completely
separate issues?

Thanks,

Paolo

> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/mmu.c | 17 +++++++++++++----
> 2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index be87f71ccaa5..180373360e34 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -398,6 +398,7 @@ struct kvm_mmu {
> void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> u64 *spte, const void *pte);
> hpa_t root_hpa;
> + gpa_t root_cr3;
> union kvm_mmu_role mmu_role;
> u8 root_level;
> u8 shadow_root_level;
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 90ba1a1755a4..f2d1d230d5b8 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3555,6 +3555,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> &invalid_list);
> mmu->root_hpa = INVALID_PAGE;
> }
> + mmu->root_cr3 = 0;
> }
>
> kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
> @@ -3610,6 +3611,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
> vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
> } else
> BUG();
> + vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
>
> return 0;
> }
> @@ -3618,10 +3620,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
> {
> struct kvm_mmu_page *sp;
> u64 pdptr, pm_mask;
> - gfn_t root_gfn;
> + gfn_t root_gfn, root_cr3;
> int i;
>
> - root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
> + root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
> + root_gfn = root_cr3 >> PAGE_SHIFT;
>
> if (mmu_check_root(vcpu, root_gfn))
> return 1;
> @@ -3646,7 +3649,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
> ++sp->root_count;
> spin_unlock(&vcpu->kvm->mmu_lock);
> vcpu->arch.mmu->root_hpa = root;
> - return 0;
> + goto set_root_cr3;
> }
>
> /*
> @@ -3712,6 +3715,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
> vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
> }
>
> +set_root_cr3:
> + vcpu->arch.mmu->root_cr3 = root_cr3;
> +
> return 0;
> }
>
> @@ -4163,7 +4169,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
> struct kvm_mmu_root_info root;
> struct kvm_mmu *mmu = vcpu->arch.mmu;
>
> - root.cr3 = mmu->get_cr3(vcpu);
> + root.cr3 = mmu->root_cr3;
> root.hpa = mmu->root_hpa;
>
> for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
> @@ -4176,6 +4182,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
> }
>
> mmu->root_hpa = root.hpa;
> + mmu->root_cr3 = root.cr3;
>
> return i < KVM_MMU_NUM_PREV_ROOTS;
> }
> @@ -5517,11 +5524,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
> vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
>
> vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
> + vcpu->arch.root_mmu.root_cr3 = 0;
> vcpu->arch.root_mmu.translate_gpa = translate_gpa;
> for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
>
> vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
> + vcpu->arch.guest_mmu.root_cr3 = 0;
> vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
> for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
>