Re: [PATCH 3/3] kvm: mmu: Separate pte generation from set_spte

From: Vitaly Kuznetsov
Date: Wed Feb 05 2020 - 08:52:42 EST


Ben Gardon <bgardon@xxxxxxxxxx> writes:

> Separate the functions for generating leaf page table entries from the
> function that inserts them into the paging structure. This refactoring
> will facilitate changes to the MMU sychronization model to use atomic
> compare / exchanges (which are not guaranteed to succeed) instead of a
> monolithic MMU lock.
>
> No functional change expected.
>
> Tested by running kvm-unit-tests on an Intel Haswell machine. This
> commit introduced no new failures.
>
> This commit can be viewed in Gerrit at:
> https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2360
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> Reviewed-by: Peter Shier <pshier@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++++++++++++---------------
> 1 file changed, 34 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index b81010d0edae1..9239ad5265dc6 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3000,20 +3000,14 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
> #define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
> #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
>
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> - unsigned int pte_access, int level,
> - gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> - bool can_unsync, bool host_writable)
> +static u64 make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
> + gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
> + bool can_unsync, bool host_writable, bool ad_disabled,
> + int *ret)

With such a long parameter list we may think about passing a pointer to
a structure instead (common for make_spte()/set_spte())

> {
> u64 spte = 0;
> - int ret = 0;
> - struct kvm_mmu_page *sp;
> -
> - if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> - return 0;
>
> - sp = page_header(__pa(sptep));
> - if (sp_ad_disabled(sp))
> + if (ad_disabled)
> spte |= SPTE_AD_DISABLED_MASK;
> else if (kvm_vcpu_ad_need_write_protect(vcpu))
> spte |= SPTE_AD_WRPROT_ONLY_MASK;
> @@ -3066,27 +3060,49 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> * is responsibility of mmu_get_page / kvm_sync_page.
> * Same reasoning can be applied to dirty page accounting.
> */
> - if (!can_unsync && is_writable_pte(*sptep))
> - goto set_pte;
> + if (!can_unsync && is_writable_pte(old_spte))
> + return spte;
>
> if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
> pgprintk("%s: found shadow page for %llx, marking ro\n",
> __func__, gfn);
> - ret |= SET_SPTE_WRITE_PROTECTED_PT;
> + *ret |= SET_SPTE_WRITE_PROTECTED_PT;
> pte_access &= ~ACC_WRITE_MASK;
> spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
> }
> }
>
> - if (pte_access & ACC_WRITE_MASK) {
> - kvm_vcpu_mark_page_dirty(vcpu, gfn);
> + if (pte_access & ACC_WRITE_MASK)
> spte |= spte_shadow_dirty_mask(spte);
> - }
>
> if (speculative)
> spte = mark_spte_for_access_track(spte);
>
> -set_pte:
> + return spte;
> +}
> +
> +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> + unsigned int pte_access, int level,
> + gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> + bool can_unsync, bool host_writable)
> +{
> + u64 spte = 0;
> + struct kvm_mmu_page *sp;
> + int ret = 0;
> +
> + if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> + return 0;
> +
> + sp = page_header(__pa(sptep));
> +
> + spte = make_spte(vcpu, pte_access, level, gfn, pfn, *sptep, speculative,
> + can_unsync, host_writable, sp_ad_disabled(sp), &ret);

I'm probably missing something, but in make_spte() I see just one place
which writes to '*ret' so at the end, this is either
SET_SPTE_WRITE_PROTECTED_PT or 0 (which we got only because we
initialize it to 0 in set_spte()). Unless this is preparation to some
other change, I don't see much value in the complication.

Can we actually reverse the logic, pass 'spte' by reference and return
'ret'?

> + if (!spte)
> + return 0;
> +
> + if (spte & PT_WRITABLE_MASK)
> + kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +
> if (mmu_spte_update(sptep, spte))
> ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
> return ret;

--
Vitaly