Re: [PATCH RFC v8 02/56] KVM: x86: Add 'update_mem_attr' x86 op

From: Zhi Wang
Date: Tue Mar 21 2023 - 07:22:21 EST


On Mon, 20 Mar 2023 13:05:43 -0500
Michael Roth <michael.roth@xxxxxxx> wrote:

> On Fri, Mar 17, 2023 at 09:56:11PM -0700, Isaku Yamahata wrote:
> > On Mon, Feb 20, 2023 at 12:37:53PM -0600,
> > Michael Roth <michael.roth@xxxxxxx> wrote:
> >
> > > This callback will do any platform-specific handling needed for
> > > converting pages between shared/private.
> > >
> > > Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> > > ---
> > > arch/x86/include/asm/kvm-x86-ops.h | 1 +
> > > arch/x86/include/asm/kvm_host.h | 2 ++
> > > arch/x86/kvm/mmu/mmu.c | 13 +++++++++++++
> > > include/linux/kvm_host.h | 4 ++++
> > > virt/kvm/kvm_main.c | 29 +++++++++++++++++++++++++++++
> > > 5 files changed, 49 insertions(+)
> > >
> > > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > > index 72183da010b8..a8aaf532c2ab 100644
> > > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > > @@ -132,6 +132,7 @@ KVM_X86_OP(complete_emulated_msr)
> > > KVM_X86_OP(vcpu_deliver_sipi_vector)
> > > KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
> > > KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
> > > +KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
> > >
> > > #undef KVM_X86_OP
> > > #undef KVM_X86_OP_OPTIONAL
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index f856d689dda0..2da3fb2d5d1b 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -1644,6 +1644,8 @@ struct kvm_x86_ops {
> > > void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> > > int root_level);
> > > bool (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
> > > + int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr,
> > > + gfn_t start, gfn_t end);
> > >
> > > bool (*has_wbinvd_exit)(void);
> > >
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index fb3f34b7391c..053bd77bbf52 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -7251,4 +7251,17 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm,
> > > linfo_update_mixed(gfn, slot, level, mixed);
> > > }
> > > }
> > > +
> > > +void kvm_arch_post_set_memory_attributes(struct kvm *kvm,
> > > + struct kvm_memory_slot *slot,
> > > + unsigned long attrs,
> > > + gfn_t start, gfn_t end)
> > > +{
> > > + int ret;
> > > +
> > > + ret = static_call(kvm_x86_update_mem_attr)(slot, attrs, start, end);
> > > + if (ret)
> > > + pr_warn_ratelimited("Failed to update GFN range 0x%llx-0x%llx with attributes 0x%lx. Ret: %d\n",
> > > + start, end, attrs, ret);
> > > +}
> > > #endif
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index fdc59479b3e2..d200b8f45583 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -2330,6 +2330,10 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm,
> > > struct kvm_memory_slot *slot,
> > > unsigned long attrs,
> > > gfn_t start, gfn_t end);
> > > +void kvm_arch_post_set_memory_attributes(struct kvm *kvm,
> > > + struct kvm_memory_slot *slot,
> > > + unsigned long attrs,
> > > + gfn_t start, gfn_t end);
> > >
> > > static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > > {
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index b68574ff6c30..8ec985f1c57d 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -2561,6 +2561,32 @@ static void kvm_mem_attrs_changed(struct kvm *kvm, unsigned long attrs,
> > > kvm_flush_remote_tlbs(kvm);
> > > }
> > >
> > > +static void kvm_post_mem_attrs_changed(struct kvm *kvm, unsigned long attrs,
> > > + gfn_t start_orig, gfn_t end_orig)
> > > +{
> > > + struct kvm_memory_slot *slot;
> > > + struct kvm_memslots *slots;
> > > + struct kvm_memslot_iter iter;
> > > + int i;
> > > +
> > > + for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
> > > + slots = __kvm_memslots(kvm, i);
> > > +
> > > + kvm_for_each_memslot_in_gfn_range(&iter, slots, start_orig, end_orig) {
> > > + gfn_t start, end;
> > > +
> > > + slot = iter.slot;
> > > + start = max(start_orig, slot->base_gfn);
> > > + end = min(end_orig, slot->base_gfn + slot->npages);
> > > +
> > > + if (start >= end)
> > > + continue;
> > > +
> > > + kvm_arch_post_set_memory_attributes(kvm, slot, attrs, start, end);
> > > + }
> > > + }
> > > +}
> > > +
> > > static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> > > struct kvm_memory_attributes *attrs)
> > > {
> > > @@ -2602,6 +2628,9 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> > > kvm_mmu_invalidate_end(kvm);
> > > KVM_MMU_UNLOCK(kvm);
> > >
> > > + if (i > start)
> > > + kvm_post_mem_attrs_changed(kvm, attrs->attributes, start, i);
> > > +
> >
> > Doesn't kvm_arch_set_memory_attributes() work for you? i.e the following patch.
> > The error check and pr_warn_ratelimited() can be pushed down into the callback.
>
> This is originally how I had but when CONFIG_PREEMPT_COUNT is set this
> will generate warnings for this callback as well as the invalidation
> callback as reported in v7 here:
>
> https://lore.kernel.org/lkml/Y80vhKwQyw8hS%2F22@notebook/
>
> The main issue is that kvm_mem_attrs_changed() is called while holding
> the KVM MMU lock, which disables preemption. But when updating
> attributes for SNP, we also need to remove private pages from kernel
> directmap, which involves acquiring a mutex which results in
> "BUG: scheduling while atomic" warnings.
>
> So that's why we ended up somewhat duplicating some of the logic and
> using a separate callback chain that happens out of KVM MMU lock.

Let's break down the steps involved in changing memory attributes:

1) Update the memory attributes in the xarray (both TDX and SNP)
2) Zap the EPT/NPT mappings (required by TDX)
3) Update the RMP table (required by SNP)
4) Update the kernel directmap (SNP, but I guess TDX needs it as well)

Does SNP really need to zap the NPT mappings when changing the memory
attributes? (The new mappings will be created later, in the fault path.)
I can't find this requirement in the APM.

If yes, can we postpone the RMP table update to the later fault, as TDX
does? Then we could drop this update_mem_attr x86 op, since everything
would be handled in the SNP-specific fault handler.

If no, I guess we need an x86 op to tell whether zapping is required.

Back to the lock: updating the RMP table doesn't require a mutex. Taking
the lock is required when updating the directmap. Both TDX and SNP need
to update the directmap when changing memory attributes.

Wouldn't it be better to factor out the part that touches the kernel directmap?

Then you can call the update_mem_attr() x86 op in kvm_mem_attrs_changed(),
and update the kernel direct mapping for both TDX and SNP in
kvm_post_mem_attrs_changed().

>
> -Mike
>
> >
> > From 7c618c1f3c236c382e64680efcbe7d8a672aa870 Mon Sep 17 00:00:00 2001
> > Message-Id: <7c618c1f3c236c382e64680efcbe7d8a672aa870.1679114841.git.isaku.yamahata@xxxxxxxxx>
> > In-Reply-To: <428a676face7a06a90e59dca1c32941c9b6ee001.1679114841.git.isaku.yamahata@xxxxxxxxx>
> > References: <428a676face7a06a90e59dca1c32941c9b6ee001.1679114841.git.isaku.yamahata@xxxxxxxxx>
> > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> > Date: Fri, 17 Mar 2023 12:00:09 -0700
> > Subject: [PATCH 4/4] KVM: x86: Add 'set_mem_attr' x86 op
> >
> > This callback will do any platform-specific handling needed for
> > converting pages between shared/private.
> >
> > Originally-by: Michael Roth <michael.roth@xxxxxxx>
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> > ---
> > arch/x86/include/asm/kvm-x86-ops.h | 1 +
> > arch/x86/include/asm/kvm_host.h | 2 ++
> > arch/x86/kvm/mmu/mmu.c | 1 +
> > 3 files changed, 4 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > index dc5f18ac0bd5..956db2ee25a5 100644
> > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > @@ -100,6 +100,7 @@ KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
> > KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
> > KVM_X86_OP(load_mmu_pgd)
> > KVM_X86_OP(fault_is_private)
> > +KVM_X86_OP_OPTIONAL(set_mem_attr)
> > KVM_X86_OP_OPTIONAL(link_private_spt)
> > KVM_X86_OP_OPTIONAL(free_private_spt)
> > KVM_X86_OP_OPTIONAL(split_private_spt)
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 0382d236fbf4..88e11dd3afde 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1731,6 +1731,8 @@ struct kvm_x86_ops {
> > void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> > int root_level);
> > bool (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code);
> > + void (*set_mem_attr)(struct kvm *kvm, struct kvm_memory_slot *slot,
> > + unsigned int attr, gfn_t start, gfn_t end);
> >
> > int (*link_private_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> > void *private_spt);
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 0ec94c72895c..329333486e64 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -7908,6 +7908,7 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm,
> > gfn_t start, gfn_t end)
> > {
> > kvm_update_lpage_mixed_flag(kvm, slot, true, attrs, start, end);
> > + static_call(kvm_x86_set_mem_attr)(kvm, slot, attrs, start, end);
> > }
> >
> > void kvm_memory_attributes_create_memslot(struct kvm *kvm,
> > --
> > 2.25.1
> >
> > --
> > Isaku Yamahata <isaku.yamahata@xxxxxxxxx>