[PATCH 05/22] kvm: mmu: Add functions to handle changed TDP SPTEs

From: Ben Gardon
Date: Fri Sep 25 2020 - 17:23:33 EST


The existing bookkeeping done by KVM when a PTE is changed is spread
around several functions. This makes it difficult to remember all the
stats, bitmaps, and other subsystems that need to be updated whenever a
PTE is modified. When a non-leaf PTE is marked non-present or becomes a
leaf PTE, page table memory must also be freed. To simplify the MMU and
facilitate the use of atomic operations on SPTEs in future patches, create
functions to handle some of the bookkeeping required as a result of
a change.

Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell
machine. This series introduced no new failures.

This series can be viewed in Gerrit at:
https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 6 +-
arch/x86/kvm/mmu/mmu_internal.h | 3 +
arch/x86/kvm/mmu/tdp_mmu.c | 105 ++++++++++++++++++++++++++++++++
3 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0f871e36394da..f09081f9137b0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -310,8 +310,8 @@ static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
}

-static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
- u64 start_gfn, u64 pages)
+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn,
+ u64 pages)
{
struct kvm_tlb_range range;

@@ -819,7 +819,7 @@ static bool is_accessed_spte(u64 spte)
: !is_access_track_spte(spte);
}

-static bool is_dirty_spte(u64 spte)
+bool is_dirty_spte(u64 spte)
{
u64 dirty_mask = spte_shadow_dirty_mask(spte);

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 530b7d893c7b3..ff1fe0e04fba5 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -83,5 +83,8 @@ kvm_pfn_t spte_to_pfn(u64 pte);
bool is_mmio_spte(u64 spte);
int is_shadow_present_pte(u64 pte);
int is_last_spte(u64 pte, int level);
+bool is_dirty_spte(u64 spte);

+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn,
+ u64 pages);
#endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index cdca829e42040..653507773b42c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -189,3 +189,108 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)

return __pa(root->spt);
}
+
+static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level);
+
+/**
+ * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * @kvm: kvm instance
+ * @as_id: the address space of the paging structure the SPTE was a part of
+ * @gfn: the base GFN that was mapped by the SPTE
+ * @old_spte: The value of the SPTE before the change
+ * @new_spte: The value of the SPTE after the change
+ * @level: the level of the PT the SPTE is part of in the paging structure
+ *
+ * Handle bookkeeping that might result from the modification of a SPTE.
+ * This function must be called for all TDP SPTE modifications.
+ */
+static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level)
+{
+ bool was_present = is_shadow_present_pte(old_spte);
+ bool is_present = is_shadow_present_pte(new_spte);
+ bool was_leaf = was_present && is_last_spte(old_spte, level);
+ bool is_leaf = is_present && is_last_spte(new_spte, level);
+ bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+ u64 *pt;
+ u64 old_child_spte;
+ int i;
+
+ WARN_ON(level > PT64_ROOT_MAX_LEVEL);
+ WARN_ON(level < PG_LEVEL_4K);
+ WARN_ON(gfn % KVM_PAGES_PER_HPAGE(level));
+
+ /*
+ * If this warning were to trigger it would indicate that there was a
+ * missing MMU notifier or a race with some notifier handler.
+ * A present, leaf SPTE should never be directly replaced with another
+ * present leaf SPTE pointing to a differnt PFN. A notifier handler
+ * should be zapping the SPTE before the main MM's page table is
+ * changed, or the SPTE should be zeroed, and the TLBs flushed by the
+ * thread before replacement.
+ */
+ if (was_leaf && is_leaf && pfn_changed) {
+ pr_err("Invalid SPTE change: cannot replace a present leaf\n"
+ "SPTE with another present leaf SPTE mapping a\n"
+ "different PFN!\n"
+ "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
+ as_id, gfn, old_spte, new_spte, level);
+
+ /*
+ * Crash the host to prevent error propagation and guest data
+ * courruption.
+ */
+ BUG();
+ }
+
+ if (old_spte == new_spte)
+ return;
+
+ /*
+ * The only times a SPTE should be changed from a non-present to
+ * non-present state is when an MMIO entry is installed/modified/
+ * removed. In that case, there is nothing to do here.
+ */
+ if (!was_present && !is_present) {
+ /*
+ * If this change does not involve a MMIO SPTE, it is
+ * unexpected. Log the change, though it should not impact the
+ * guest since both the former and current SPTEs are nonpresent.
+ */
+ if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
+ pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
+ "should not be replaced with another,\n"
+ "different nonpresent SPTE, unless one or both\n"
+ "are MMIO SPTEs.\n"
+ "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
+ as_id, gfn, old_spte, new_spte, level);
+ return;
+ }
+
+
+ if (was_leaf && is_dirty_spte(old_spte) &&
+ (!is_dirty_spte(new_spte) || pfn_changed))
+ kvm_set_pfn_dirty(spte_to_pfn(old_spte));
+
+ /*
+ * Recursively handle child PTs if the change removed a subtree from
+ * the paging structure.
+ */
+ if (was_present && !was_leaf && (pfn_changed || !is_present)) {
+ pt = spte_to_child_pt(old_spte, level);
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+ old_child_spte = *(pt + i);
+ *(pt + i) = 0;
+ handle_changed_spte(kvm, as_id,
+ gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
+ old_child_spte, 0, level - 1);
+ }
+
+ kvm_flush_remote_tlbs_with_address(kvm, gfn,
+ KVM_PAGES_PER_HPAGE(level));
+
+ free_page((unsigned long)pt);
+ }
+}
--
2.28.0.709.gb0816b6eb0-goog