[PATCH v2 3/4] KVM: x86/mmu: Use '0' as the one and only value for an invalid PAE root

From: Sean Christopherson
Date: Tue Mar 09 2021 - 17:43:02 EST


Use '0' to denote an invalid pae_root instead of using both '0' and INVALID_PAGE.
Unlike root_hpa, the pae_roots hold permission bits and thus are
guaranteed to be non-zero. Having to deal with both values leads to
bugs, e.g. failing to set back to INVALID_PAGE, warning on the wrong
value, etc...
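
To make the invariant concrete, here is a minimal userspace sketch (not
kernel code; the two macros are copied from the mmu_internal.h hunk
below, and PT_PRESENT_MASK is bit 0, as for any x86 paging entry):

  #include <stdint.h>
  #include <stdio.h>

  typedef uint64_t hpa_t;

  #define PT_PRESENT_MASK      (1ULL << 0) /* PRESENT is bit 0 of a PDPTE */
  #define INVALID_PAE_ROOT     0
  #define IS_VALID_PAE_ROOT(x) (!!(x))

  int main(void)
  {
  	/* A valid PAE root always carries at least the PRESENT bit, so
  	 * '0' can never collide with a valid root.
  	 */
  	hpa_t valid = 0x12345000ULL | PT_PRESENT_MASK;
  	hpa_t invalid = INVALID_PAE_ROOT;

  	/* Prints "valid=1 invalid=0". */
  	printf("valid=%d invalid=%d\n",
  	       IS_VALID_PAE_ROOT(valid), IS_VALID_PAE_ROOT(invalid));
  	return 0;
  }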

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
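Note for reviewers (this section is dropped by 'git am'): the comment
added to mmu_internal.h below observes that INVALID_PAGE cannot stand
in for a !PRESENT guest PDPTR, as the CPU would see a PRESENT PDPTR
with reserved bits set. A standalone sketch of that point, assuming
KVM's all-ones definition of INVALID_PAGE, i.e. (~(hpa_t)0):

  #include <stdint.h>
  #include <stdio.h>

  typedef uint64_t hpa_t;

  #define INVALID_PAGE    (~(hpa_t)0) /* all ones */
  #define PT_PRESENT_MASK (1ULL << 0)

  int main(void)
  {
  	/* INVALID_PAGE has every bit set, including PRESENT, so a PAE
  	 * root holding it would decode as a present PDPTR with reserved
  	 * bits set rather than as "no root". '0' is unambiguously
  	 * !PRESENT.
  	 */
  	printf("INVALID_PAGE: PRESENT=%d\n",
  	       !!(INVALID_PAGE & PT_PRESENT_MASK)); /* PRESENT=1 */
  	printf("0:            PRESENT=%d\n",
  	       !!((hpa_t)0 & PT_PRESENT_MASK));     /* PRESENT=0 */
  	return 0;
  }
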
 arch/x86/kvm/mmu/mmu.c          | 24 +++++++++++++-----------
 arch/x86/kvm/mmu/mmu_audit.c    |  2 +-
 arch/x86/kvm/mmu/mmu_internal.h | 10 ++++++++++
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index febe71935bb5..6b0576ff2846 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3197,11 +3197,14 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 			mmu_free_root_page(kvm, &mmu->root_hpa, &invalid_list);
 	} else if (mmu->pae_root) {
-		for (i = 0; i < 4; ++i)
-			if (mmu->pae_root[i] != 0)
-				mmu_free_root_page(kvm,
-						   &mmu->pae_root[i],
-						   &invalid_list);
+		for (i = 0; i < 4; ++i) {
+			if (!IS_VALID_PAE_ROOT(mmu->pae_root[i]))
+				continue;
+
+			mmu_free_root_page(kvm, &mmu->pae_root[i],
+					   &invalid_list);
+			mmu->pae_root[i] = INVALID_PAE_ROOT;
+		}
 	}
 	mmu->root_hpa = INVALID_PAGE;
 	mmu->root_pgd = 0;
@@ -3253,8 +3256,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 			return -EIO;
 
 		for (i = 0; i < 4; ++i) {
-			WARN_ON_ONCE(mmu->pae_root[i] &&
-				     VALID_PAGE(mmu->pae_root[i]));
+			WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
 
 			root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
 					      i << 30, PT32_ROOT_LEVEL, true);
@@ -3328,11 +3330,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	}
 
 	for (i = 0; i < 4; ++i) {
-		WARN_ON_ONCE(mmu->pae_root[i] && VALID_PAGE(mmu->pae_root[i]));
+		WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
 
 		if (mmu->root_level == PT32E_ROOT_LEVEL) {
 			if (!(pdptrs[i] & PT_PRESENT_MASK)) {
-				mmu->pae_root[i] = 0;
+				mmu->pae_root[i] = INVALID_PAE_ROOT;
 				continue;
 			}
 			root_gfn = pdptrs[i] >> PAGE_SHIFT;
@@ -3450,7 +3452,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
-		if (root && VALID_PAGE(root)) {
+		if (IS_VALID_PAE_ROOT(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = to_shadow_page(root);
 			mmu_sync_children(vcpu, sp);
@@ -5307,7 +5309,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
 	mmu->pae_root = page_address(page);
 	for (i = 0; i < 4; ++i)
-		mmu->pae_root[i] = INVALID_PAGE;
+		mmu->pae_root[i] = INVALID_PAE_ROOT;
 
 	return 0;
 }
diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c
index ced15fd58fde..cedc17b2f60e 100644
--- a/arch/x86/kvm/mmu/mmu_audit.c
+++ b/arch/x86/kvm/mmu/mmu_audit.c
@@ -70,7 +70,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
-		if (root && VALID_PAGE(root)) {
+		if (IS_VALID_PAE_ROOT(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = to_shadow_page(root);
 			__mmu_spte_walk(vcpu, sp, fn, 2);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index ec4fc28b325a..5fe9123fc932 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -20,6 +20,16 @@ extern bool dbg;
 #define MMU_WARN_ON(x) do { } while (0)
 #endif
 
+/*
+ * Unlike regular MMU roots, PAE "roots", a.k.a. PDPTEs/PDPTRs, have a PRESENT
+ * bit, and thus are guaranteed to be non-zero when valid. And, when a guest
+ * PDPTR is !PRESENT, its corresponding PAE root cannot be set to INVALID_PAGE,
+ * as the CPU would treat that as PRESENT PDPTR with reserved bits set. Use
+ * '0' instead of INVALID_PAGE to indicate an invalid PAE root.
+ */
+#define INVALID_PAE_ROOT	0
+#define IS_VALID_PAE_ROOT(x)	(!!(x))
+
 struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
--
2.30.1.766.gb4fecdf3b7-goog