Hello everyone,
here it is the mmu notifier #v14.
http://www.kernel.org/pub/linux/kernel/people/andrea/patches/v2.6/2.6.25/mmu-notifier-v14/
Please everyone involved review and (hopefully ;) ack that this is
safe to go in 2.6.26, the most important is to verify that this is a
noop when disarmed regardless of MMU_NOTIFIER=y or =n.
http://www.kernel.org/pub/linux/kernel/people/andrea/patches/v2.6/2.6.25/mmu-notifier-v14/mmu-notifier-core
I'll be sending that patch to Andrew inbox.
Signed-off-by: Andrea Arcangeli <andrea@xxxxxxxxxxxx>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fab..ce3251c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
+ select MMU_NOTIFIER
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ad6f54..853087a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -663,6 +663,108 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
account_shadowed(kvm, gfn);
}
+static void kvm_unmap_spte(struct kvm *kvm, u64 *spte)
+{
+ struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ get_page(page);
static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1643,11 +1771,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int r;
u64 gpte = 0;
pfn_t pfn;
-
- vcpu->arch.update_pte.largepage = 0;
+ int mmu_seq;
+ int largepage;
if (bytes != 4 && bytes != 8)
- return;
+ goto out_lock;
/*
* Assume that the pte write on a page table of the same type
@@ -1660,7 +1788,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if ((bytes == 4) && (gpa % 4 == 0)) {
r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
if (r)
- return;
+ goto out_lock;
memcpy((void *)&gpte + (gpa % 8), new, 4);
} else if ((bytes == 8) && (gpa % 8 == 0)) {
memcpy((void *)&gpte, new, 8);
@@ -1670,23 +1798,35 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
memcpy((void *)&gpte, new, 4);
}
if (!is_present_pte(gpte))
- return;
+ goto out_lock;
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ largepage = 0;
down_read(¤t->mm->mmap_sem);
if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
- vcpu->arch.update_pte.largepage = 1;
+ largepage = 1;
}
+ mmu_seq = atomic_read(&vcpu->kvm->arch.mmu_notifier_seq);
+ /* implicit mb(), we'll read before PT lock is unlocked */
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(¤t->mm->mmap_sem);
- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- return;
- }
+ if (is_error_pfn(pfn))
+ goto out_release_and_lock;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ BUG_ON(!is_error_pfn(vcpu->arch.update_pte.pfn));
vcpu->arch.update_pte.gfn = gfn;
vcpu->arch.update_pte.pfn = pfn;
+ vcpu->arch.update_pte.largepage = largepage;
+ vcpu->arch.update_pte.mmu_seq = mmu_seq;
+ return;
+
+out_release_and_lock:
+ kvm_release_pfn_clean(pfn);
+out_lock:
+ spin_lock(&vcpu->kvm->mmu_lock);
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1711,7 +1851,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
@@ -3899,13 +4037,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_free_pit(kvm);
- kfree(kvm->arch.vpic);
- kfree(kvm->arch.vioapic);
- kvm_free_vcpus(kvm);
- kvm_free_physmem(kvm);
- if (kvm->arch.apic_access_page)
- put_page(kvm->arch.apic_access_page);
+ /*
+ * kvm_mmu_notifier_release() will be called before
+ * mmu_notifier_unregister returns, if it didn't run
+ * already.
+ */
+ mmu_notifier_unregister(&kvm->arch.mmu_notifier, kvm->mm);
kfree(kvm);
}