[PATCH 3.16 183/233] KVM: async_pf: avoid async pf injection when in guest mode

From: Ben Hutchings
Date: Sat Sep 09 2017 - 19:08:03 EST


3.16.48-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>

commit 9bc1f09f6fa76fdf31eb7d6a4a4df43574725f93 upstream.

INFO: task gnome-terminal-:1734 blocked for more than 120 seconds.
Not tainted 4.12.0-rc4+ #8
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
gnome-terminal- D 0 1734 1015 0x00000000
Call Trace:
__schedule+0x3cd/0xb30
schedule+0x40/0x90
kvm_async_pf_task_wait+0x1cc/0x270
? __vfs_read+0x37/0x150
? prepare_to_swait+0x22/0x70
do_async_page_fault+0x77/0xb0
? do_async_page_fault+0x77/0xb0
async_page_fault+0x28/0x30

This is triggered by running both win7 and win2016 guests on L1 KVM simultaneously
and then putting memory pressure on L1; I can observe this hang on L1 once
at least ~70% of the swap area on L0 is occupied.

This happens because an async pf that should have been injected into L1 is
injected into L2 instead: the L2 guest starts receiving page faults with a bogus
%cr2 (actually the apf token from the host), and the L1 guest accumulates tasks
stuck in D state in kvm_async_pf_task_wait() because the PAGE_READY async_pfs
never arrive.

This patch fixes the hang by only doing async pf when executing the L1 guest.
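
For reference, with this change applied kvm_can_do_async_pf() in
arch/x86/kvm/mmu.c ends up looking roughly like the sketch below
(reconstructed from the hunks that follow, not copied from the tree):

  bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
  {
          if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
                       kvm_event_needs_reinjection(vcpu)))
                  return false;

          /* Never queue an async pf while running a nested (L2) guest;
           * the apf token would be delivered to L2 instead of L1. */
          if (is_guest_mode(vcpu))
                  return false;

          return kvm_x86_ops->interrupt_allowed(vcpu);
  }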

Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
arch/x86/kvm/mmu.c | 7 +++++--
arch/x86/kvm/mmu.h | 1 +
arch/x86/kvm/x86.c | 3 +--
3 files changed, 7 insertions(+), 4 deletions(-)

--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3292,12 +3292,15 @@ static int kvm_arch_setup_async_pf(struc
return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}

-static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
kvm_event_needs_reinjection(vcpu)))
return false;

+ if (is_guest_mode(vcpu))
+ return false;
+
return kvm_x86_ops->interrupt_allowed(vcpu);
}

@@ -3311,7 +3314,7 @@ static bool try_async_pf(struct kvm_vcpu
if (!async)
return false; /* *pfn has correct page already */

- if (!prefault && can_do_async_pf(vcpu)) {
+ if (!prefault && kvm_can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -79,6 +79,7 @@ int handle_mmio_page_fault_common(struct
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);

static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7612,8 +7612,7 @@ bool kvm_arch_can_inject_async_page_pres
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
return true;
else
- return !kvm_event_needs_reinjection(vcpu) &&
- kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_can_do_async_pf(vcpu);
}

void kvm_arch_register_noncoherent_dma(struct kvm *kvm)