[PATCH 04/43] KVM: VMX: Store the host kernel's IDT base in a global variable

From: Paolo Bonzini
Date: Thu Jun 13 2019 - 13:14:34 EST


From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>

Although the kernel may use multiple IDTs, KVM should only ever see the
"real" IDT, e.g. the early init IDT is long gone by the time KVM runs
and the debug stack IDT is only used for small windows of time in very
specific flows.

Before commit a547c6db4d2f1 ("KVM: VMX: Enable acknowledge interupt on
vmexit"), the kernel's IDT base was consumed by KVM only when setting
constant VMCS state, i.e. to set VMCS.HOST_IDTR_BASE. Because constant
host state is done once per vCPU, there was ostensibly no need to cache
the kernel's IDT base.

When support for "ack interrupt on exit" was introduced, KVM added a
second consumer of the IDT base as handling already-acked interrupts
requires directly calling the interrupt handler, i.e. KVM uses the IDT
base to find the address of the handler. Because interrupts are a fast
path, KVM cached the IDT base to avoid having to VMREAD HOST_IDTR_BASE.
Presumably, the IDT base was cached on a per-vCPU basis simply because
the existing code grabbed the IDT base on a per-vCPU (VMCS) basis.

Note, all post-boot IDTs use the same handlers for external interrupts,
i.e. the "ack interrupt on exit" use of the IDT base would be unaffected
even if the cached IDT somehow did not match the current IDT. And as
for the original use case of setting VMCS.HOST_IDTR_BASE, if any of the
above analysis is wrong then KVM has had a bug since the beginning of
time since KVM has effectively been caching the IDT at vCPU creation
since commit a8b732ca01c ("[PATCH] kvm: userspace interface").

Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/vmx/vmx.c | 12 +++++++-----
arch/x86/kvm/vmx/vmx.h | 1 -
2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b541fe2c6347..c90abf33b509 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -392,6 +392,7 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
};

u64 host_efer;
+static unsigned long host_idt_base;

/*
* Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
@@ -3728,7 +3729,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
u32 low32, high32;
unsigned long tmpl;
- struct desc_ptr dt;
unsigned long cr0, cr3, cr4;

cr0 = read_cr0();
@@ -3764,9 +3764,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */

- store_idt(&dt);
- vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
- vmx->host_idt_base = dt.address;
+ vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */

vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */

@@ -6144,7 +6142,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
return;

vector = intr_info & INTR_INFO_VECTOR_MASK;
- desc = (gate_desc *)vmx->host_idt_base + vector;
+ desc = (gate_desc *)host_idt_base + vector;
entry = gate_offset(desc);

asm volatile(
@@ -7429,10 +7427,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
+ struct desc_ptr dt;
int r, i;

rdmsrl_safe(MSR_EFER, &host_efer);

+ store_idt(&dt);
+ host_idt_base = dt.address;
+
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);

diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 1cdaa5af8245..decd31055da8 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -187,7 +187,6 @@ struct vcpu_vmx {
int nmsrs;
int save_nmsrs;
bool guest_msrs_dirty;
- unsigned long host_idt_base;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
--
1.8.3.1