Re: [PATCH] KVM/nVMX: Stop mapping the "APIC-access address" page into the kernel

From: Raslan, KarimAllah
Date: Mon Dec 03 2018 - 11:59:48 EST


On Mon, 2018-12-03 at 14:59 +0100, KarimAllah Ahmed wrote:
> The "APIC-access address" is simply a token that the hypervisor puts into
> the PFN of a 4K EPTE (or PTE if using shadow paging) that triggers APIC
> virtualization whenever a page walk terminates with that PFN. This address
> has to be a legal address (i.e. within the physical address supported by
> the CPU), but it need not have WB memory behind it. In fact, it need not
> have anything at all behind it. When bit 31 ("activate secondary controls")
> of the primary processor-based VM-execution controls is set and bit 0
> ("virtualize APIC accesses") of the secondary processor-based VM-execution
> controls is set, the PFN recorded in the VMCS "APIC-access address" field
> will never be touched. (Instead, the access triggers APIC virtualization,
> which may access the PFN recorded in the "Virtual-APIC address" field of
> the VMCS.)
>
> So stop mapping the "APIC-access address" page into the kernel and even
> drop the requirements to have a valid page backing it. Instead, just use
> some token that:
>
> 1) Is not one of the valid guest pages.
> 2) Is within the physical address range supported by the CPU.
>
> Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
> ---
>
> Thanks Jim for the commit message :)
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/mmu.c | 10 ++++++
> arch/x86/kvm/vmx.c | 71 ++++++++++++++++++-----------------------
> 3 files changed, 42 insertions(+), 40 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fbda5a9..7e50196 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1077,6 +1077,7 @@ struct kvm_x86_ops {
> void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
> void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
> + bool (*nested_apic_access_addr)(struct kvm_vcpu *vcpu, gpa_t gpa, hpa_t *hpa);
> void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
> int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 7c03c0f..ae46a8d 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3962,9 +3962,19 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
> static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
> gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
> {
> + hpa_t hpa;
> struct kvm_memory_slot *slot;
> bool async;
>
> + if (is_guest_mode(vcpu) &&
> + kvm_x86_ops->nested_apic_access_addr &&
> + kvm_x86_ops->nested_apic_access_addr(vcpu, gfn_to_gpa(gfn), &hpa)) {
> + *pfn = hpa >> PAGE_SHIFT;
> + if (writable)
> + *writable = true;
> + return false;
> + }

Now thinking further about this, I actually still need to validate that the L12
EPT for this gfn actually contains the apic_access address, to ensure that I
only fix up the fault when the L1 hypervisor sets up both the VMCS L12 APIC_ACCESS
field and the L12 EPT to contain the same address.

Will fix and send v2.

> +
> /*
> * Don't expose private memslots to L2.
> */
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 83a614f..340cf56 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -864,7 +864,6 @@ struct nested_vmx {
> * Guest pages referred to in the vmcs02 with host-physical
> * pointers, so we must keep them pinned while L2 runs.
> */
> - struct page *apic_access_page;
> struct kvm_host_map virtual_apic_map;
> struct kvm_host_map pi_desc_map;
> struct kvm_host_map msr_bitmap_map;
> @@ -8512,10 +8511,6 @@ static void free_nested(struct kvm_vcpu *vcpu)
> kfree(vmx->nested.cached_vmcs12);
> kfree(vmx->nested.cached_shadow_vmcs12);
> /* Unpin physical memory we referred to in the vmcs02 */
> - if (vmx->nested.apic_access_page) {
> - kvm_release_page_dirty(vmx->nested.apic_access_page);
> - vmx->nested.apic_access_page = NULL;
> - }
> kvm_vcpu_unmap(&vmx->nested.virtual_apic_map);
> kvm_vcpu_unmap(&vmx->nested.pi_desc_map);
> vmx->nested.pi_desc = NULL;
> @@ -11901,41 +11896,27 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
> static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
> struct vmcs12 *vmcs12);
>
> +static hpa_t vmx_apic_access_addr(void)
> +{
> + /*
> + * The physical address chosen here has to:
> + * 1) Never be an address that could be assigned to a guest.
> + * 2) Be within the maximum physical limits of the CPU.
> + *
> + * So our choice below is completely random, but at least it follows
> + * these two rules.
> + */
> + return __pa_symbol(_text) & PAGE_MASK;
> +}
> +
> static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
> {
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct kvm_host_map *map;
> - struct page *page;
> - u64 hpa;
>
> - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
> - /*
> - * Translate L1 physical address to host physical
> - * address for vmcs02. Keep the page pinned, so this
> - * physical address remains valid. We keep a reference
> - * to it so we can release it later.
> - */
> - if (vmx->nested.apic_access_page) { /* shouldn't happen */
> - kvm_release_page_dirty(vmx->nested.apic_access_page);
> - vmx->nested.apic_access_page = NULL;
> - }
> - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
> - /*
> - * If translation failed, no matter: This feature asks
> - * to exit when accessing the given address, and if it
> - * can never be accessed, this feature won't do
> - * anything anyway.
> - */
> - if (!is_error_page(page)) {
> - vmx->nested.apic_access_page = page;
> - hpa = page_to_phys(vmx->nested.apic_access_page);
> - vmcs_write64(APIC_ACCESS_ADDR, hpa);
> - } else {
> - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
> - }
> - }
> + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
> + vmcs_write64(APIC_ACCESS_ADDR, vmx_apic_access_addr());
>
> if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
> map = &vmx->nested.virtual_apic_map;
> @@ -14196,12 +14177,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
> /* This is needed for same reason as it was needed in prepare_vmcs02 */
> vmx->host_rsp = 0;
>
> - /* Unpin physical memory we referred to in vmcs02 */
> - if (vmx->nested.apic_access_page) {
> - kvm_release_page_dirty(vmx->nested.apic_access_page);
> - vmx->nested.apic_access_page = NULL;
> - }
> -
> kvm_vcpu_unmap(&vmx->nested.virtual_apic_map);
> kvm_vcpu_unmap(&vmx->nested.pi_desc_map);
> vmx->nested.pi_desc = NULL;
> @@ -14949,6 +14924,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> +static bool vmx_nested_apic_access_addr(struct kvm_vcpu *vcpu,
> + gpa_t gpa, hpa_t *hpa)
> +{
> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
> + page_address_valid(vcpu, vmcs12->apic_access_addr) &&
> + (gpa_to_gfn(gpa) == gpa_to_gfn(vmcs12->apic_access_addr))) {
> + *hpa = vmx_apic_access_addr();
> + return true;
> + }
> +
> + return false;
> +}
> +
> static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .cpu_has_kvm_support = cpu_has_kvm_support,
> .disabled_by_bios = vmx_disabled_by_bios,
> @@ -15022,6 +15012,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .update_cr8_intercept = update_cr8_intercept,
> .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
> .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
> + .nested_apic_access_addr = vmx_nested_apic_access_addr,
> .get_enable_apicv = vmx_get_enable_apicv,
> .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
> .load_eoi_exitmap = vmx_load_eoi_exitmap,



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrer: Christian Schlaeger, Ralf Herbrich
Ust-ID: DE 289 237 879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B