[PATCH] KVM: VMX: optimize shadow VMCS copying

From: Paolo Bonzini
Date: Wed Dec 13 2017 - 07:14:22 EST


Because all fields can be read/written with a single vmread/vmwrite on
64-bit kernels, the switch statements in copy_vmcs12_to_shadow and
copy_shadow_to_vmcs12 are unnecessary.

What I did in this patch is to copy the two parts of 64-bit fields
separately on 32-bit kernels, to keep all complicated #ifdef-ery
in init_vmcs_shadow_fields. The disadvantage is that 64-bit fields
have to be listed separately in shadow_read_only/read_write_fields,
but those are few and we can validate the arrays when building the
VMREAD and VMWRITE bitmaps. This saves a few hundred clock cycles
per nested vmexit.

However there is still a "switch" in vmcs_read_any and vmcs_write_any.
So, while at it, this patch reorders the fields by type, hoping that
the branch predictor appreciates it.

Cc: Jim Mattson <jmattson@xxxxxxxxxx>
Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/vmx.c | 135 ++++++++++++++++++++++++-----------------------------
1 file changed, 61 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9fadba5338fd..3d3b0ae0da63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -727,49 +727,59 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
* (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
* by nested_vmx_failValid)
*/
+ /* 32-bits */
VM_EXIT_REASON,
VM_EXIT_INTR_INFO,
VM_EXIT_INSTRUCTION_LEN,
IDT_VECTORING_INFO_FIELD,
IDT_VECTORING_ERROR_CODE,
VM_EXIT_INTR_ERROR_CODE,
+
+ /* Natural width */
EXIT_QUALIFICATION,
GUEST_LINEAR_ADDRESS,
- GUEST_PHYSICAL_ADDRESS
+
+ /* 64-bit */
+ GUEST_PHYSICAL_ADDRESS,
+ GUEST_PHYSICAL_ADDRESS_HIGH,
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);

static unsigned long shadow_read_write_fields[] = {
+ /* 16-bits */
+ GUEST_CS_SELECTOR,
+ GUEST_INTR_STATUS,
+ GUEST_PML_INDEX,
+ HOST_FS_SELECTOR,
+ HOST_GS_SELECTOR,
+
+ /* 32-bits */
+ CPU_BASED_VM_EXEC_CONTROL,
+ EXCEPTION_BITMAP,
+ VM_ENTRY_EXCEPTION_ERROR_CODE,
+ VM_ENTRY_INTR_INFO_FIELD,
+ VM_ENTRY_INSTRUCTION_LEN,
TPR_THRESHOLD,
+ GUEST_CS_LIMIT,
+ GUEST_CS_AR_BYTES,
+ GUEST_INTERRUPTIBILITY_INFO,
+ VMX_PREEMPTION_TIMER_VALUE,
+
+ /* Natural width */
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
GUEST_CR3,
GUEST_CR4,
- GUEST_INTERRUPTIBILITY_INFO,
GUEST_RFLAGS,
- GUEST_CS_SELECTOR,
- GUEST_CS_AR_BYTES,
- GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
- GUEST_PML_INDEX,
- GUEST_INTR_STATUS,
- VMX_PREEMPTION_TIMER_VALUE,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
- EXCEPTION_BITMAP,
- CPU_BASED_VM_EXEC_CONTROL,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- VM_ENTRY_INTR_INFO_FIELD,
- VM_ENTRY_INSTRUCTION_LEN,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
HOST_FS_BASE,
HOST_GS_BASE,
- HOST_FS_SELECTOR,
- HOST_GS_SELECTOR
};
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
@@ -4068,15 +4078,39 @@ static void init_vmcs_shadow_fields(void)
{
int i, j;

- /* No checks for read only fields yet */
+ for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+ u16 field = shadow_read_only_fields[i];
+ if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 &&
+ (i + 1 == max_shadow_read_only_fields ||
+ shadow_read_only_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_only_field %x\n",
+ field + 1);
+
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
+ if (j < i)
+ shadow_read_only_fields[j] = field;
+ j++;
+ }
+ max_shadow_read_only_fields = j;

for (i = j = 0; i < max_shadow_read_write_fields; i++) {
+ u16 field = shadow_read_write_fields[i];
+ if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 &&
+ (i + 1 == max_shadow_read_write_fields ||
+ shadow_read_write_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_write_field %x\n",
+ field + 1);
+
/*
* PML and the preemption timer can be emulated, but the
* processor cannot vmwrite to fields that don't exist
* on bare metal.
*/
- switch (shadow_read_write_fields[i]) {
+ switch (field) {
case GUEST_PML_INDEX:
if (!cpu_has_vmx_pml())
continue;
@@ -4093,31 +4127,17 @@ static void init_vmcs_shadow_fields(void)
break;
}

+ clear_bit(field, vmx_vmwrite_bitmap);
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
if (j < i)
- shadow_read_write_fields[j] =
- shadow_read_write_fields[i];
+ shadow_read_write_fields[j] = field;
j++;
}
max_shadow_read_write_fields = j;
-
- /* shadowed fields guest access without vmexit */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- unsigned long field = shadow_read_write_fields[i];
-
- clear_bit(field, vmx_vmwrite_bitmap);
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
- clear_bit(field + 1, vmx_vmwrite_bitmap);
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
- }
- for (i = 0; i < max_shadow_read_only_fields; i++) {
- unsigned long field = shadow_read_only_fields[i];
-
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
}

static __init int alloc_kvm_area(void)
@@ -7820,23 +7840,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)

for (i = 0; i < num_fields; i++) {
field = fields[i];
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- field_value = vmcs_read16(field);
- break;
- case VMCS_FIELD_TYPE_U32:
- field_value = vmcs_read32(field);
- break;
- case VMCS_FIELD_TYPE_U64:
- field_value = vmcs_read64(field);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- field_value = vmcs_readl(field);
- break;
- default:
- WARN_ON(1);
- continue;
- }
+ field_value = __vmcs_readl(field);
vmcs12_write_any(&vmx->vcpu, field, field_value);
}

@@ -7867,24 +7871,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
-
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- vmcs_write16(field, (u16)field_value);
- break;
- case VMCS_FIELD_TYPE_U32:
- vmcs_write32(field, (u32)field_value);
- break;
- case VMCS_FIELD_TYPE_U64:
- vmcs_write64(field, (u64)field_value);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- vmcs_writel(field, (long)field_value);
- break;
- default:
- WARN_ON(1);
- break;
- }
+ __vmcs_writel(field, field_value);
}
}

--
1.8.3.1