[PATCH v3 2/4] KVM: SVM: Add emulation support for #GP triggered by SVM instructions

From: Wei Huang
Date: Tue Jan 26 2021 - 06:18:16 EST


From: Bandan Das <bsd@xxxxxxxxxx>

While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
before checking VMCB's instruction intercept. If EAX falls into such
memory areas, #GP is triggered before VMEXIT. This causes problem under
nested virtualization. To solve this problem, KVM needs to trap #GP and
check the instructions triggering #GP. For VM execution instructions,
KVM emulates these instructions.

Co-developed-by: Wei Huang <wei.huang2@xxxxxxx>
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
---
arch/x86/kvm/svm/svm.c | 109 ++++++++++++++++++++++++++++++++++-------
1 file changed, 91 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7ef171790d02..e5ca01e25e89 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -200,6 +200,8 @@ module_param(sev_es, int, 0444);
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);

+bool svm_gp_erratum_intercept = true;
+
static u8 rsm_ins_bytes[] = "\x0f\xaa";

static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!(efer & EFER_SVME)) {
svm_leave_nested(svm);
svm_set_gif(svm, true);
+ /* #GP intercept is still needed in vmware_backdoor */
+ if (!enable_vmware_backdoor)
+ clr_exception_intercept(svm, GP_VECTOR);

/*
* Free the nested guest state, unless we are in SMM.
@@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)

svm->vmcb->save.efer = efer | EFER_SVME;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+ /* Enable #GP interception for SVM instructions */
+ if (svm_gp_erratum_intercept)
+ set_exception_intercept(svm, GP_VECTOR);
+
return 0;
}

@@ -1957,24 +1966,6 @@ static int ac_interception(struct vcpu_svm *svm)
return 1;
}

-static int gp_interception(struct vcpu_svm *svm)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- u32 error_code = svm->vmcb->control.exit_info_1;
-
- WARN_ON_ONCE(!enable_vmware_backdoor);
-
- /*
- * VMware backdoor emulation on #GP interception only handles IN{S},
- * OUT{S}, and RDPMC, none of which generate a non-zero error code.
- */
- if (error_code) {
- kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
- return 1;
- }
- return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
-}
-
static bool is_erratum_383(void)
{
int err, i;
@@ -2173,6 +2164,88 @@ static int vmrun_interception(struct vcpu_svm *svm)
return nested_svm_vmrun(svm);
}

+enum {
+ NONE_SVM_INSTR,
+ SVM_INSTR_VMRUN,
+ SVM_INSTR_VMLOAD,
+ SVM_INSTR_VMSAVE,
+};
+
+/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
+static int svm_instr_opcode(struct kvm_vcpu *vcpu)
+{
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+ if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
+ return NONE_SVM_INSTR;
+
+ switch (ctxt->modrm) {
+ case 0xd8: /* VMRUN */
+ return SVM_INSTR_VMRUN;
+ case 0xda: /* VMLOAD */
+ return SVM_INSTR_VMLOAD;
+ case 0xdb: /* VMSAVE */
+ return SVM_INSTR_VMSAVE;
+ default:
+ break;
+ }
+
+ return NONE_SVM_INSTR;
+}
+
+static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
+{
+ int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
+ [SVM_INSTR_VMRUN] = vmrun_interception,
+ [SVM_INSTR_VMLOAD] = vmload_interception,
+ [SVM_INSTR_VMSAVE] = vmsave_interception,
+ };
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm_instr_handlers[opcode](svm);
+}
+
+/*
+ * #GP handling code. Note that #GP can be triggered under the following two
+ * cases:
+ * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
+ * some AMD CPUs when EAX of these instructions are in the reserved memory
+ * regions (e.g. SMM memory on host).
+ * 2) VMware backdoor
+ */
+static int gp_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 error_code = svm->vmcb->control.exit_info_1;
+ int opcode;
+
+ /* Both #GP cases have zero error_code */
+ if (error_code)
+ goto reinject;
+
+ /* Decode the instruction for usage later */
+ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
+ goto reinject;
+
+ opcode = svm_instr_opcode(vcpu);
+
+ if (opcode == NONE_SVM_INSTR) {
+ WARN_ON_ONCE(!enable_vmware_backdoor);
+
+ /*
+ * VMware backdoor emulation on #GP interception only handles
+ * IN{S}, OUT{S}, and RDPMC.
+ */
+ return kvm_emulate_instruction(vcpu,
+ EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
+ } else
+ return emulate_svm_instr(vcpu, opcode);
+
+reinject:
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ return 1;
+}
+
void svm_set_gif(struct vcpu_svm *svm, bool value)
{
if (value) {
--
2.27.0