Re: [PATCH] deal with interrupt shadow state for emulated instruction

From: Avi Kivity
Date: Thu Apr 30 2009 - 07:51:01 EST


Glauber Costa wrote:
we currently unblock shadow interrupt state when we skip an instruction,
but failing to do so when we actually emulate one. This blocks interrupts
in key instruction blocks, in particular sti; hlt; sequences

If the instruction emulated is an sti, we have to block shadow interrupts.
The same goes for mov ss. pop ss also needs it, but we don't currently
emulate it.

Without this patch, I cannot boot gpxe option roms at vmx machines.
This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cb306cf..9455a30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -510,6 +510,8 @@ struct kvm_x86_ops {
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*interrupt_shadow_mask)(struct kvm_vcpu *vcpu, int mask);

Can you verb this function? set_interrupt_shadow would make it nicely complement get_interrupt_shadow.

+ u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
int (*get_irq)(struct kvm_vcpu *vcpu);
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u32 ret = 0;
+
+ if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+ ret |= (X86_SHADOW_INT_STI && X86_SHADOW_INT_MOV_SS);
+ return ret;
+}

Hmm, if the guest runs an infinite emulated 'mov ss', it will keep toggling the MOV_SS bit, but STI will remain set, so we'll never allow an interrupt into the guest kernel.

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d2664fc..797d41f 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1618,6 +1618,16 @@ special_insn:
int err;
sel = c->src.val;
+ if (c->modrm_reg == VCPU_SREG_SS) {
+ u32 int_shadow =
+ kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu);
+ /* See sti emulation for an explanation of this */
+ if ((int_shadow & X86_SHADOW_INT_MOV_SS))
+ ctxt->interruptibility &= ~X86_SHADOW_INT_MOV_SS;
+ else
+ ctxt->interruptibility |= X86_SHADOW_INT_MOV_SS;
+ }

^=

@@ -1846,10 +1856,23 @@ special_insn:
ctxt->eflags &= ~X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
break;
- case 0xfb: /* sti */
+ case 0xfb: { /* sti */
+ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu);
+ /*
+ * an sti; sti; sequence only disable interrupts for the first
+ * instruction. So, if the last instruction, be it emulated or
+ * not, left the system with the INT_STI flag enabled, it
+ * means that the last instruction is an sti. We should not
+ * leave the flag on in this case
+ */
+ if ((int_shadow & X86_SHADOW_INT_STI))
+ ctxt->interruptibility &= ~X86_SHADOW_INT_STI;
+ else
+ ctxt->interruptibility |= X86_SHADOW_INT_STI;

^=

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/