Re: [PATCH v2 08/29] LoongArch: KVM: Implement vcpu handle exit interface

From: Paolo Bonzini
Date: Mon Feb 20 2023 - 13:46:34 EST


On 2/20/23 07:57, Tianrui Zhao wrote:
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)

As far as I can see, RESUME_FLAG_NV does not exist anymore and this is just copied from arch/mips?

You can keep RESUME_HOST/RESUME_GUEST for the individual functions, but here please make it just "1" for resume guest, and "<= 0" for resume host. This is easy enough to check from assembly and removes the srai by 2.
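
Something along these lines at the bottom of _kvm_handle_exit(), just to illustrate the convention (untested sketch, keep your actual handler logic):

        if (ret == RESUME_GUEST)
                return 1;       /* positive: reenter the guest */

        /* <= 0 tells the assembly caller to leave the guest */
        return 0;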

+static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+        unsigned long exst = vcpu->arch.host_estat;
+        u32 intr = exst & 0x1fff; /* ignore NMI */
+        u32 exccode = (exst & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+        u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+        int ret = RESUME_GUEST, cpu;
+
+        vcpu->mode = OUTSIDE_GUEST_MODE;
+
+        /* Set a default exit reason */
+        run->exit_reason = KVM_EXIT_UNKNOWN;
+        run->ready_for_interrupt_injection = 1;
+
+        /*
+         * Set the appropriate status bits based on host CPU features,
+         * before we hit the scheduler
+         */

Stale comment?

+        local_irq_enable();

Please add guest_state_exit_irqoff() here.
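
That is, something like this (untested sketch, while interrupts are still disabled):

        vcpu->mode = OUTSIDE_GUEST_MODE;

        /* leave the KVM guest state section before enabling interrupts */
        guest_state_exit_irqoff();
        local_irq_enable();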

+        kvm_debug("%s: exst: %lx, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
+                  __func__, exst, opc, run, vcpu);

Please add the information to the kvm_exit tracepoint (thus also removing variables such as "exst" or "opc" from this function) instead of calling kvm_debug().
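
For example, something along these lines in your trace.h (sketch only; the field names are just an illustration, pick whatever is useful):

        TRACE_EVENT(kvm_exit,
                TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
                TP_ARGS(vcpu, reason),
                TP_STRUCT__entry(
                        __field(unsigned long, pc)
                        __field(unsigned long, estat)
                        __field(unsigned int, reason)
                ),
                TP_fast_assign(
                        __entry->pc = vcpu->arch.pc;
                        __entry->estat = vcpu->arch.host_estat;
                        __entry->reason = reason;
                ),
                TP_printk("PC 0x%08lx, ESTAT 0x%08lx, reason %u",
                          __entry->pc, __entry->estat, __entry->reason)
        );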

+        trace_kvm_exit(vcpu, exccode);
+        if (exccode) {
+                ret = _kvm_handle_fault(vcpu, exccode);
+        } else {
+                WARN(!intr, "suspicious vm exiting");
+                ++vcpu->stat.int_exits;
+
+                if (need_resched())
+                        cond_resched();

This "if" is not necessary because there is already a cond_resched() below.

+                ret = RESUME_GUEST;

This "ret" is not necessary because "ret" is already initialized to RESUME_GUEST above, you can either remove it or remove the initializer.

+        }
+
+        cond_resched();
+        local_irq_disable();

At this point, ret is either RESUME_GUEST or RESUME_HOST. So, the "if"s below are either all taken or all not taken, and most of this code:

        kvm_acquire_timer(vcpu);
        _kvm_deliver_intr(vcpu);

        if (signal_pending(current)) {
                run->exit_reason = KVM_EXIT_INTR;
                ret = (-EINTR << 2) | RESUME_HOST;
                ++vcpu->stat.signal_exits;
                // no need for a tracepoint here
                // trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
        }

        trace_kvm_reenter(vcpu);

        /*
         * Make sure the read of VCPU requests in vcpu_reenter()
         * callback is not reordered ahead of the write to vcpu->mode,
         * or we could miss a TLB flush request while the requester sees
         * the VCPU as outside of guest mode and not needing an IPI.
         */
        smp_store_mb(vcpu->mode, IN_GUEST_MODE);

        cpu = smp_processor_id();
        _kvm_check_requests(vcpu, cpu);
        _kvm_check_vmid(vcpu, cpu);
        vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);

        /*
         * If FPU are enabled (i.e. the guest's FPU context
         * is live), restore FCSR0.
         */
        if (_kvm_guest_has_fpu(&vcpu->arch) &&
            read_csr_euen() & (CSR_EUEN_FPEN)) {
                kvm_restore_fcsr(&vcpu->arch.fpu);
        }

(all except for the "if (signal_pending(current))" and the final "if") is pretty much duplicated with kvm_arch_vcpu_ioctl_run(); the remaining code can also be done from kvm_arch_vcpu_ioctl_run(), since the cost of that is small. Please move it to a separate function, for example:

int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
{
        if (signal_pending(current)) {
                vcpu->run->exit_reason = KVM_EXIT_INTR;
                ++vcpu->stat.signal_exits;
                return -EINTR;
        }

        kvm_acquire_timer(vcpu);
        _kvm_deliver_intr(vcpu);

        ...

        if (_kvm_guest_has_fpu(&vcpu->arch) &&
            read_csr_euen() & (CSR_EUEN_FPEN)) {
                kvm_restore_fcsr(&vcpu->arch.fpu);
        }
        return 1;
}

Call it from _kvm_handle_exit():

        if (ret == RESUME_HOST)
                return 0;

        r = kvm_pre_enter_guest(vcpu);
        if (r > 0) {
                trace_kvm_reenter(vcpu);
                guest_state_enter_irqoff();
        }

        return r;

and from kvm_arch_vcpu_ioctl_run():

        local_irq_disable();
        guest_timing_enter_irqoff();
        r = kvm_pre_enter_guest(vcpu);
        if (r > 0) {
                trace_kvm_enter(vcpu);
                /*
                 * This should actually not be a function pointer, but
                 * it is kept as one here just for clarity.
                 */
                guest_state_enter_irqoff();
                r = vcpu->arch.vcpu_run(run, vcpu);
                /* guest_state_exit_irqoff() already done. */
                trace_kvm_out(vcpu);
        }
        guest_timing_exit_irqoff();
        local_irq_enable();

out:
        kvm_sigset_deactivate(vcpu);

        vcpu_put(vcpu);
        return r;

Paolo

+        if (ret == RESUME_GUEST)
+                kvm_acquire_timer(vcpu);
+
+        if (!(ret & RESUME_HOST)) {
+                _kvm_deliver_intr(vcpu);
+                /* Only check for signals if not already exiting to userspace */
+                if (signal_pending(current)) {
+                        run->exit_reason = KVM_EXIT_INTR;
+                        ret = (-EINTR << 2) | RESUME_HOST;
+                        ++vcpu->stat.signal_exits;
+                        trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
+                }
+        }
+
+        if (ret == RESUME_GUEST) {
+                trace_kvm_reenter(vcpu);
+
+                /*
+                 * Make sure the read of VCPU requests in vcpu_reenter()
+                 * callback is not reordered ahead of the write to vcpu->mode,
+                 * or we could miss a TLB flush request while the requester sees
+                 * the VCPU as outside of guest mode and not needing an IPI.
+                 */
+                smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
+                cpu = smp_processor_id();
+                _kvm_check_requests(vcpu, cpu);
+                _kvm_check_vmid(vcpu, cpu);
+                vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
+
+                /*
+                 * If FPU are enabled (i.e. the guest's FPU context
+                 * is live), restore FCSR0.
+                 */
+                if (_kvm_guest_has_fpu(&vcpu->arch) &&
+                    read_csr_euen() & (CSR_EUEN_FPEN)) {
+                        kvm_restore_fcsr(&vcpu->arch.fpu);
+                }
+        }
+
+        return ret;
+}
+
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
        int i;