[PATCH v4 08/11] KVM: X86: Move write_l1_tsc_offset() logic to common code and rename it

From: Ilias Stamatis
Date: Wed May 26 2021 - 14:47:10 EST


The write_l1_tsc_offset() callback has a misleading name. It does not
set L1's TSC offset, it rather updates the current TSC offset which
might be different if a nested guest is executing. Additionally, both
the vmx and svm implementations use the same logic for calculating the
current TSC before writing it to hardware.

Rename the function and move the common logic to the caller. The vmx/svm
specific code now merely sets the given offset to the corresponding
hardware structure.

Signed-off-by: Ilias Stamatis <ilstam@xxxxxxxxxx>
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 +-
arch/x86/include/asm/kvm_host.h | 3 +--
arch/x86/kvm/svm/svm.c | 21 ++++-----------------
arch/x86/kvm/vmx/vmx.c | 23 +++--------------------
arch/x86/kvm/x86.c | 24 +++++++++++++++++++++---
5 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 2063616fba1c..029c9615378f 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -89,7 +89,7 @@ KVM_X86_OP(load_mmu_pgd)
KVM_X86_OP_NULL(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
-KVM_X86_OP(write_l1_tsc_offset)
+KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aaf756442ed1..f099277b993d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1307,8 +1307,7 @@ struct kvm_x86_ops {

u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
- /* Returns actual tsc_offset set in active VMCS */
- u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);

/*
* Retrieve somewhat arbitrary exit information. Intended to be used
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ca70e46f9194..8dfb2513b72a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1094,26 +1094,13 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
return kvm_default_tsc_scaling_ratio;
}

-static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 g_tsc_offset = 0;
-
- if (is_guest_mode(vcpu)) {
- /* Write L1's TSC offset. */
- g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->vmcb01.ptr->control.tsc_offset;
- svm->vmcb01.ptr->control.tsc_offset = offset;
- }
-
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- svm->vmcb->control.tsc_offset - g_tsc_offset,
- offset);
-
- svm->vmcb->control.tsc_offset = offset + g_tsc_offset;

+ svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
+ svm->vmcb->control.tsc_offset = offset;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
- return svm->vmcb->control.tsc_offset;
}

/* Evaluate instruction intercepts that depend on guest CPUID features. */
@@ -4540,7 +4527,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {

.get_l2_tsc_offset = svm_get_l2_tsc_offset,
.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
- .write_l1_tsc_offset = svm_write_l1_tsc_offset,
+ .write_tsc_offset = svm_write_tsc_offset,

.load_mmu_pgd = svm_load_mmu_pgd,

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1c83605eccc1..4b70431c2edd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1808,26 +1808,9 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
return kvm_default_tsc_scaling_ratio;
}

-static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u64 g_tsc_offset = 0;
-
- /*
- * We're here if L1 chose not to trap WRMSR to TSC. According
- * to the spec, this should set L1's TSC; The offset that L1
- * set for L2 remains unchanged, and still needs to be added
- * to the newly set TSC to get L2's TSC.
- */
- if (is_guest_mode(vcpu) &&
- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
- g_tsc_offset = vmcs12->tsc_offset;
-
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- vcpu->arch.tsc_offset - g_tsc_offset,
- offset);
- vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
- return offset + g_tsc_offset;
+ vmcs_write64(TSC_OFFSET, offset);
}

/*
@@ -7723,7 +7706,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {

.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
- .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
+ .write_tsc_offset = vmx_write_tsc_offset,

.load_mmu_pgd = vmx_load_mmu_pgd,

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 705de0badc9e..801fa1e8e915 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2359,10 +2359,28 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
}
EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);

-static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
{
- vcpu->arch.l1_tsc_offset = offset;
- vcpu->arch.tsc_offset = static_call(kvm_x86_write_l1_tsc_offset)(vcpu, offset);
+ trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+ vcpu->arch.l1_tsc_offset,
+ l1_offset);
+
+ vcpu->arch.l1_tsc_offset = l1_offset;
+
+ /*
+ * If we are here because L1 chose not to trap WRMSR to TSC then
+ * according to the spec this should set L1's TSC (as opposed to
+ * setting L1's offset for L2).
+ */
+ if (is_guest_mode(vcpu))
+ vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
+ l1_offset,
+ static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
+ static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
+ else
+ vcpu->arch.tsc_offset = l1_offset;
+
+ static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
}

static inline bool kvm_check_tsc_unstable(void)
--
2.17.1