[patch V4 part 2 17/18] x86/kvm/vmx: Move guest enter/exit into .noinstr.text

From: Thomas Gleixner
Date: Tue May 05 2020 - 10:14:58 EST


Move the functions which run inside the RCU-off region of the guest
entry/exit path into the non-instrumentable text section
(.noinstr.text), so that objtool can verify that they do not call out
into instrumentable code while RCU is not watching.
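
For reference, the "noinstr" annotation used throughout this patch comes
from the core instrumentation-protection series this depends on; at this
stage it expands to roughly the following (sketch, sanitizer attributes
omitted):

/* Section for code which can't be instrumented at all */
#define noinstr \
	noinline notrace __attribute__((__section__(".noinstr.text")))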

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
---
arch/x86/include/asm/hardirq.h | 4 -
arch/x86/include/asm/kvm_host.h | 8 +++
arch/x86/kvm/vmx/ops.h | 4 +
arch/x86/kvm/vmx/vmenter.S | 5 +
arch/x86/kvm/vmx/vmx.c | 105 ++++++++++++++++++++++------------------
arch/x86/kvm/x86.c | 2
6 files changed, 79 insertions(+), 49 deletions(-)

--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flus
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}

-static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
+static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
}

-static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
+static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
{
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
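
These helpers are called from vmx_l1d_flush(), which becomes noinstr
below, so inlining must be guaranteed: plain "static inline" lets the
compiler emit an out-of-line copy in .text, and the resulting call from
.noinstr.text would trip objtool's noinstr validation. A minimal sketch
of the failure mode (names hypothetical):

static inline bool flag_read(void)	/* may end up out of line in .text */
{
	return false;
}

static noinstr void transition(void)
{
	if (flag_read())	/* potential call leaving .noinstr.text */
		;
}
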
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1601,7 +1601,15 @@ asmlinkage void kvm_spurious_fault(void)
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_begin \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
"call kvm_spurious_fault \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_end \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
"668: \n\t" \
_ASM_EXTABLE(666b, 667b)
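
The open-coded .discard.instr_begin/.discard.instr_end entries above
mirror what the C-level instr_begin()/instr_end() markers from this
series emit; roughly (sketch, instr_end() is the same with the
.discard.instr_end section):

#define instr_begin() ({						\
	asm volatile("%c0:\n\t"						\
		     ".pushsection .discard.instr_begin\n\t"		\
		     ".long %c0b - .\n\t"				\
		     ".popsection\n\t" : : "i" (__COUNTER__));		\
})

objtool pairs these begin/end offsets to know which parts of a noinstr
function may legitimately call instrumentable code.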

--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -146,7 +146,9 @@ do { \
: : op1 : "cc" : error, fault); \
return; \
error: \
+ instr_begin(); \
insn##_error(error_args); \
+ instr_end(); \
return; \
fault: \
kvm_spurious_fault(); \
@@ -161,7 +163,9 @@ do { \
: : op1, op2 : "cc" : error, fault); \
return; \
error: \
+ instr_begin(); \
insn##_error(error_args); \
+ instr_end(); \
return; \
fault: \
kvm_spurious_fault(); \
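
For context, the error/fault labels above live inside the vmx_asm1()
and vmx_asm2() wrappers; a typical caller looks roughly like this
(sketch, simplified from ops.h):

static inline void vmcs_clear(struct vmcs *vmcs)
{
	u64 phys_addr = __pa(vmcs);

	/* "error" branches to vmclear_error(), now bracketed by
	 * instr_begin()/instr_end(); "fault" calls kvm_spurious_fault(). */
	vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr);
}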
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -27,7 +27,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif

- .text
+.section .noinstr.text, "ax"

/**
* vmx_vmenter - VM-Enter the current loaded VMCS
@@ -231,6 +231,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
jmp 1b
SYM_FUNC_END(__vmx_vcpu_run)

+
+.section .text, "ax"
+
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
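
The .section directives place __vmx_vcpu_run() into .noinstr.text while
the error trampoline stays in regular .text. The linker script side of
this series collects the section into one contiguous block delimited by
start/end markers, roughly (sketch of the NOINSTR_TEXT macro from
include/asm-generic/vmlinux.lds.h):

#define NOINSTR_TEXT							\
	ALIGN_FUNCTION();						\
	__noinstr_text_start = .;					\
	*(.noinstr.text)						\
	__noinstr_text_end = .;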
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6000,7 +6000,7 @@ static int vmx_handle_exit(struct kvm_vc
* information but as all relevant affected CPUs have 32KiB L1D cache size
* there is no point in doing so.
*/
-static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
+static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
int size = PAGE_SIZE << L1D_CACHE_ORDER;

@@ -6033,7 +6033,7 @@ static void vmx_l1d_flush(struct kvm_vcp
vcpu->stat.l1d_flush++;

if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
- wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
return;
}
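
The switch to native_wrmsrl() matters because wrmsrl() expands to a
paravirt call when CONFIG_PARAVIRT is enabled; native_wrmsrl() avoids
that indirection and boils down to the WRMSR instruction, roughly
(sketch of the underlying __wrmsr(), exception table handling and MSR
tracepoint plumbing omitted):

static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
{
	asm volatile("wrmsr"
		     : : "c" (msr), "a" (low), "d" (high)
		     : "memory");
}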

@@ -6514,7 +6514,7 @@ static void vmx_update_hv_timer(struct k
}
}

-void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
+void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
{
if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
vmx->loaded_vmcs->host_state.rsp = host_rsp;
@@ -6524,6 +6524,61 @@ void vmx_update_host_rsp(struct vcpu_vmx

bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);

+static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ struct vcpu_vmx *vmx)
+{
+ instr_begin();
+ /*
+ * VMENTER enables interrupts (host state), but the kernel state is
+ * interrupts disabled when this is invoked. Also tell RCU about
+ * it. This is the same logic as for exit_to_user_mode().
+ *
+ * 1) Trace interrupts on state
+ * 2) Prepare lockdep with RCU on
+ * 3) Invoke context tracking if enabled to adjust RCU state
+ * 4) Tell lockdep that interrupts are enabled
+ */
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ instr_end();
+
+ guest_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+
+ /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ if (static_branch_unlikely(&vmx_l1d_should_flush))
+ vmx_l1d_flush(vcpu);
+ else if (static_branch_unlikely(&mds_user_clear))
+ mds_clear_cpu_buffers();
+
+ if (vcpu->arch.cr2 != read_cr2())
+ write_cr2(vcpu->arch.cr2);
+
+ vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+ vmx->loaded_vmcs->launched);
+
+ vcpu->arch.cr2 = read_cr2();
+
+ /*
+ * VMEXIT disables interrupts (host state), but tracing and lockdep
+ * have them in state 'on'. Same as enter_from_user_mode().
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ *
+ * This needs to be done before the code below is reached:
+ * native_read_msr() contains a tracepoint and
+ * x86_spec_ctrl_restore_host() calls into other instrumentable code.
+ */
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ guest_exit_irqoff();
+
+ instr_begin();
+ trace_hardirqs_off_prepare();
+ instr_end();
+}
+
static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6604,49 +6659,9 @@ static void vmx_vcpu_run(struct kvm_vcpu
x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);

/*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * 1) Trace interrupts on state
- * 2) Prepare lockdep with RCU on
- * 3) Invoke context tracking if enabled to adjust RCU state
- * 4) Tell lockdep that interrupts are enabled
+ * The actual VMENTER/EXIT is in the .noinstr.text section.
*/
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
- if (static_branch_unlikely(&vmx_l1d_should_flush))
- vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
-
- if (vcpu->arch.cr2 != read_cr2())
- write_cr2(vcpu->arch.cr2);
-
- vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
-
- vcpu->arch.cr2 = read_cr2();
-
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on'. Same as enter_from_user_mode().
- *
- * 1) Tell lockdep that interrupts are disabled
- * 2) Invoke context tracking if enabled to reactivate RCU
- * 3) Trace interrupts off state
- *
- * This needs to be done before the below as native_read_msr()
- * contains a tracepoint and x86_spec_ctrl_restore_host() calls
- * into world and some more.
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
- trace_hardirqs_off_prepare();
+ vmx_vcpu_enter_exit(vcpu, vmx);

/*
* We do not use IBRS in the kernel. If this vCPU has used the
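
The new vmx_vcpu_enter_exit() above is the generic noinstr transition
pattern: instrumentation is only legitimate inside explicitly marked
windows, and everything between them runs with RCU potentially off.
Distilled (sketch, helper names hypothetical):

static noinstr void transition_sketch(void)
{
	instr_begin();			/* tracing/lockdep allowed ... */
	trace_and_lockdep_prepare();	/* hypothetical */
	instr_end();			/* ... only inside this window */

	work_with_rcu_off();		/* hypothetical, must be noinstr */

	instr_begin();
	trace_and_lockdep_finish();	/* hypothetical */
	instr_end();
}
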
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -381,7 +381,7 @@ int kvm_set_apic_base(struct kvm_vcpu *v
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

-asmlinkage __visible void kvm_spurious_fault(void)
+asmlinkage __visible noinstr void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
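
With kvm_spurious_fault() now in .noinstr.text as well, the fault paths
above can call it without leaving the protected section. The placement
can be spot-checked on a built kernel with something like:

	objdump -d -j .noinstr.text vmlinux | grep 'kvm_spurious_fault>:'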