Re: [syzbot] WARNING in kvm_mmu_uninit_tdp_mmu (2)

From: Maxim Levitsky
Date: Thu Apr 28 2022 - 13:17:07 EST


On Thu, 2022-04-28 at 15:32 +0000, Sean Christopherson wrote:
> On Tue, Apr 26, 2022, Maxim Levitsky wrote:
> > I can reproduce this in a VM, by running and CTRL+C'in my ipi_stress test,
>
> Can you post your ipi_stress test? I'm curious to see if I can repro, and also
> very curious as to what might be unique about your test. I haven't been able to
> repro the syzbot test, nor have I been able to repro by killing VMs/tests.
>

This is the patch series (mostly an attempt to turn svm into a mini
library, but I don't know if this is worth it).
It was done so that ipi_stress could use nesting itself to wait for an
IPI from within a nested guest. I usually don't use it.

This is more or less how I was running it lately (I have a wrapper script)


./x86/run x86/ipi_stress.flat \
-global kvm-pit.lost_tick_policy=discard \
-machine kernel-irqchip=on -name debug-threads=on \
\
-smp 8 \
-cpu host,x2apic=off,svm=off,-hypervisor \
-overcommit cpu-pm=on \
-m 4g -append "0 10000"


It's not fully finished for upstream; I will get to it soon.

'cpu-pm=on' won't work for you as it fails due to a non-atomic memslot
update bug for which I have a small hack in qemu, and it is on my
backlog to fix it correctly.

Most likely cpu-pm=off will also reproduce it.


The test was run in a guest; natively this doesn't seem to reproduce.
tdp mmu was used for both L0 and L1.

Best regards,
Maxim Levitsky
From 325a2eff01184e82f1f80ac5783eb5bc5058e1a8 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Mon, 28 Mar 2022 14:23:53 +0300
Subject: [PATCH 1/7] svm: move svm spec definitions to lib/x86/svm.h

---
lib/x86/svm.h | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++
x86/svm.h | 359 +------------------------------------------------
2 files changed, 365 insertions(+), 358 deletions(-)
create mode 100644 lib/x86/svm.h

diff --git a/lib/x86/svm.h b/lib/x86/svm.h
new file mode 100644
index 00000000..38bb9224
--- /dev/null
+++ b/lib/x86/svm.h
@@ -0,0 +1,364 @@
+
+#ifndef SRC_LIB_X86_SVM_H_
+#define SRC_LIB_X86_SVM_H_
+
+enum {
+ INTERCEPT_INTR,
+ INTERCEPT_NMI,
+ INTERCEPT_SMI,
+ INTERCEPT_INIT,
+ INTERCEPT_VINTR,
+ INTERCEPT_SELECTIVE_CR0,
+ INTERCEPT_STORE_IDTR,
+ INTERCEPT_STORE_GDTR,
+ INTERCEPT_STORE_LDTR,
+ INTERCEPT_STORE_TR,
+ INTERCEPT_LOAD_IDTR,
+ INTERCEPT_LOAD_GDTR,
+ INTERCEPT_LOAD_LDTR,
+ INTERCEPT_LOAD_TR,
+ INTERCEPT_RDTSC,
+ INTERCEPT_RDPMC,
+ INTERCEPT_PUSHF,
+ INTERCEPT_POPF,
+ INTERCEPT_CPUID,
+ INTERCEPT_RSM,
+ INTERCEPT_IRET,
+ INTERCEPT_INTn,
+ INTERCEPT_INVD,
+ INTERCEPT_PAUSE,
+ INTERCEPT_HLT,
+ INTERCEPT_INVLPG,
+ INTERCEPT_INVLPGA,
+ INTERCEPT_IOIO_PROT,
+ INTERCEPT_MSR_PROT,
+ INTERCEPT_TASK_SWITCH,
+ INTERCEPT_FERR_FREEZE,
+ INTERCEPT_SHUTDOWN,
+ INTERCEPT_VMRUN,
+ INTERCEPT_VMMCALL,
+ INTERCEPT_VMLOAD,
+ INTERCEPT_VMSAVE,
+ INTERCEPT_STGI,
+ INTERCEPT_CLGI,
+ INTERCEPT_SKINIT,
+ INTERCEPT_RDTSCP,
+ INTERCEPT_ICEBP,
+ INTERCEPT_WBINVD,
+ INTERCEPT_MONITOR,
+ INTERCEPT_MWAIT,
+ INTERCEPT_MWAIT_COND,
+};
+
+enum {
+ VMCB_CLEAN_INTERCEPTS = 1, /* Intercept vectors, TSC offset, pause filter count */
+ VMCB_CLEAN_PERM_MAP = 2, /* IOPM Base and MSRPM Base */
+ VMCB_CLEAN_ASID = 4, /* ASID */
+ VMCB_CLEAN_INTR = 8, /* int_ctl, int_vector */
+ VMCB_CLEAN_NPT = 16, /* npt_en, nCR3, gPAT */
+ VMCB_CLEAN_CR = 32, /* CR0, CR3, CR4, EFER */
+ VMCB_CLEAN_DR = 64, /* DR6, DR7 */
+ VMCB_CLEAN_DT = 128, /* GDT, IDT */
+ VMCB_CLEAN_SEG = 256, /* CS, DS, SS, ES, CPL */
+ VMCB_CLEAN_CR2 = 512, /* CR2 only */
+ VMCB_CLEAN_LBR = 1024, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+ VMCB_CLEAN_AVIC = 2048, /* APIC_BAR, APIC_BACKING_PAGE,
+ PHYSICAL_TABLE pointer, LOGICAL_TABLE pointer */
+ VMCB_CLEAN_ALL = 4095,
+};
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ u16 intercept_cr_read;
+ u16 intercept_cr_write;
+ u16 intercept_dr_read;
+ u16 intercept_dr_write;
+ u32 intercept_exceptions;
+ u64 intercept;
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+ u64 iopm_base_pa;
+ u64 msrpm_base_pa;
+ u64 tsc_offset;
+ u32 asid;
+ u8 tlb_ctl;
+ u8 reserved_2[3];
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+ u8 reserved_3[4];
+ u32 exit_code;
+ u32 exit_code_hi;
+ u64 exit_info_1;
+ u64 exit_info_2;
+ u32 exit_int_info;
+ u32 exit_int_info_err;
+ u64 nested_ctl;
+ u8 reserved_4[16];
+ u32 event_inj;
+ u32 event_inj_err;
+ u64 nested_cr3;
+ u64 virt_ext;
+ u32 clean;
+ u32 reserved_5;
+ u64 next_rip;
+ u8 insn_len;
+ u8 insn_bytes[15];
+ u8 reserved_6[800];
+};
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_ENABLED_SHIFT 25
+#define V_GIF_ENABLED_MASK (1 << V_GIF_ENABLED_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK 0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+
+#define TSC_RATIO_DEFAULT 0x0100000000ULL
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+ u16 selector;
+ u16 attrib;
+ u32 limit;
+ u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u8 reserved_1[43];
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[112];
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u8 reserved_4[88];
+ u64 rsp;
+ u8 reserved_5[24];
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_6[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FEATURE_SHIFT 2
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_MASK 1
+#define INTERCEPT_CR3_MASK (1 << 3)
+#define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
+
+#define INTERCEPT_DR0_MASK 1
+#define INTERCEPT_DR1_MASK (1 << 1)
+#define INTERCEPT_DR2_MASK (1 << 2)
+#define INTERCEPT_DR3_MASK (1 << 3)
+#define INTERCEPT_DR4_MASK (1 << 4)
+#define INTERCEPT_DR5_MASK (1 << 5)
+#define INTERCEPT_DR6_MASK (1 << 6)
+#define INTERCEPT_DR7_MASK (1 << 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_MWAIT_COND 0x08c
+#define SVM_EXIT_NPF 0x400
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#define SVM_CR0_RESERVED_MASK 0xffffffff00000000U
+#define SVM_CR3_LONG_MBZ_MASK 0xfff0000000000000U
+#define SVM_CR3_LONG_RESERVED_MASK 0x0000000000000fe7U
+#define SVM_CR3_PAE_LEGACY_RESERVED_MASK 0x0000000000000007U
+#define SVM_CR4_LEGACY_RESERVED_MASK 0xff08e000U
+#define SVM_CR4_RESERVED_MASK 0xffffffffff08e000U
+#define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U
+#define SVM_DR7_RESERVED_MASK 0xffffffff0000cc00U
+#define SVM_EFER_RESERVED_MASK 0xffffffffffff0200U
+
+
+#endif /* SRC_LIB_X86_SVM_H_ */
diff --git a/x86/svm.h b/x86/svm.h
index e93822b6..ff5fa91e 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -2,367 +2,10 @@
#define X86_SVM_H

#include "libcflat.h"
+#include <x86/svm.h>

-enum {
- INTERCEPT_INTR,
- INTERCEPT_NMI,
- INTERCEPT_SMI,
- INTERCEPT_INIT,
- INTERCEPT_VINTR,
- INTERCEPT_SELECTIVE_CR0,
- INTERCEPT_STORE_IDTR,
- INTERCEPT_STORE_GDTR,
- INTERCEPT_STORE_LDTR,
- INTERCEPT_STORE_TR,
- INTERCEPT_LOAD_IDTR,
- INTERCEPT_LOAD_GDTR,
- INTERCEPT_LOAD_LDTR,
- INTERCEPT_LOAD_TR,
- INTERCEPT_RDTSC,
- INTERCEPT_RDPMC,
- INTERCEPT_PUSHF,
- INTERCEPT_POPF,
- INTERCEPT_CPUID,
- INTERCEPT_RSM,
- INTERCEPT_IRET,
- INTERCEPT_INTn,
- INTERCEPT_INVD,
- INTERCEPT_PAUSE,
- INTERCEPT_HLT,
- INTERCEPT_INVLPG,
- INTERCEPT_INVLPGA,
- INTERCEPT_IOIO_PROT,
- INTERCEPT_MSR_PROT,
- INTERCEPT_TASK_SWITCH,
- INTERCEPT_FERR_FREEZE,
- INTERCEPT_SHUTDOWN,
- INTERCEPT_VMRUN,
- INTERCEPT_VMMCALL,
- INTERCEPT_VMLOAD,
- INTERCEPT_VMSAVE,
- INTERCEPT_STGI,
- INTERCEPT_CLGI,
- INTERCEPT_SKINIT,
- INTERCEPT_RDTSCP,
- INTERCEPT_ICEBP,
- INTERCEPT_WBINVD,
- INTERCEPT_MONITOR,
- INTERCEPT_MWAIT,
- INTERCEPT_MWAIT_COND,
-};
-
-enum {
- VMCB_CLEAN_INTERCEPTS = 1, /* Intercept vectors, TSC offset, pause filter count */
- VMCB_CLEAN_PERM_MAP = 2, /* IOPM Base and MSRPM Base */
- VMCB_CLEAN_ASID = 4, /* ASID */
- VMCB_CLEAN_INTR = 8, /* int_ctl, int_vector */
- VMCB_CLEAN_NPT = 16, /* npt_en, nCR3, gPAT */
- VMCB_CLEAN_CR = 32, /* CR0, CR3, CR4, EFER */
- VMCB_CLEAN_DR = 64, /* DR6, DR7 */
- VMCB_CLEAN_DT = 128, /* GDT, IDT */
- VMCB_CLEAN_SEG = 256, /* CS, DS, SS, ES, CPL */
- VMCB_CLEAN_CR2 = 512, /* CR2 only */
- VMCB_CLEAN_LBR = 1024, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
- VMCB_CLEAN_AVIC = 2048, /* APIC_BAR, APIC_BACKING_PAGE,
- PHYSICAL_TABLE pointer, LOGICAL_TABLE pointer */
- VMCB_CLEAN_ALL = 4095,
-};
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
- u16 intercept_cr_read;
- u16 intercept_cr_write;
- u16 intercept_dr_read;
- u16 intercept_dr_write;
- u32 intercept_exceptions;
- u64 intercept;
- u8 reserved_1[40];
- u16 pause_filter_thresh;
- u16 pause_filter_count;
- u64 iopm_base_pa;
- u64 msrpm_base_pa;
- u64 tsc_offset;
- u32 asid;
- u8 tlb_ctl;
- u8 reserved_2[3];
- u32 int_ctl;
- u32 int_vector;
- u32 int_state;
- u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
- u64 exit_info_1;
- u64 exit_info_2;
- u32 exit_int_info;
- u32 exit_int_info_err;
- u64 nested_ctl;
- u8 reserved_4[16];
- u32 event_inj;
- u32 event_inj_err;
- u64 nested_cr3;
- u64 virt_ext;
- u32 clean;
- u32 reserved_5;
- u64 next_rip;
- u8 insn_len;
- u8 insn_bytes[15];
- u8 reserved_6[800];
-};
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_GIF_ENABLED_SHIFT 25
-#define V_GIF_ENABLED_MASK (1 << V_GIF_ENABLED_SHIFT)
-
-#define V_GIF_SHIFT 9
-#define V_GIF_MASK (1 << V_GIF_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-#define SVM_VM_CR_VALID_MASK 0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
-
-#define TSC_RATIO_DEFAULT 0x0100000000ULL
-
-struct __attribute__ ((__packed__)) vmcb_seg {
- u16 selector;
- u16 attrib;
- u32 limit;
- u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
- struct vmcb_seg es;
- struct vmcb_seg cs;
- struct vmcb_seg ss;
- struct vmcb_seg ds;
- struct vmcb_seg fs;
- struct vmcb_seg gs;
- struct vmcb_seg gdtr;
- struct vmcb_seg ldtr;
- struct vmcb_seg idtr;
- struct vmcb_seg tr;
- u8 reserved_1[43];
- u8 cpl;
- u8 reserved_2[4];
- u64 efer;
- u8 reserved_3[112];
- u64 cr4;
- u64 cr3;
- u64 cr0;
- u64 dr7;
- u64 dr6;
- u64 rflags;
- u64 rip;
- u8 reserved_4[88];
- u64 rsp;
- u8 reserved_5[24];
- u64 rax;
- u64 star;
- u64 lstar;
- u64 cstar;
- u64 sfmask;
- u64 kernel_gs_base;
- u64 sysenter_cs;
- u64 sysenter_esp;
- u64 sysenter_eip;
- u64 cr2;
- u8 reserved_6[32];
- u64 g_pat;
- u64 dbgctl;
- u64 br_from;
- u64 br_to;
- u64 last_excp_from;
- u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
- struct vmcb_control_area control;
- struct vmcb_save_area save;
-};
-
-#define SVM_CPUID_FEATURE_SHIFT 2
-#define SVM_CPUID_FUNC 0x8000000a
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_MASK 1
-#define INTERCEPT_CR3_MASK (1 << 3)
-#define INTERCEPT_CR4_MASK (1 << 4)
-#define INTERCEPT_CR8_MASK (1 << 8)
-
-#define INTERCEPT_DR0_MASK 1
-#define INTERCEPT_DR1_MASK (1 << 1)
-#define INTERCEPT_DR2_MASK (1 << 2)
-#define INTERCEPT_DR3_MASK (1 << 3)
-#define INTERCEPT_DR4_MASK (1 << 4)
-#define INTERCEPT_DR5_MASK (1 << 5)
-#define INTERCEPT_DR6_MASK (1 << 6)
-#define INTERCEPT_DR7_MASK (1 << 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
-
-#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
-
-#define SVM_EXIT_READ_CR0 0x000
-#define SVM_EXIT_READ_CR3 0x003
-#define SVM_EXIT_READ_CR4 0x004
-#define SVM_EXIT_READ_CR8 0x008
-#define SVM_EXIT_WRITE_CR0 0x010
-#define SVM_EXIT_WRITE_CR3 0x013
-#define SVM_EXIT_WRITE_CR4 0x014
-#define SVM_EXIT_WRITE_CR8 0x018
-#define SVM_EXIT_READ_DR0 0x020
-#define SVM_EXIT_READ_DR1 0x021
-#define SVM_EXIT_READ_DR2 0x022
-#define SVM_EXIT_READ_DR3 0x023
-#define SVM_EXIT_READ_DR4 0x024
-#define SVM_EXIT_READ_DR5 0x025
-#define SVM_EXIT_READ_DR6 0x026
-#define SVM_EXIT_READ_DR7 0x027
-#define SVM_EXIT_WRITE_DR0 0x030
-#define SVM_EXIT_WRITE_DR1 0x031
-#define SVM_EXIT_WRITE_DR2 0x032
-#define SVM_EXIT_WRITE_DR3 0x033
-#define SVM_EXIT_WRITE_DR4 0x034
-#define SVM_EXIT_WRITE_DR5 0x035
-#define SVM_EXIT_WRITE_DR6 0x036
-#define SVM_EXIT_WRITE_DR7 0x037
-#define SVM_EXIT_EXCP_BASE 0x040
-#define SVM_EXIT_INTR 0x060
-#define SVM_EXIT_NMI 0x061
-#define SVM_EXIT_SMI 0x062
-#define SVM_EXIT_INIT 0x063
-#define SVM_EXIT_VINTR 0x064
-#define SVM_EXIT_CR0_SEL_WRITE 0x065
-#define SVM_EXIT_IDTR_READ 0x066
-#define SVM_EXIT_GDTR_READ 0x067
-#define SVM_EXIT_LDTR_READ 0x068
-#define SVM_EXIT_TR_READ 0x069
-#define SVM_EXIT_IDTR_WRITE 0x06a
-#define SVM_EXIT_GDTR_WRITE 0x06b
-#define SVM_EXIT_LDTR_WRITE 0x06c
-#define SVM_EXIT_TR_WRITE 0x06d
-#define SVM_EXIT_RDTSC 0x06e
-#define SVM_EXIT_RDPMC 0x06f
-#define SVM_EXIT_PUSHF 0x070
-#define SVM_EXIT_POPF 0x071
-#define SVM_EXIT_CPUID 0x072
-#define SVM_EXIT_RSM 0x073
-#define SVM_EXIT_IRET 0x074
-#define SVM_EXIT_SWINT 0x075
-#define SVM_EXIT_INVD 0x076
-#define SVM_EXIT_PAUSE 0x077
-#define SVM_EXIT_HLT 0x078
-#define SVM_EXIT_INVLPG 0x079
-#define SVM_EXIT_INVLPGA 0x07a
-#define SVM_EXIT_IOIO 0x07b
-#define SVM_EXIT_MSR 0x07c
-#define SVM_EXIT_TASK_SWITCH 0x07d
-#define SVM_EXIT_FERR_FREEZE 0x07e
-#define SVM_EXIT_SHUTDOWN 0x07f
-#define SVM_EXIT_VMRUN 0x080
-#define SVM_EXIT_VMMCALL 0x081
-#define SVM_EXIT_VMLOAD 0x082
-#define SVM_EXIT_VMSAVE 0x083
-#define SVM_EXIT_STGI 0x084
-#define SVM_EXIT_CLGI 0x085
-#define SVM_EXIT_SKINIT 0x086
-#define SVM_EXIT_RDTSCP 0x087
-#define SVM_EXIT_ICEBP 0x088
-#define SVM_EXIT_WBINVD 0x089
-#define SVM_EXIT_MONITOR 0x08a
-#define SVM_EXIT_MWAIT 0x08b
-#define SVM_EXIT_MWAIT_COND 0x08c
-#define SVM_EXIT_NPF 0x400
-
-#define SVM_EXIT_ERR -1
-
-#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
-
-#define SVM_CR0_RESERVED_MASK 0xffffffff00000000U
-#define SVM_CR3_LONG_MBZ_MASK 0xfff0000000000000U
-#define SVM_CR3_LONG_RESERVED_MASK 0x0000000000000fe7U
-#define SVM_CR3_PAE_LEGACY_RESERVED_MASK 0x0000000000000007U
-#define SVM_CR4_LEGACY_RESERVED_MASK 0xff08e000U
-#define SVM_CR4_RESERVED_MASK 0xffffffffff08e000U
-#define SVM_DR6_RESERVED_MASK 0xffffffffffff1ff0U
-#define SVM_DR7_RESERVED_MASK 0xffffffff0000cc00U
-#define SVM_EFER_RESERVED_MASK 0xffffffffffff0200U

#define MSR_BITMAP_SIZE 8192
-
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)

struct svm_test {
--
2.26.3

From 410f0020fe7330af4fc46dbc728eec0bd94c1c82 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Mon, 28 Mar 2022 15:32:21 +0300
Subject: [PATCH 2/7] move some svm support functions into lib/x86/svm_lib.h

---
lib/x86/svm_lib.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++
x86/svm.c | 35 +------------------------------
x86/svm.h | 18 ----------------
x86/svm_tests.c | 1 +
4 files changed, 55 insertions(+), 52 deletions(-)
create mode 100644 lib/x86/svm_lib.h

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
new file mode 100644
index 00000000..cdc93408
--- /dev/null
+++ b/lib/x86/svm_lib.h
@@ -0,0 +1,53 @@
+#ifndef SRC_LIB_X86_SVM_LIB_H_
+#define SRC_LIB_X86_SVM_LIB_H_
+
+#include <x86/svm.h>
+#include "processor.h"
+
+static inline bool npt_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_NPT);
+}
+
+static inline bool vgif_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_VGIF);
+}
+
+static inline bool lbrv_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_LBRV);
+}
+
+static inline bool tsc_scale_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_TSCRATEMSR);
+}
+
+static inline bool pause_filter_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_PAUSEFILTER);
+}
+
+static inline bool pause_threshold_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_PFTHRESHOLD);
+}
+
+static inline void vmmcall(void)
+{
+ asm volatile ("vmmcall" : : : "memory");
+}
+
+static inline void stgi(void)
+{
+ asm volatile ("stgi");
+}
+
+static inline void clgi(void)
+{
+ asm volatile ("clgi");
+}
+
+
+#endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/svm.c b/x86/svm.c
index f6896f02..009d2d8c 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -14,6 +14,7 @@
#include "isr.h"
#include "apic.h"
#include "vmalloc.h"
+#include "svm_lib.h"

/* for the nested page table*/
u64 *pte[2048];
@@ -65,31 +66,6 @@ bool default_supported(void)
return true;
}

-bool vgif_supported(void)
-{
- return this_cpu_has(X86_FEATURE_VGIF);
-}
-
-bool lbrv_supported(void)
-{
- return this_cpu_has(X86_FEATURE_LBRV);
-}
-
-bool tsc_scale_supported(void)
-{
- return this_cpu_has(X86_FEATURE_TSCRATEMSR);
-}
-
-bool pause_filter_supported(void)
-{
- return this_cpu_has(X86_FEATURE_PAUSEFILTER);
-}
-
-bool pause_threshold_supported(void)
-{
- return this_cpu_has(X86_FEATURE_PFTHRESHOLD);
-}
-

void default_prepare(struct svm_test *test)
{
@@ -105,10 +81,6 @@ bool default_finished(struct svm_test *test)
return true; /* one vmexit */
}

-bool npt_supported(void)
-{
- return this_cpu_has(X86_FEATURE_NPT);
-}

int get_test_stage(struct svm_test *test)
{
@@ -139,11 +111,6 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}

-inline void vmmcall(void)
-{
- asm volatile ("vmmcall" : : : "memory");
-}
-
static test_guest_func guest_main;

void test_set_guest(test_guest_func func)
diff --git a/x86/svm.h b/x86/svm.h
index ff5fa91e..1eb98de3 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -52,21 +52,14 @@ u64 *npt_get_pdpe(void);
u64 *npt_get_pml4e(void);
bool smp_supported(void);
bool default_supported(void);
-bool vgif_supported(void);
-bool lbrv_supported(void);
-bool tsc_scale_supported(void);
-bool pause_filter_supported(void);
-bool pause_threshold_supported(void);
void default_prepare(struct svm_test *test);
void default_prepare_gif_clear(struct svm_test *test);
bool default_finished(struct svm_test *test);
-bool npt_supported(void);
int get_test_stage(struct svm_test *test);
void set_test_stage(struct svm_test *test, int s);
void inc_test_stage(struct svm_test *test);
void vmcb_ident(struct vmcb *vmcb);
struct regs get_regs(void);
-void vmmcall(void);
int __svm_vmrun(u64 rip);
void __svm_bare_vmrun(void);
int svm_vmrun(void);
@@ -75,17 +68,6 @@ void test_set_guest(test_guest_func func);
extern struct vmcb *vmcb;
extern struct svm_test svm_tests[];

-static inline void stgi(void)
-{
- asm volatile ("stgi");
-}
-
-static inline void clgi(void)
-{
- asm volatile ("clgi");
-}
-
-

#define SAVE_GPR_C \
"xchg %%rbx, regs+0x8\n\t" \
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 6a9b03bd..b6a0d5e6 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -10,6 +10,7 @@
#include "isr.h"
#include "apic.h"
#include "delay.h"
+#include "svm_lib.h"

#define SVM_EXIT_MAX_DR_INTERCEPT 0x3f

--
2.26.3

From 29c65cc4bd1f4beaca8d92acb0e1a3c39120e556 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Thu, 31 Mar 2022 09:58:54 +0300
Subject: [PATCH 3/7] svm: add svm_supported

---
lib/x86/svm_lib.h | 5 +++++
x86/svm.c | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index cdc93408..1c35d4a9 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -4,6 +4,11 @@
#include <x86/svm.h>
#include "processor.h"

+static inline bool svm_supported(void)
+{
+ return this_cpu_has(X86_FEATURE_SVM);
+}
+
static inline bool npt_supported(void)
{
return this_cpu_has(X86_FEATURE_NPT);
diff --git a/x86/svm.c b/x86/svm.c
index 009d2d8c..7a654425 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -375,7 +375,7 @@ int main(int ac, char **av)

__setup_vm(&opt_mask);

- if (!this_cpu_has(X86_FEATURE_SVM)) {
+ if (!svm_supported()) {
printf("SVM not availble\n");
return report_summary();
}
--
2.26.3

From 56dfe907b80b9c4ecaa0042acfa5feca13da98ce Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Mon, 28 Mar 2022 16:13:32 +0300
Subject: [PATCH 4/7] svm: move setup_svm to svm_lib.c

---
lib/x86/svm.h | 2 +
lib/x86/svm_lib.c | 131 ++++++++++++++++++++++++++++++++++++++++++++
lib/x86/svm_lib.h | 12 ++++
x86/Makefile.x86_64 | 2 +
x86/svm.c | 115 +-------------------------------------
x86/svm.h | 5 --
x86/svm_tests.c | 17 ++++--
7 files changed, 161 insertions(+), 123 deletions(-)
create mode 100644 lib/x86/svm_lib.c

diff --git a/lib/x86/svm.h b/lib/x86/svm.h
index 38bb9224..21eff090 100644
--- a/lib/x86/svm.h
+++ b/lib/x86/svm.h
@@ -2,6 +2,8 @@
#ifndef SRC_LIB_X86_SVM_H_
#define SRC_LIB_X86_SVM_H_

+#include "libcflat.h"
+
enum {
INTERCEPT_INTR,
INTERCEPT_NMI,
diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
new file mode 100644
index 00000000..8e59d81c
--- /dev/null
+++ b/lib/x86/svm_lib.c
@@ -0,0 +1,131 @@
+
+#include "svm_lib.h"
+#include "libcflat.h"
+#include "processor.h"
+#include "desc.h"
+#include "msr.h"
+#include "vm.h"
+#include "smp.h"
+#include "alloc_page.h"
+
+/* for the nested page table*/
+static u64 *pte[2048];
+static u64 *pde[4];
+static u64 *pdpe;
+static u64 *pml4e;
+
+static u8 *io_bitmap;
+static u8 io_bitmap_area[16384];
+
+static u8 *msr_bitmap;
+static u8 msr_bitmap_area[MSR_BITMAP_SIZE + PAGE_SIZE];
+
+
+u64 *npt_get_pte(u64 address)
+{
+ int i1, i2;
+
+ address >>= 12;
+ i1 = (address >> 9) & 0x7ff;
+ i2 = address & 0x1ff;
+
+ return &pte[i1][i2];
+}
+
+u64 *npt_get_pde(u64 address)
+{
+ int i1, i2;
+
+ address >>= 21;
+ i1 = (address >> 9) & 0x3;
+ i2 = address & 0x1ff;
+
+ return &pde[i1][i2];
+}
+
+u64 *npt_get_pdpe(void)
+{
+ return pdpe;
+}
+
+u64 *npt_get_pml4e(void)
+{
+ return pml4e;
+}
+
+u8* svm_get_msr_bitmap(void)
+{
+ return msr_bitmap;
+}
+
+u8* svm_get_io_bitmap(void)
+{
+ return io_bitmap;
+}
+
+static void set_additional_vcpu_msr(void *msr_efer)
+{
+ void *hsave = alloc_page();
+
+ wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
+ wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME);
+}
+
+void setup_svm(void)
+{
+ void *hsave = alloc_page();
+ u64 *page, address;
+ int i,j;
+
+ wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
+ wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SVME);
+
+ io_bitmap = (void *) ALIGN((ulong)io_bitmap_area, PAGE_SIZE);
+
+ msr_bitmap = (void *) ALIGN((ulong)msr_bitmap_area, PAGE_SIZE);
+
+ if (!npt_supported())
+ return;
+
+ for (i = 1; i < cpu_count(); i++)
+ on_cpu(i, (void *)set_additional_vcpu_msr, (void *)rdmsr(MSR_EFER));
+
+ printf("NPT detected - running all tests with NPT enabled\n");
+
+ /*
+ * Nested paging supported - Build a nested page table
+ * Build the page-table bottom-up and map everything with 4k
+ * pages to get enough granularity for the NPT unit-tests.
+ */
+
+ address = 0;
+
+ /* PTE level */
+ for (i = 0; i < 2048; ++i) {
+ page = alloc_page();
+
+ for (j = 0; j < 512; ++j, address += 4096)
+ page[j] = address | 0x067ULL;
+
+ pte[i] = page;
+ }
+
+ /* PDE level */
+ for (i = 0; i < 4; ++i) {
+ page = alloc_page();
+
+ for (j = 0; j < 512; ++j)
+ page[j] = (u64)pte[(i * 512) + j] | 0x027ULL;
+
+ pde[i] = page;
+ }
+
+ /* PDPe level */
+ pdpe = alloc_page();
+ for (i = 0; i < 4; ++i)
+ pdpe[i] = ((u64)(pde[i])) | 0x27;
+
+ /* PML4e level */
+ pml4e = alloc_page();
+ pml4e[0] = ((u64)pdpe) | 0x27;
+}
diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 1c35d4a9..f5e83b85 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -54,5 +54,17 @@ static inline void clgi(void)
asm volatile ("clgi");
}

+void setup_svm(void);
+
+u64 *npt_get_pte(u64 address);
+u64 *npt_get_pde(u64 address);
+u64 *npt_get_pdpe(void);
+u64 *npt_get_pml4e(void);
+
+u8* svm_get_msr_bitmap(void);
+u8* svm_get_io_bitmap(void);
+
+#define MSR_BITMAP_SIZE 8192
+

#endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index f18c1e20..302acf58 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -17,6 +17,8 @@ COMMON_CFLAGS += -mno-red-zone -mno-sse -mno-sse2 $(fcf_protection_full)
cflatobjs += lib/x86/setjmp64.o
cflatobjs += lib/x86/intel-iommu.o
cflatobjs += lib/x86/usermode.o
+cflatobjs += lib/x86/svm_lib.o
+

tests = $(TEST_DIR)/apic.$(exe) \
$(TEST_DIR)/emulator.$(exe) $(TEST_DIR)/idt_test.$(exe) \
diff --git a/x86/svm.c b/x86/svm.c
index 7a654425..23e65261 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -16,46 +16,8 @@
#include "vmalloc.h"
#include "svm_lib.h"

-/* for the nested page table*/
-u64 *pte[2048];
-u64 *pde[4];
-u64 *pdpe;
-u64 *pml4e;
-
struct vmcb *vmcb;

-u64 *npt_get_pte(u64 address)
-{
- int i1, i2;
-
- address >>= 12;
- i1 = (address >> 9) & 0x7ff;
- i2 = address & 0x1ff;
-
- return &pte[i1][i2];
-}
-
-u64 *npt_get_pde(u64 address)
-{
- int i1, i2;
-
- address >>= 21;
- i1 = (address >> 9) & 0x3;
- i2 = address & 0x1ff;
-
- return &pde[i1][i2];
-}
-
-u64 *npt_get_pdpe(void)
-{
- return pdpe;
-}
-
-u64 *npt_get_pml4e(void)
-{
- return pml4e;
-}
-
bool smp_supported(void)
{
return cpu_count() > 1;
@@ -124,12 +86,6 @@ static void test_thunk(struct svm_test *test)
vmmcall();
}

-u8 *io_bitmap;
-u8 io_bitmap_area[16384];
-
-u8 *msr_bitmap;
-u8 msr_bitmap_area[MSR_BITMAP_SIZE + PAGE_SIZE];
-
void vmcb_ident(struct vmcb *vmcb)
{
u64 vmcb_phys = virt_to_phys(vmcb);
@@ -165,12 +121,12 @@ void vmcb_ident(struct vmcb *vmcb)
ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
(1ULL << INTERCEPT_VMMCALL) |
(1ULL << INTERCEPT_SHUTDOWN);
- ctrl->iopm_base_pa = virt_to_phys(io_bitmap);
- ctrl->msrpm_base_pa = virt_to_phys(msr_bitmap);
+ ctrl->iopm_base_pa = virt_to_phys(svm_get_io_bitmap());
+ ctrl->msrpm_base_pa = virt_to_phys(svm_get_msr_bitmap());

if (npt_supported()) {
ctrl->nested_ctl = 1;
- ctrl->nested_cr3 = (u64)pml4e;
+ ctrl->nested_cr3 = (u64)npt_get_pml4e();
ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
}
@@ -259,72 +215,7 @@ static noinline void test_run(struct svm_test *test)
test->on_vcpu_done = true;
}

-static void set_additional_vcpu_msr(void *msr_efer)
-{
- void *hsave = alloc_page();
-
- wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
- wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME);
-}
-
-static void setup_svm(void)
-{
- void *hsave = alloc_page();
- u64 *page, address;
- int i,j;
-
- wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
- wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SVME);
-
- io_bitmap = (void *) ALIGN((ulong)io_bitmap_area, PAGE_SIZE);
-
- msr_bitmap = (void *) ALIGN((ulong)msr_bitmap_area, PAGE_SIZE);
-
- if (!npt_supported())
- return;
-
- for (i = 1; i < cpu_count(); i++)
- on_cpu(i, (void *)set_additional_vcpu_msr, (void *)rdmsr(MSR_EFER));
-
- printf("NPT detected - running all tests with NPT enabled\n");
-
- /*
- * Nested paging supported - Build a nested page table
- * Build the page-table bottom-up and map everything with 4k
- * pages to get enough granularity for the NPT unit-tests.
- */
-
- address = 0;

- /* PTE level */
- for (i = 0; i < 2048; ++i) {
- page = alloc_page();
-
- for (j = 0; j < 512; ++j, address += 4096)
- page[j] = address | 0x067ULL;
-
- pte[i] = page;
- }
-
- /* PDE level */
- for (i = 0; i < 4; ++i) {
- page = alloc_page();
-
- for (j = 0; j < 512; ++j)
- page[j] = (u64)pte[(i * 512) + j] | 0x027ULL;
-
- pde[i] = page;
- }
-
- /* PDPe level */
- pdpe = alloc_page();
- for (i = 0; i < 4; ++i)
- pdpe[i] = ((u64)(pde[i])) | 0x27;
-
- /* PML4e level */
- pml4e = alloc_page();
- pml4e[0] = ((u64)pdpe) | 0x27;
-}

int matched;

diff --git a/x86/svm.h b/x86/svm.h
index 1eb98de3..7fecb429 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -5,7 +5,6 @@
#include <x86/svm.h>


-#define MSR_BITMAP_SIZE 8192
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)

struct svm_test {
@@ -46,10 +45,6 @@ struct regs {

typedef void (*test_guest_func)(struct svm_test *);

-u64 *npt_get_pte(u64 address);
-u64 *npt_get_pde(u64 address);
-u64 *npt_get_pdpe(void);
-u64 *npt_get_pml4e(void);
bool smp_supported(void);
bool default_supported(void);
void default_prepare(struct svm_test *test);
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index b6a0d5e6..07ac01ff 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -309,14 +309,13 @@ static bool check_next_rip(struct svm_test *test)
return address == vmcb->control.next_rip;
}

-extern u8 *msr_bitmap;

static void prepare_msr_intercept(struct svm_test *test)
{
default_prepare(test);
vmcb->control.intercept |= (1ULL << INTERCEPT_MSR_PROT);
vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR);
- memset(msr_bitmap, 0xff, MSR_BITMAP_SIZE);
+ memset(svm_get_msr_bitmap(), 0xff, MSR_BITMAP_SIZE);
}

static void test_msr_intercept(struct svm_test *test)
@@ -427,7 +426,7 @@ static bool msr_intercept_finished(struct svm_test *test)

static bool check_msr_intercept(struct svm_test *test)
{
- memset(msr_bitmap, 0, MSR_BITMAP_SIZE);
+ memset(svm_get_msr_bitmap(), 0, MSR_BITMAP_SIZE);
return (test->scratch == -2);
}

@@ -539,10 +538,10 @@ static bool check_mode_switch(struct svm_test *test)
return test->scratch == 2;
}

-extern u8 *io_bitmap;
-
static void prepare_ioio(struct svm_test *test)
{
+ u8 *io_bitmap = svm_get_io_bitmap();
+
vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT);
test->scratch = 0;
memset(io_bitmap, 0, 8192);
@@ -551,6 +550,8 @@ static void prepare_ioio(struct svm_test *test)

static void test_ioio(struct svm_test *test)
{
+ u8 *io_bitmap = svm_get_io_bitmap();
+
// stage 0, test IO pass
inb(0x5000);
outb(0x0, 0x5000);
@@ -623,6 +624,7 @@ fail:
static bool ioio_finished(struct svm_test *test)
{
unsigned port, size;
+ u8 *io_bitmap = svm_get_io_bitmap();

/* Only expect IOIO intercepts */
if (vmcb->control.exit_code == SVM_EXIT_VMMCALL)
@@ -647,6 +649,8 @@ static bool ioio_finished(struct svm_test *test)

static bool check_ioio(struct svm_test *test)
{
+ u8 *io_bitmap = svm_get_io_bitmap();
+
memset(io_bitmap, 0, 8193);
return test->scratch != -1;
}
@@ -2514,7 +2518,8 @@ static void test_msrpm_iopm_bitmap_addrs(void)
{
u64 saved_intercept = vmcb->control.intercept;
u64 addr_beyond_limit = 1ull << cpuid_maxphyaddr();
- u64 addr = virt_to_phys(msr_bitmap) & (~((1ull << 12) - 1));
+ u64 addr = virt_to_phys(svm_get_msr_bitmap()) & (~((1ull << 12) - 1));
+ u8 *io_bitmap = svm_get_io_bitmap();

TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT,
addr_beyond_limit - 2 * PAGE_SIZE, SVM_EXIT_ERR,
--
2.26.3

From 7315483ca9c06017a4642ef8d5dfd4b19d47d712 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Mon, 28 Mar 2022 16:16:24 +0300
Subject: [PATCH 5/7] svm: move vmcb_ident to svm_lib.c

---
lib/x86/svm_lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
lib/x86/svm_lib.h | 4 ++++
x86/svm.c | 54 -----------------------------------------------
x86/svm.h | 1 -
4 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
index 8e59d81c..48246810 100644
--- a/lib/x86/svm_lib.c
+++ b/lib/x86/svm_lib.c
@@ -71,6 +71,15 @@ static void set_additional_vcpu_msr(void *msr_efer)
wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME);
}

+void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ u64 base, u32 limit, u32 attr)
+{
+ seg->selector = selector;
+ seg->attrib = attr;
+ seg->limit = limit;
+ seg->base = base;
+}
+
void setup_svm(void)
{
void *hsave = alloc_page();
@@ -129,3 +138,48 @@ void setup_svm(void)
pml4e = alloc_page();
pml4e[0] = ((u64)pdpe) | 0x27;
}
+
+void vmcb_ident(struct vmcb *vmcb)
+{
+ u64 vmcb_phys = virt_to_phys(vmcb);
+ struct vmcb_save_area *save = &vmcb->save;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+ u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+ u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+ struct descriptor_table_ptr desc_table_ptr;
+
+ memset(vmcb, 0, sizeof(*vmcb));
+ asm volatile ("vmsave %0" : : "a"(vmcb_phys) : "memory");
+ vmcb_set_seg(&save->es, read_es(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->cs, read_cs(), 0, -1U, code_seg_attr);
+ vmcb_set_seg(&save->ss, read_ss(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->ds, read_ds(), 0, -1U, data_seg_attr);
+ sgdt(&desc_table_ptr);
+ vmcb_set_seg(&save->gdtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
+ sidt(&desc_table_ptr);
+ vmcb_set_seg(&save->idtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
+ ctrl->asid = 1;
+ save->cpl = 0;
+ save->efer = rdmsr(MSR_EFER);
+ save->cr4 = read_cr4();
+ save->cr3 = read_cr3();
+ save->cr0 = read_cr0();
+ save->dr7 = read_dr7();
+ save->dr6 = read_dr6();
+ save->cr2 = read_cr2();
+ save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+ save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+ ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+ (1ULL << INTERCEPT_VMMCALL) |
+ (1ULL << INTERCEPT_SHUTDOWN);
+ ctrl->iopm_base_pa = virt_to_phys(svm_get_io_bitmap());
+ ctrl->msrpm_base_pa = virt_to_phys(svm_get_msr_bitmap());
+
+ if (npt_supported()) {
+ ctrl->nested_ctl = 1;
+ ctrl->nested_cr3 = (u64)npt_get_pml4e();
+ ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+ }
+}
diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index f5e83b85..6d9a86aa 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -54,7 +54,11 @@ static inline void clgi(void)
asm volatile ("clgi");
}

+void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ u64 base, u32 limit, u32 attr);
+
void setup_svm(void);
+void vmcb_ident(struct vmcb *vmcb);

u64 *npt_get_pte(u64 address);
u64 *npt_get_pde(u64 address);
diff --git a/x86/svm.c b/x86/svm.c
index 23e65261..74c3931b 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -64,15 +64,6 @@ void inc_test_stage(struct svm_test *test)
barrier();
}

-static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
- u64 base, u32 limit, u32 attr)
-{
- seg->selector = selector;
- seg->attrib = attr;
- seg->limit = limit;
- seg->base = base;
-}
-
static test_guest_func guest_main;

void test_set_guest(test_guest_func func)
@@ -86,51 +77,6 @@ static void test_thunk(struct svm_test *test)
vmmcall();
}

-void vmcb_ident(struct vmcb *vmcb)
-{
- u64 vmcb_phys = virt_to_phys(vmcb);
- struct vmcb_save_area *save = &vmcb->save;
- struct vmcb_control_area *ctrl = &vmcb->control;
- u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
- | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
- u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
- | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
- struct descriptor_table_ptr desc_table_ptr;
-
- memset(vmcb, 0, sizeof(*vmcb));
- asm volatile ("vmsave %0" : : "a"(vmcb_phys) : "memory");
- vmcb_set_seg(&save->es, read_es(), 0, -1U, data_seg_attr);
- vmcb_set_seg(&save->cs, read_cs(), 0, -1U, code_seg_attr);
- vmcb_set_seg(&save->ss, read_ss(), 0, -1U, data_seg_attr);
- vmcb_set_seg(&save->ds, read_ds(), 0, -1U, data_seg_attr);
- sgdt(&desc_table_ptr);
- vmcb_set_seg(&save->gdtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
- sidt(&desc_table_ptr);
- vmcb_set_seg(&save->idtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
- ctrl->asid = 1;
- save->cpl = 0;
- save->efer = rdmsr(MSR_EFER);
- save->cr4 = read_cr4();
- save->cr3 = read_cr3();
- save->cr0 = read_cr0();
- save->dr7 = read_dr7();
- save->dr6 = read_dr6();
- save->cr2 = read_cr2();
- save->g_pat = rdmsr(MSR_IA32_CR_PAT);
- save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
- ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
- (1ULL << INTERCEPT_VMMCALL) |
- (1ULL << INTERCEPT_SHUTDOWN);
- ctrl->iopm_base_pa = virt_to_phys(svm_get_io_bitmap());
- ctrl->msrpm_base_pa = virt_to_phys(svm_get_msr_bitmap());
-
- if (npt_supported()) {
- ctrl->nested_ctl = 1;
- ctrl->nested_cr3 = (u64)npt_get_pml4e();
- ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
- }
-}
-
struct regs regs;

struct regs get_regs(void)
diff --git a/x86/svm.h b/x86/svm.h
index 7fecb429..4c609795 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -53,7 +53,6 @@ bool default_finished(struct svm_test *test);
int get_test_stage(struct svm_test *test);
void set_test_stage(struct svm_test *test, int s);
void inc_test_stage(struct svm_test *test);
-void vmcb_ident(struct vmcb *vmcb);
struct regs get_regs(void);
int __svm_vmrun(u64 rip);
void __svm_bare_vmrun(void);
--
2.26.3

From f06ebf20cd0115be33c38ce887ef6d28ad562183 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Sun, 3 Apr 2022 10:46:43 +0300
Subject: [PATCH 6/7] svm: move svm entry macros to svm_lib.h

---
lib/x86/svm_lib.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
x86/svm.c | 22 ++++++---------
x86/svm.h | 71 ++---------------------------------------------
x86/svm_tests.c | 9 +++---
4 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 6d9a86aa..f682c679 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -71,4 +71,72 @@ u8* svm_get_io_bitmap(void);
#define MSR_BITMAP_SIZE 8192


+struct x86_gpr_regs
+{
+ u64 rax;
+ u64 rbx;
+ u64 rcx;
+ u64 rdx;
+ u64 cr2;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u64 rflags;
+};
+
+#define SAVE_GPR_C(regs) \
+ "xchg %%rbx, %p[" #regs "]+0x8\n\t" \
+ "xchg %%rcx, %p[" #regs "]+0x10\n\t" \
+ "xchg %%rdx, %p[" #regs "]+0x18\n\t" \
+ "xchg %%rbp, %p[" #regs "]+0x28\n\t" \
+ "xchg %%rsi, %p[" #regs "]+0x30\n\t" \
+ "xchg %%rdi, %p[" #regs "]+0x38\n\t" \
+ "xchg %%r8, %p[" #regs "]+0x40\n\t" \
+ "xchg %%r9, %p[" #regs "]+0x48\n\t" \
+ "xchg %%r10, %p[" #regs "]+0x50\n\t" \
+ "xchg %%r11, %p[" #regs "]+0x58\n\t" \
+ "xchg %%r12, %p[" #regs "]+0x60\n\t" \
+ "xchg %%r13, %p[" #regs "]+0x68\n\t" \
+ "xchg %%r14, %p[" #regs "]+0x70\n\t" \
+ "xchg %%r15, %p[" #regs "]+0x78\n\t" \
+
+#define LOAD_GPR_C(regs) SAVE_GPR_C(regs)
+
+#define ASM_PRE_VMRUN_CMD(regs) \
+ "vmload %%rax\n\t" \
+ "mov %p[" #regs "]+0x80, %%r15\n\t" \
+ "mov %%r15, 0x170(%%rax)\n\t" \
+ "mov %p[" #regs "], %%r15\n\t" \
+ "mov %%r15, 0x1f8(%%rax)\n\t" \
+ LOAD_GPR_C(regs) \
+
+#define ASM_POST_VMRUN_CMD(regs) \
+ SAVE_GPR_C(regs) \
+ "mov 0x170(%%rax), %%r15\n\t" \
+ "mov %%r15, %p[regs]+0x80\n\t" \
+ "mov 0x1f8(%%rax), %%r15\n\t" \
+ "mov %%r15, %p[regs]\n\t" \
+ "vmsave %%rax\n\t" \
+
+
+#define SVM_BARE_VMRUN(vmcb, regs) \
+ asm volatile ( \
+ ASM_PRE_VMRUN_CMD(regs) \
+ "vmrun %%rax\n\t" \
+ ASM_POST_VMRUN_CMD(regs) \
+ : \
+ : "a" (virt_to_phys(vmcb)), \
+ [regs] "i" (&regs) \
+ : "memory", "r15")
+
+
#endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/svm.c b/x86/svm.c
index 74c3931b..b2dbef75 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -77,9 +77,9 @@ static void test_thunk(struct svm_test *test)
vmmcall();
}

-struct regs regs;
+struct x86_gpr_regs regs;

-struct regs get_regs(void)
+struct x86_gpr_regs get_regs(void)
{
return regs;
}
@@ -98,13 +98,7 @@ int __svm_vmrun(u64 rip)
vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
regs.rdi = (ulong)v2_test;

- asm volatile (
- ASM_PRE_VMRUN_CMD
- "vmrun %%rax\n\t" \
- ASM_POST_VMRUN_CMD
- :
- : "a" (virt_to_phys(vmcb))
- : "memory", "r15");
+ SVM_BARE_VMRUN(vmcb, regs);

return (vmcb->control.exit_code);
}
@@ -118,6 +112,7 @@ extern u8 vmrun_rip;

static noinline void test_run(struct svm_test *test)
{
+
u64 vmcb_phys = virt_to_phys(vmcb);

irq_disable();
@@ -136,18 +131,19 @@ static noinline void test_run(struct svm_test *test)
"sti \n\t"
"call *%c[PREPARE_GIF_CLEAR](%[test]) \n \t"
"mov %[vmcb_phys], %%rax \n\t"
- ASM_PRE_VMRUN_CMD
+ ASM_PRE_VMRUN_CMD(regs)
".global vmrun_rip\n\t" \
"vmrun_rip: vmrun %%rax\n\t" \
- ASM_POST_VMRUN_CMD
+ ASM_POST_VMRUN_CMD(regs)
"cli \n\t"
"stgi"
: // inputs clobbered by the guest:
"=D" (the_test), // first argument register
"=b" (the_vmcb) // callee save register!
: [test] "0" (the_test),
- [vmcb_phys] "1"(the_vmcb),
- [PREPARE_GIF_CLEAR] "i" (offsetof(struct svm_test, prepare_gif_clear))
+ [vmcb_phys] "1"(the_vmcb),
+ [PREPARE_GIF_CLEAR] "i" (offsetof(struct svm_test, prepare_gif_clear)),
+ [regs] "i"(&regs)
: "rax", "rcx", "rdx", "rsi",
"r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15",
"memory");
diff --git a/x86/svm.h b/x86/svm.h
index 4c609795..7cc3b690 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -23,28 +23,10 @@ struct svm_test {
bool on_vcpu_done;
};

-struct regs {
- u64 rax;
- u64 rbx;
- u64 rcx;
- u64 rdx;
- u64 cr2;
- u64 rbp;
- u64 rsi;
- u64 rdi;
- u64 r8;
- u64 r9;
- u64 r10;
- u64 r11;
- u64 r12;
- u64 r13;
- u64 r14;
- u64 r15;
- u64 rflags;
-};
-
typedef void (*test_guest_func)(struct svm_test *);

+extern struct x86_gpr_regs regs;
+
bool smp_supported(void);
bool default_supported(void);
void default_prepare(struct svm_test *test);
@@ -53,7 +35,7 @@ bool default_finished(struct svm_test *test);
int get_test_stage(struct svm_test *test);
void set_test_stage(struct svm_test *test, int s);
void inc_test_stage(struct svm_test *test);
-struct regs get_regs(void);
+struct x86_gpr_regs get_regs(void);
int __svm_vmrun(u64 rip);
void __svm_bare_vmrun(void);
int svm_vmrun(void);
@@ -61,51 +43,4 @@ void test_set_guest(test_guest_func func);

extern struct vmcb *vmcb;
extern struct svm_test svm_tests[];
-
-
-#define SAVE_GPR_C \
- "xchg %%rbx, regs+0x8\n\t" \
- "xchg %%rcx, regs+0x10\n\t" \
- "xchg %%rdx, regs+0x18\n\t" \
- "xchg %%rbp, regs+0x28\n\t" \
- "xchg %%rsi, regs+0x30\n\t" \
- "xchg %%rdi, regs+0x38\n\t" \
- "xchg %%r8, regs+0x40\n\t" \
- "xchg %%r9, regs+0x48\n\t" \
- "xchg %%r10, regs+0x50\n\t" \
- "xchg %%r11, regs+0x58\n\t" \
- "xchg %%r12, regs+0x60\n\t" \
- "xchg %%r13, regs+0x68\n\t" \
- "xchg %%r14, regs+0x70\n\t" \
- "xchg %%r15, regs+0x78\n\t"
-
-#define LOAD_GPR_C SAVE_GPR_C
-
-#define ASM_PRE_VMRUN_CMD \
- "vmload %%rax\n\t" \
- "mov regs+0x80, %%r15\n\t" \
- "mov %%r15, 0x170(%%rax)\n\t" \
- "mov regs, %%r15\n\t" \
- "mov %%r15, 0x1f8(%%rax)\n\t" \
- LOAD_GPR_C \
-
-#define ASM_POST_VMRUN_CMD \
- SAVE_GPR_C \
- "mov 0x170(%%rax), %%r15\n\t" \
- "mov %%r15, regs+0x80\n\t" \
- "mov 0x1f8(%%rax), %%r15\n\t" \
- "mov %%r15, regs\n\t" \
- "vmsave %%rax\n\t" \
-
-
-
-#define SVM_BARE_VMRUN \
- asm volatile ( \
- ASM_PRE_VMRUN_CMD \
- "vmrun %%rax\n\t" \
- ASM_POST_VMRUN_CMD \
- : \
- : "a" (virt_to_phys(vmcb)) \
- : "memory", "r15") \
-
#endif
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 07ac01ff..cb47fb02 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -3147,6 +3147,7 @@ into:
static void svm_into_test(void)
{
handle_exception(OF_VECTOR, guest_test_of_handler);
+
test_set_guest(svm_of_test_guest);
report(svm_vmrun() == SVM_EXIT_VMMCALL && of_test_counter == 1,
"#OF is generated in L2 exception handler0");
@@ -3351,7 +3352,7 @@ static void svm_lbrv_test1(void)

wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
DO_BRANCH(host_branch1);
- SVM_BARE_VMRUN;
+ SVM_BARE_VMRUN(vmcb,regs);
dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);

if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
@@ -3374,7 +3375,7 @@ static void svm_lbrv_test2(void)
wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
DO_BRANCH(host_branch2);
wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
- SVM_BARE_VMRUN;
+ SVM_BARE_VMRUN(vmcb,regs);
dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
wrmsr(MSR_IA32_DEBUGCTLMSR, 0);

@@ -3402,7 +3403,7 @@ static void svm_lbrv_nested_test1(void)

wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
DO_BRANCH(host_branch3);
- SVM_BARE_VMRUN;
+ SVM_BARE_VMRUN(vmcb,regs);
dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
wrmsr(MSR_IA32_DEBUGCTLMSR, 0);

@@ -3437,7 +3438,7 @@ static void svm_lbrv_nested_test2(void)

wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
DO_BRANCH(host_branch4);
- SVM_BARE_VMRUN;
+ SVM_BARE_VMRUN(vmcb,regs);
dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
wrmsr(MSR_IA32_DEBUGCTLMSR, 0);

--
2.26.3

From d5acfbc39399d4727eaafcbe0d9eabedb54d76a9 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Date: Tue, 30 Nov 2021 13:56:57 +0200
Subject: [PATCH 7/7] add unit test for avic ipi

Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
---
x86/Makefile.common | 4 +-
x86/ipi_stress.c | 252 ++++++++++++++++++++++++++++++++++++++++++++
x86/unittests.cfg | 5 +
3 files changed, 260 insertions(+), 1 deletion(-)
create mode 100644 x86/ipi_stress.c

diff --git a/x86/Makefile.common b/x86/Makefile.common
index b9039882..21c6af15 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -84,7 +84,9 @@ tests-common = $(TEST_DIR)/vmexit.$(exe) $(TEST_DIR)/tsc.$(exe) \
$(TEST_DIR)/tsx-ctrl.$(exe) \
$(TEST_DIR)/eventinj.$(exe) \
$(TEST_DIR)/smap.$(exe) \
- $(TEST_DIR)/umip.$(exe)
+ $(TEST_DIR)/umip.$(exe) \
+ $(TEST_DIR)/ipi_stress.$(exe)
+

# The following test cases are disabled when building EFI tests because they
# use absolute addresses in their inline assembly code, which cannot compile
diff --git a/x86/ipi_stress.c b/x86/ipi_stress.c
new file mode 100644
index 00000000..950c2439
--- /dev/null
+++ b/x86/ipi_stress.c
@@ -0,0 +1,252 @@
+#include "libcflat.h"
+#include "smp.h"
+#include "alloc.h"
+#include "apic.h"
+#include "processor.h"
+#include "isr.h"
+#include "asm/barrier.h"
+#include "delay.h"
+#include "svm.h"
+#include "desc.h"
+#include "msr.h"
+#include "vm.h"
+#include "types.h"
+#include "alloc_page.h"
+#include "vmalloc.h"
+#include "svm_lib.h"
+
+u64 num_iterations = -1;
+struct x86_gpr_regs regs;
+u64 guest_stack[10000];
+struct vmcb *vmcb;
+
+volatile u64 *isr_counts;
+bool use_svm;
+int hlt_allowed = -1;
+
+
+static int get_random(int min, int max)
+{
+ /* TODO : use rdrand to seed an PRNG instead */
+ u64 random_value = rdtsc() >> 4;
+
+ return min + random_value % (max - min + 1);
+}
+
+static void ipi_interrupt_handler(isr_regs_t *r)
+{
+ isr_counts[smp_id()]++;
+ eoi();
+}
+
+static void wait_for_ipi(volatile u64 *count)
+{
+ u64 old_count = *count;
+ bool use_halt;
+
+ switch (hlt_allowed) {
+ case -1:
+ use_halt = get_random(0,10000) == 0;
+ break;
+ case 0:
+ use_halt = false;
+ break;
+ case 1:
+ use_halt = true;
+ break;
+ default:
+ use_halt = false;
+ break;
+ }
+
+ do {
+ if (use_halt)
+ asm volatile ("sti;hlt;cli\n");
+ else
+ asm volatile ("sti;nop;cli");
+
+ } while (old_count == *count);
+}
+
+/******************************************************************************************************/
+
+#ifdef __x86_64__
+static void l2_guest_wait_for_ipi(volatile u64 *count)
+{
+ wait_for_ipi(count);
+ asm volatile("vmmcall");
+}
+
+static void l2_guest_dummy(void)
+{
+ asm volatile("vmmcall");
+}
+
+static void wait_for_ipi_in_l2(volatile u64 *count, struct vmcb *vmcb)
+{
+ u64 old_count = *count;
+ bool irq_on_vmentry = get_random(0,1) == 0;
+
+ /* save.rip is assigned below based on irq_on_vmentry */
+ vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
+ regs.rdi = (u64)count;
+
+ vmcb->save.rip = irq_on_vmentry ? (ulong)l2_guest_dummy : (ulong)l2_guest_wait_for_ipi;
+
+ do {
+ if (irq_on_vmentry)
+ vmcb->save.rflags |= X86_EFLAGS_IF;
+ else
+ vmcb->save.rflags &= ~X86_EFLAGS_IF;
+
+ asm volatile("clgi;nop;sti");
+ // GIF is set by VMRUN
+ SVM_BARE_VMRUN(vmcb, regs);
+ // GIF is cleared by VMEXIT
+ asm volatile("cli;nop;stgi");
+
+ assert(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ } while (old_count == *count);
+}
+#endif
+
+/******************************************************************************************************/
+
+#define FIRST_TEST_VCPU 1
+
+static void vcpu_init(void *data)
+{
+ /* Two alternating IPI vectors are used so the iteration parity is visible in the trace */
+ handle_irq(0x40, ipi_interrupt_handler);
+ handle_irq(0x50, ipi_interrupt_handler);
+}
+
+static void vcpu_code(void *data)
+{
+ int ncpus = cpu_count();
+ int cpu = (long)data;
+
+ u64 i;
+
+#ifdef __x86_64__
+ if (cpu == 2 && use_svm)
+ {
+ vmcb = alloc_page();
+ vmcb_ident(vmcb);
+
+ // when set, intercept physical interrupts
+ //vmcb->control.intercept |= (1 << INTERCEPT_INTR);
+
+ // when set, host IF controls the masking of interrupts while the guest runs
+ // guest IF only might allow a virtual interrupt to be injected (if set in int_ctl)
+ //vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
+ }
+#endif
+
+ assert(cpu != 0);
+
+ if (cpu != FIRST_TEST_VCPU)
+ wait_for_ipi(&isr_counts[cpu]);
+
+ for (i = 0; i < num_iterations; i++)
+ {
+ u8 physical_dst = cpu == ncpus -1 ? 1 : cpu + 1;
+
+ // send IPI to a next vCPU in a circular fashion
+ apic_icr_write(APIC_INT_ASSERT |
+ APIC_DEST_PHYSICAL |
+ APIC_DM_FIXED |
+ (i % 2 ? 0x40 : 0x50),
+ physical_dst);
+
+ if (i == (num_iterations - 1) && cpu != FIRST_TEST_VCPU)
+ break;
+
+#ifdef __x86_64__
+ // wait for the IPI interrupt chain to come back to us
+ if (cpu == 2 && use_svm) {
+ wait_for_ipi_in_l2(&isr_counts[cpu], vmcb);
+ continue;
+ }
+#endif
+
+ wait_for_ipi(&isr_counts[cpu]);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int cpu, ncpus = cpu_count();
+
+ assert(ncpus > 2);
+
+ if (argc > 1)
+ hlt_allowed = atol(argv[1]);
+
+ if (argc > 2)
+ num_iterations = atol(argv[2]);
+
+ setup_vm();
+
+#ifdef __x86_64__
+ if (svm_supported()) {
+ use_svm = true;
+ setup_svm();
+ }
+#endif
+
+ isr_counts = (volatile u64 *)calloc(ncpus, sizeof(u64));
+
+ printf("found %d cpus\n", ncpus);
+ printf("running for %lld iterations - test\n",
+ (long long unsigned int)num_iterations);
+
+ /*
+ * Ensure that we don't have interrupt window pending
+ * from PIT timer which inhibits the AVIC.
+ */
+
+ asm volatile("sti;nop;cli\n");
+
+ for (cpu = 0; cpu < ncpus; ++cpu)
+ on_cpu_async(cpu, vcpu_init, (void *)(long)cpu);
+
+ /* now let all the vCPUs end the IPI function*/
+ while (cpus_active() > 1)
+ pause();
+
+ printf("starting test on all cpus but 0...\n");
+
+ for (cpu = ncpus-1; cpu >= FIRST_TEST_VCPU; cpu--)
+ on_cpu_async(cpu, vcpu_code, (void *)(long)cpu);
+
+ printf("test started, waiting to end...\n");
+
+ while (cpus_active() > 1) {
+
+ unsigned long isr_count1, isr_count2;
+
+ isr_count1 = isr_counts[1];
+ delay(5ULL*1000*1000*1000);
+ isr_count2 = isr_counts[1];
+
+ if (isr_count1 == isr_count2) {
+ printf("\n");
+ printf("hang detected!!\n");
+ //break;
+ } else {
+ printf("made %ld IPIs \n", (isr_count2 - isr_count1)*(ncpus-1));
+ }
+ }
+
+ printf("\n");
+
+ for (cpu = 1; cpu < ncpus; ++cpu)
+ report(isr_counts[cpu] == num_iterations,
+ "Number of IPIs match (%lld)",
+ (long long unsigned int)isr_counts[cpu]);
+
+ free((void*)isr_counts);
+ return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 37017971..c001f42b 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -61,6 +61,11 @@ smp = 2
file = smptest.flat
smp = 3

+[ipi_stress]
+file = ipi_stress.flat
+extra_params = -cpu host,-x2apic,-svm,-hypervisor -global kvm-pit.lost_tick_policy=discard -machine kernel-irqchip=on -append '0 50000'
+smp = 4
+
[vmexit_cpuid]
file = vmexit.flat
extra_params = -append 'cpuid'
--
2.26.3