Re: [PATCH v11 005/113] KVM: x86: Introduce vm_type to differentiate default VMs from confidential VMs

From: Binbin Wu
Date: Mon Jan 16 2023 - 22:31:45 EST



On 1/13/2023 12:31 AM, isaku.yamahata@xxxxxxxxx wrote:
From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>

Unlike default VMs, confidential VMs (Intel TDX and AMD SEV-ES) don't allow
some operations (e.g., memory read/write, register state access, etc).

Introduce vm_type to track the type of the VM to x86 KVM. Other arch KVMs
already use vm_type, KVM_INIT_VM accepts vm_type, and x86 KVM callback
vm_init accepts vm_type. So follow them. Further, a different policy can
be made based on vm_type. Define KVM_X86_DEFAULT_VM for default VM as
default and define KVM_X86_TDX_VM for Intel TDX VM. The wrapper function
will be defined as "bool is_td(kvm) { return vm_type == VM_TYPE_TDX; }"

Add a capability KVM_CAP_VM_TYPES to effectively allow device model,
e.g. qemu, to query what VM types are supported by KVM. This (introduce a
new capability and add vm_type) is chosen to align with other arch KVMs
that have VM types already. Other arch KVMs uses

uses -> use


different name
name -> names
to query
supported vm types and there is no common name for it, so new name was
chosen.

Co-developed-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
Documentation/virt/kvm/api.rst | 21 +++++++++++++++++++++
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 3 +++
arch/x86/kvm/svm/svm.c | 6 ++++++
arch/x86/kvm/vmx/main.c | 1 +
arch/x86/kvm/vmx/tdx.h | 6 +-----
arch/x86/kvm/vmx/vmx.c | 5 +++++
arch/x86/kvm/vmx/x86_ops.h | 1 +
arch/x86/kvm/x86.c | 9 ++++++++-
include/uapi/linux/kvm.h | 1 +
tools/arch/x86/include/uapi/asm/kvm.h | 3 +++
tools/include/uapi/linux/kvm.h | 1 +
13 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 98459999273c..d2baa05f7c04 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -147,10 +147,31 @@ described as 'basic' will be available.
The new VM has no virtual cpus and no memory.
You probably want to use 0 as machine type.
+X86:
+^^^^
+
+Supported vm type can be queried from KVM_CAP_VM_TYPES, which returns the
+bitmap of supported vm types. The 1-setting of bit @n means vm type with
+value @n is supported.
+
+S390:
+^^^^^
+
In order to create user controlled virtual machines on S390, check
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).
+MIPS:
+^^^^^
+
+To use hardware assisted virtualization on MIPS (VZ ASE) rather than
+the default trap & emulate implementation (which changes the virtual
+memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
+flag KVM_VM_MIPS_VZ.
+
+ARM64:
+^^^^^^
+
On arm64, the physical address size for a VM (IPA Size limit) is limited
to 40bits by default. The limit can be configured if the host supports the
extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index dba2909e5ae2..59181b12ad70 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -20,6 +20,7 @@ KVM_X86_OP(hardware_disable)
KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
+KVM_X86_OP(is_vm_type_supported)
KVM_X86_OP(vm_init)
KVM_X86_OP_OPTIONAL(vm_destroy)
KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 60dc8f1631de..c6ccfce7dc9e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1212,6 +1212,7 @@ enum kvm_apicv_inhibit {
};
struct kvm_arch {
+ unsigned long vm_type;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
@@ -1536,6 +1537,7 @@ struct kvm_x86_ops {
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
+ bool (*is_vm_type_supported)(unsigned long vm_type);
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e48deab8901d..a4cca6bc6b06 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -529,4 +529,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+#define KVM_X86_DEFAULT_VM 0
+#define KVM_X86_TDX_VM 1
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 799b24801d31..55f2e0a9b0f6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4682,6 +4682,11 @@ static void svm_vm_destroy(struct kvm *kvm)
sev_vm_destroy(kvm);
}
+static bool svm_is_vm_type_supported(unsigned long type)
+{
+ return type == KVM_X86_DEFAULT_VM;
+}
+
static int svm_vm_init(struct kvm *kvm)
{
if (!pause_filter_count || !pause_filter_thresh)
@@ -4710,6 +4715,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_free = svm_vcpu_free,
.vcpu_reset = svm_vcpu_reset,
+ .is_vm_type_supported = svm_is_vm_type_supported,
.vm_size = sizeof(struct kvm_svm),
.vm_init = svm_vm_init,
.vm_destroy = svm_vm_destroy,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index f5d1166d2718..3b24e32077d6 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -34,6 +34,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.hardware_disable = vmx_hardware_disable,
.has_emulated_msr = vmx_has_emulated_msr,
+ .is_vm_type_supported = vmx_is_vm_type_supported,
.vm_size = sizeof(struct kvm_vmx),
.vm_init = vmx_vm_init,
.vm_destroy = vmx_vm_destroy,
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 060bf48ec3d6..473013265bd8 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -15,11 +15,7 @@ struct vcpu_tdx {
static inline bool is_td(struct kvm *kvm)
{
- /*
- * TDX VM type isn't defined yet.
- * return kvm->arch.vm_type == KVM_X86_TDX_VM;
- */
- return false;
+ return kvm->arch.vm_type == KVM_X86_TDX_VM;
}
static inline bool is_td_vcpu(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5dc7687dcf16..f1dea386d6c2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7501,6 +7501,11 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
return err;
}
+bool vmx_is_vm_type_supported(unsigned long type)
+{
+ return type == KVM_X86_DEFAULT_VM;
+}
+
#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index fbc57fcbdd21..6980126bc32a 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -32,6 +32,7 @@ void vmx_hardware_unsetup(void);
int vmx_check_processor_compat(void);
int vmx_hardware_enable(void);
void vmx_hardware_disable(void);
+bool vmx_is_vm_type_supported(unsigned long type);
int vmx_vm_init(struct kvm *kvm);
void vmx_vm_destroy(struct kvm *kvm);
int vmx_vcpu_precreate(struct kvm *kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 07e8ab791e37..68bff699096a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4535,6 +4535,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X86_NOTIFY_VMEXIT:
r = kvm_caps.has_notify_vmexit;
break;
+ case KVM_CAP_VM_TYPES:
+ r = BIT(KVM_X86_DEFAULT_VM);
+ if (static_call(kvm_x86_is_vm_type_supported)(KVM_X86_TDX_VM))
+ r |= BIT(KVM_X86_TDX_VM);
+ break;
default:
break;
}
@@ -12126,9 +12131,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
int ret;
unsigned long flags;
- if (type)
+ if (!static_call(kvm_x86_is_vm_type_supported)(type))
return -EINVAL;
+ kvm->arch.vm_type = type;
+
ret = kvm_page_track_init(kvm);
if (ret)
goto out;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 679d293ece0f..2a47fd0e51fd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1212,6 +1212,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_MEMORY_ATTRIBUTES 226
+#define KVM_CAP_VM_TYPES 227
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 649e50a8f9dd..b67d2d59eb6c 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -524,4 +524,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+#define KVM_X86_DEFAULT_VM 0
+#define KVM_X86_TDX_VM 1
+
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 20522d4ba1e0..792a4889d1f4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
+#define KVM_CAP_VM_TYPES 227
#ifdef KVM_CAP_IRQ_ROUTING