Re: [RFC PATCH v5 003/104] KVM: TDX: Detect CPU feature on kernel module initialization

From: Sean Christopherson
Date: Fri Apr 08 2022 - 12:46:17 EST


On Fri, Mar 04, 2022, isaku.yamahata@xxxxxxxxx wrote:
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..1acf08c310c4
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,53 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) "tdx: " fmt
> +
> +static bool __read_mostly enable_tdx = true;
> +module_param_named(tdx, enable_tdx, bool, 0644);

This is comically unsafe, userspace must not be allowed to toggle enable_tdx
after KVM is loaded.

> +static u64 hkid_mask __ro_after_init;
> +static u8 hkid_start_pos __ro_after_init;
> +
> +static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + u32 max_pa;
> +
> + if (!enable_ept) {
> + pr_warn("Cannot enable TDX with EPT disabled\n");
> + return -EINVAL;
> + }
> +
> + if (!platform_has_tdx()) {
> + pr_warn("Cannot enable TDX with SEAMRR disabled\n");
> + return -ENODEV;
> + }
> +
> + if (WARN_ON_ONCE(x86_ops->tlb_remote_flush))
> + return -EIO;
> +
> + max_pa = cpuid_eax(0x80000008) & 0xff;
> + hkid_start_pos = boot_cpu_data.x86_phys_bits;
> + hkid_mask = GENMASK_ULL(max_pa - 1, hkid_start_pos);
> +
> + return 0;
> +}
> +
> +void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + /*
> + * This function is called at the initialization. No need to protect
> + * enable_tdx.
> + */
> + if (!enable_tdx)
> + return;
> +
> + if (__tdx_hardware_setup(&vt_x86_ops))
> + enable_tdx = false;

Clearing enable_tdx here unnecessarily risks introducing bugs in the caller,
e.g. acting on enable_tdx before tdx_hardware_setup() is invoked. I'm guessing
this was the result of trying to defer module load until VM creation. With that
gone, the flag can be moved to vmx/main.c, as there should be zero reason for
tdx.c to check/modify enable_tdx, i.e. functions in tdx.c should never be called
if enabled_tdx = false.

An alteranative to

if (enable_tdx && tdx_hardware_setup(&vt_x86_ops))
enable_tdx = false;

would be

enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);

I actually prefer the latter (no "if"), but I already generated and wiped the below
diff before thinking of that.

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index b79fcc8d81dd..43e13c2a804e 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,9 @@
#include "nested.h"
#include "pmu.h"

+static bool __read_mostly enable_tdx = IS_ENABLED(CONFIG_INTEL_TDX_HOST);
+module_param_named(tdx, enable_tdx, bool, 0644);
+
static __init int vt_hardware_setup(void)
{
int ret;
@@ -14,7 +17,8 @@ static __init int vt_hardware_setup(void)
if (ret)
return ret;

- tdx_hardware_setup(&vt_x86_ops);
+ if (enable_tdx && tdx_hardware_setup(&vt_x86_ops))
+ enable_tdx = false;

return 0;
}
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 1acf08c310c4..3f660f323426 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -9,13 +9,10 @@
#undef pr_fmt
#define pr_fmt(fmt) "tdx: " fmt

-static bool __read_mostly enable_tdx = true;
-module_param_named(tdx, enable_tdx, bool, 0644);
-
static u64 hkid_mask __ro_after_init;
static u8 hkid_start_pos __ro_after_init;

-static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+static int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
{
u32 max_pa;

@@ -38,16 +35,3 @@ static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)

return 0;
}
-
-void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
-{
- /*
- * This function is called at the initialization. No need to protect
- * enable_tdx.
- */
- if (!enable_tdx)
- return;
-
- if (__tdx_hardware_setup(&vt_x86_ops))
- enable_tdx = false;
-}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index ccf98e79d8c3..fd60128eb10a 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -124,9 +124,9 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
void vmx_setup_mce(struct kvm_vcpu *vcpu);

#ifdef CONFIG_INTEL_TDX_HOST
-void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
#else
-static inline void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) {}
+static inline int void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return 0; }
#endif

#endif /* __KVM_X86_VMX_X86_OPS_H */