[tip: x86/mm] x86/kvm: Refactor L1D flushing

From: tip-bot2 for Balbir Singh
Date: Fri May 22 2020 - 05:33:23 EST


The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 3f768f0032dbc0657ed7e48f4735a3c4e49e25d7
Gitweb: https://git.kernel.org/tip/3f768f0032dbc0657ed7e48f4735a3c4e49e25d7
Author: Balbir Singh <sblbir@xxxxxxxxxx>
AuthorDate: Sun, 10 May 2020 11:48:01 +10:00
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitterDate: Wed, 13 May 2020 18:12:20 +02:00

x86/kvm: Refactor L1D flushing

Move more L1D flush related code out of KVM/VMX into builtin code to allow
reuse for L1D flushing:

- Move the initialization to l1d_flush_init_once() and remove the
  deallocation of the L1D flush pages.

  This avoids adding complex refcounting of users (VMX or tasks which
  opt into an L1D flush on context switch) for the price of a few pages
  potentially wasted when no users are left.

- Unify the flush invocations as arch_l1d_flush(), which attempts
  hardware flushing and falls back to the software implementation
  with the option of prepopulating the TLB entries first.

[ tglx: Massage changelog and add a paranoid check of the flush pages
  pointer ]

Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Balbir Singh <sblbir@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20200510014803.12190-5-sblbir@xxxxxxxxxx

---
arch/x86/include/asm/cacheflush.h | 12 ++---
arch/x86/kernel/l1d_flush.c | 68 ++++++++++++++++++++++--------
arch/x86/kvm/vmx/vmx.c | 20 +--------
3 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 21cc3b2..851d8f1 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -7,11 +7,13 @@
#include <asm/special_insns.h>

#define L1D_CACHE_ORDER 4
+
+enum l1d_flush_options {
+ L1D_FLUSH_POPULATE_TLB = 0x1,
+};
+
void clflush_cache_range(void *addr, unsigned int size);
-void l1d_flush_populate_tlb(void *l1d_flush_pages);
-void *l1d_flush_alloc_pages(void);
-void l1d_flush_cleanup_pages(void *l1d_flush_pages);
-void l1d_flush_sw(void *l1d_flush_pages);
-int l1d_flush_hw(void);
+int l1d_flush_init_once(void);
+void arch_l1d_flush(enum l1d_flush_options options);

#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/kernel/l1d_flush.c b/arch/x86/kernel/l1d_flush.c
index 32119ee..4662f90 100644
--- a/arch/x86/kernel/l1d_flush.c
+++ b/arch/x86/kernel/l1d_flush.c
@@ -4,10 +4,10 @@

#include <asm/cacheflush.h>

-void *l1d_flush_alloc_pages(void)
+static void *l1d_flush_alloc_pages(void)
{
struct page *page;
- void *l1d_flush_pages = NULL;
+ void *flush_pages = NULL;
int i;

/*
@@ -17,7 +17,7 @@ void *l1d_flush_alloc_pages(void)
page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
if (!page)
return NULL;
- l1d_flush_pages = page_address(page);
+ flush_pages = page_address(page);

/*
* Initialize each page with a different pattern in
@@ -25,20 +25,13 @@ void *l1d_flush_alloc_pages(void)
* virtualization case.
*/
for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
- memset(l1d_flush_pages + i * PAGE_SIZE, i + 1,
+ memset(flush_pages + i * PAGE_SIZE, i + 1,
PAGE_SIZE);
}
- return l1d_flush_pages;
+ return flush_pages;
}
-EXPORT_SYMBOL_GPL(l1d_flush_alloc_pages);

-void l1d_flush_cleanup_pages(void *l1d_flush_pages)
-{
- free_pages((unsigned long)l1d_flush_pages, L1D_CACHE_ORDER);
-}
-EXPORT_SYMBOL_GPL(l1d_flush_cleanup_pages);
-
-void l1d_flush_populate_tlb(void *l1d_flush_pages)
+static void l1d_flush_populate_tlb(void *l1d_flush_pages)
{
int size = PAGE_SIZE << L1D_CACHE_ORDER;

@@ -56,9 +49,8 @@ void l1d_flush_populate_tlb(void *l1d_flush_pages)
[size] "r" (size)
: "eax", "ebx", "ecx", "edx");
}
-EXPORT_SYMBOL_GPL(l1d_flush_populate_tlb);

-int l1d_flush_hw(void)
+static int l1d_flush_hw(void)
{
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
@@ -66,9 +58,8 @@ int l1d_flush_hw(void)
}
return -ENOTSUPP;
}
-EXPORT_SYMBOL_GPL(l1d_flush_hw);

-void l1d_flush_sw(void *l1d_flush_pages)
+static void l1d_flush_sw(void *l1d_flush_pages)
{
int size = PAGE_SIZE << L1D_CACHE_ORDER;

@@ -85,4 +76,45 @@ void l1d_flush_sw(void *l1d_flush_pages)
[size] "r" (size)
: "eax", "ecx");
}
-EXPORT_SYMBOL_GPL(l1d_flush_sw);
+
+static void *l1d_flush_pages;
+static DEFINE_MUTEX(l1d_flush_mutex);
+
+/*
+ * Initialize and setup L1D flush once, each caller will reuse the
+ * l1d_flush_pages for flushing, no per CPU allocations or NUMA aware
+ * allocations at the moment.
+ */
+int l1d_flush_init_once(void)
+{
+ int ret = 0;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -ENOTSUPP;
+
+ if (static_cpu_has(X86_FEATURE_FLUSH_L1D) || l1d_flush_pages)
+ return ret;
+
+ mutex_lock(&l1d_flush_mutex);
+ if (!l1d_flush_pages)
+ l1d_flush_pages = l1d_flush_alloc_pages();
+ ret = l1d_flush_pages ? 0 : -ENOMEM;
+ mutex_unlock(&l1d_flush_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(l1d_flush_init_once);
+
+void arch_l1d_flush(enum l1d_flush_options options)
+{
+ if (!l1d_flush_hw())
+ return;
+
+ if (WARN_ON_ONCE(!l1d_flush_pages))
+ return;
+
+ if (options & L1D_FLUSH_POPULATE_TLB)
+ l1d_flush_populate_tlb(l1d_flush_pages);
+
+ l1d_flush_sw(l1d_flush_pages);
+}
+EXPORT_SYMBOL_GPL(arch_l1d_flush);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 786d161..d489234 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -203,8 +203,6 @@ static const struct {
[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
};

-static void *vmx_l1d_flush_pages;
-
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
@@ -247,12 +245,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
l1tf = VMENTER_L1D_FLUSH_ALWAYS;
}

- if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
- !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
- vmx_l1d_flush_pages = l1d_flush_alloc_pages();
- if (!vmx_l1d_flush_pages)
+ if (l1tf != VMENTER_L1D_FLUSH_NEVER)
+ if (l1d_flush_init_once())
return -ENOMEM;
- }

l1tf_vmx_mitigation = l1tf;

@@ -6010,12 +6005,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
}

vcpu->stat.l1d_flush++;
-
- if (!l1d_flush_hw())
- return;
-
- l1d_flush_populate_tlb(vmx_l1d_flush_pages);
- l1d_flush_sw(vmx_l1d_flush_pages);
+ arch_l1d_flush(L1D_FLUSH_POPULATE_TLB);
}

static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -7983,10 +7973,6 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = {

static void vmx_cleanup_l1d_flush(void)
{
- if (vmx_l1d_flush_pages) {
- l1d_flush_cleanup_pages(vmx_l1d_flush_pages);
- vmx_l1d_flush_pages = NULL;
- }
/* Restore state so sysfs ignores VMX */
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}