[PATCH 7/7] x86,idle: do not leave mm in idle state

From: Rik van Riel
Date: Wed Jun 20 2018 - 15:57:31 EST


Do not call leave_mm() when going into a C-state. Now that mprotect and
madvise no longer send IPIs for TLB shootdowns to idle CPUs, there is
no real reason to disable lazy TLB mode in idle states.

This seems to help performance on Broadwell systems. Haswell performance
numbers are inconclusive.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
Tested-by: Song Liu <songliubraving@xxxxxx>
---
arch/x86/include/asm/mmu.h | 2 --
arch/x86/mm/tlb.c | 26 +-------------------------
drivers/idle/intel_idle.c | 7 -------
3 files changed, 1 insertion(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c20..096ee9340685 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -61,6 +61,4 @@ typedef struct {
.ctx_id = 1, \
}

-void leave_mm(int cpu);
-
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 26b3aeef6266..b861084e59d1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,28 +121,6 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
write_cr3(new_mm_cr3);
}

-void leave_mm(int cpu)
-{
- struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-
- /*
- * It's plausible that we're in lazy TLB mode while our mm is init_mm.
- * If so, our callers still expect us to flush the TLB, but there
- * aren't any user TLB entries in init_mm to worry about.
- *
- * This needs to happen before any other sanity checks due to
- * intel_idle's shenanigans.
- */
- if (loaded_mm == &init_mm)
- return;
-
- /* Warn if we're not lazy. */
- WARN_ON((this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK));
-
- switch_mm(NULL, &init_mm, NULL);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -194,8 +172,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* from lazy TLB mode to normal mode if active_mm isn't changing.
* When this happens, we don't assume that CR3 (and hence
* cpu_tlbstate.loaded_mm) matches next.
- *
- * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
*/

/* We don't want flush_tlb_func_* to run concurrently with us. */
@@ -205,7 +181,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
/*
* Verify that CR3 is what we think it is. This will catch
* hypothetical buggy code that directly switches to swapper_pg_dir
- * without going through leave_mm() / switch_mm_irqs_off() or that
+ * without going through switch_mm_irqs_off() or that
* does something like write_cr3(read_cr3_pa()).
*
* Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b2ccce5fb071..d3727aa34836 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -916,13 +916,6 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
bool uninitialized_var(tick);
int cpu = smp_processor_id();

- /*
- * leave_mm() to avoid costly and often unnecessary wakeups
- * for flushing the user TLB's associated with the active mm.
- */
- if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
- leave_mm(cpu);
-
if (!static_cpu_has(X86_FEATURE_ARAT)) {
cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
MWAIT_CSTATE_MASK) + 1;
--
2.14.4