[PATCH v13 4/6] tick/nohz_full: Ensure quiet_vmstat() is called on exit to user-mode when the idle tick is stopped

From: Marcelo Tosatti
Date: Thu Jan 05 2023 - 07:57:54 EST


From: Aaron Tomlin <atomlin@xxxxxxxxxxx>

For nohz full CPUs, we'd like the per-CPU vm statistics to be
synchronized when userspace is executing. Otherwise, the vmstat_shepherd
might queue a work item to synchronize them, which is undesired
intereference for isolated CPUs.

This patch syncs CPU-specific vmstat differentials, on return to
userspace, if CONFIG_FLUSH_WORK_ON_RESUME_USER is enabled and the tick
is stopped.

A trivial test program was used to determine the impact of the proposed
changes and under vanilla. The mlock(2) and munlock(2) system calls
was used solely to modify vmstat item 'NR_MLOCK'. The following is an
average count of CPU-cycles across the aforementioned system calls:

Vanilla Modified

Cycles per syscall 8461 8690 (+2.6%)

Signed-off-by: Aaron Tomlin <atomlin@xxxxxxxxxxx>
Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---
include/linux/tick.h | 5 +++--
kernel/time/tick-sched.c | 15 +++++++++++++++
2 files changed, 18 insertions(+), 2 deletions(-)

Index: linux-2.6/include/linux/tick.h
===================================================================
--- linux-2.6.orig/include/linux/tick.h
+++ linux-2.6/include/linux/tick.h
@@ -11,7 +11,6 @@
#include <linux/context_tracking_state.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
-#include <linux/rcupdate.h>

#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
@@ -272,6 +271,7 @@ static inline void tick_dep_clear_signal

extern void tick_nohz_full_kick_cpu(int cpu);
extern void __tick_nohz_task_switch(void);
+void __tick_nohz_user_enter_prepare(void);
extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
#else
static inline bool tick_nohz_full_enabled(void) { return false; }
@@ -296,6 +296,7 @@ static inline void tick_dep_clear_signal

static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void __tick_nohz_task_switch(void) { }
+static inline void __tick_nohz_user_enter_prepare(void) { }
static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
#endif

@@ -308,7 +309,7 @@ static inline void tick_nohz_task_switch
static inline void tick_nohz_user_enter_prepare(void)
{
if (tick_nohz_full_cpu(smp_processor_id()))
- rcu_nocb_flush_deferred_wakeup();
+ __tick_nohz_user_enter_prepare();
}

#endif
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -26,6 +26,7 @@
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>
+#include <linux/rcupdate.h>

#include <asm/irq_regs.h>

@@ -519,6 +520,23 @@ void __tick_nohz_task_switch(void)
}
}

+void __tick_nohz_user_enter_prepare(void)
+{
+ if (tick_nohz_full_cpu(smp_processor_id())) {
+ if (IS_ENABLED(CONFIG_FLUSH_WORK_ON_RESUME_USER)) {
+ struct tick_sched *ts;
+
+ ts = this_cpu_ptr(&tick_cpu_sched);
+
+ if (ts->tick_stopped)
+ quiet_vmstat(true);
+ }
+
+ rcu_nocb_flush_deferred_wakeup();
+ }
+}
+EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
+
/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{