[PATCH 3/5] cputime: Allow dynamic switch between tick/virtual based cputime accounting

From: Frederic Weisbecker
Date: Fri Aug 03 2012 - 11:02:51 EST


Allow dynamic switching between tick-based and virtual-based cputime
accounting. This way we can provide a kind of "on-demand" virtual-based
cputime accounting. In this mode, the kernel relies on the user hooks
subsystem to dynamically hook into kernel/user boundaries.

This is in preparation for being able to stop the timer tick further
than idle. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING, which
makes it possible to account the cputime without the tick by hooking
into kernel/user boundaries.

Depending on whether the tick is stopped or not, we can switch between
tick-based and vtime-based accounting at any time, in order to minimize
the overhead associated with user hooks.
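
For illustration only (not part of the patch itself), here is a simplified
sketch of the runtime dispatch this patch introduces in place of the previous
compile-time #ifdef CONFIG_VIRT_CPU_ACCOUNTING split. The stubs below are
mock-ups; in the real tree accounting_vtime() comes from the arch or
asm-generic cputime.h and the vtime path is provided by the arch:

/*
 * Illustrative sketch, not the actual kernel implementation.
 */
#include <stdbool.h>

struct task_struct;			/* opaque here; real kernel type */

/*
 * Arch cputime.h returns true when CONFIG_VIRT_CPU_ACCOUNTING provides
 * precise boundary-hook accounting; the asm-generic fallback returns false.
 */
static inline bool accounting_vtime(void)
{
	return false;
}

/* Arch vtime hook, renamed from account_process_tick by this patch. */
void account_process_tick_vtime(struct task_struct *p, int user_tick);

/* The generic path now decides at runtime instead of via #ifdef. */
void account_process_tick(struct task_struct *p, int user_tick)
{
	if (accounting_vtime()) {
		/* Precise accounting via kernel/user boundary hooks. */
		account_process_tick_vtime(p, user_tick);
		return;
	}
	/* ... otherwise the usual tick-based accounting runs ... */
}
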

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Alessio Igor Bogani <abogani@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Avi Kivity <avi@xxxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Geoff Levand <geoff@xxxxxxxxxxxxx>
Cc: Gilad Ben Yossef <gilad@xxxxxxxxxxxxx>
Cc: Hakan Akkan <hakanakkan@xxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Kevin Hilman <khilman@xxxxxx>
Cc: Max Krasnyansky <maxk@xxxxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephen Hemminger <shemminger@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Sven-Thorsten Dietrich <thebigcorporation@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/ia64/include/asm/cputime.h | 5 ++++
arch/ia64/kernel/time.c | 2 +-
arch/powerpc/include/asm/cputime.h | 5 ++++
arch/powerpc/kernel/time.c | 2 +-
arch/s390/include/asm/cputime.h | 5 ++++
arch/s390/kernel/vtime.c | 2 +-
include/asm-generic/cputime.h | 5 ++++
include/linux/kernel_stat.h | 3 ++
include/linux/sched.h | 5 +---
kernel/fork.c | 3 +-
kernel/sched/cputime.c | 39 +++++++++++++++--------------------
kernel/time/tick-sched.c | 28 ++++++++++++-------------
12 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 3deac95..9532b9c 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -103,5 +103,10 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)

+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 6247197..7afcf93 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
* Called from the timer interrupt handler to charge accumulated user time
* to the current process. Must be called with interrupts disabled.
*/
-void account_process_tick(struct task_struct *p, int user_tick)
+void account_process_tick_vtime(struct task_struct *p, int user_tick)
{
struct thread_info *ti = task_thread_info(p);
cputime_t delta_utime;
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 487d46f..901e0ac 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -228,6 +228,11 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)

#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))

+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 49da7f0..3f1918a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -354,7 +354,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
{
cputime_t utime, utimescaled;

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 718374d..05ffda7 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -188,4 +188,9 @@ static inline int s390_nohz_delay(int cpu)

#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)

+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 506e9bd..29f20fc 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -99,7 +99,7 @@ void account_switch_vtime(struct task_struct *prev)
S390_lowcore.system_timer = ti->system_timer;
}

-void account_process_tick(struct task_struct *tsk, int user_tick)
+void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
{
do_account_vtime(tsk, HARDIRQ_OFFSET);
}
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 9a62937..212c8bb 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -66,4 +66,9 @@ typedef u64 __nocast cputime64_t;
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))

+static inline bool accounting_vtime(void)
+{
+ return false;
+}
+
#endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index bbe5d15..1270b86 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -132,8 +132,11 @@ extern void account_idle_ticks(unsigned long ticks);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_switch_vtime(struct task_struct *prev);
+extern void account_process_tick_vtime(struct task_struct *p, int user_tick);
#else
static inline void account_switch_vtime(struct task_struct *prev) { }
+static inline void account_process_tick_vtime(struct task_struct *p,
+ int user_tick) { }
#endif

#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7b7a438..6209682 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -612,9 +612,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
-#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
@@ -1363,9 +1361,8 @@ struct task_struct {

cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
-#endif
+
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
diff --git a/kernel/fork.c b/kernel/fork.c
index 3e2ed43..b19dd02 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1245,9 +1245,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,

p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_utime = p->prev_stime = 0;
-#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index fd5bd01..ff525ca 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -287,8 +287,6 @@ static __always_inline bool steal_account_process_tick(void)
return false;
}

-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@@ -368,6 +366,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();

+ if (accounting_vtime()) {
+ account_process_tick_vtime(p, user_tick);
+ return;
+ }
+
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
@@ -410,29 +413,10 @@ void account_idle_ticks(unsigned long ticks)
account_idle_time(jiffies_to_cputime(ticks));
}

-#endif

/*
* Use precise platform statistics if available:
*/
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- *ut = p->utime;
- *st = p->stime;
-}
-
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct task_cputime cputime;
-
- thread_group_cputime(p, &cputime);
-
- *ut = cputime.utime;
- *st = cputime.stime;
-}
-#else
-
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
@@ -441,6 +425,12 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
cputime_t rtime, utime = p->utime, total = utime + p->stime;

+ if (accounting_vtime()) {
+ *ut = p->utime;
+ *st = p->stime;
+ return;
+ }
+
/*
* Use CFS's precise accounting:
*/
@@ -476,6 +466,12 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)

thread_group_cputime(p, &cputime);

+ if (accounting_vtime()) {
+ *ut = cputime.utime;
+ *st = cputime.stime;
+ return;
+ }
+
total = cputime.utime + cputime.stime;
rtime = nsecs_to_cputime(cputime.sum_exec_runtime);

@@ -494,4 +490,3 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
*ut = sig->prev_utime;
*st = sig->prev_stime;
}
-#endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 45b17ae..76dc22b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -557,9 +557,7 @@ void tick_nohz_idle_exit(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
-#endif
ktime_t now;

local_irq_disable();
@@ -584,19 +582,19 @@ void tick_nohz_idle_exit(void)
tick_do_update_jiffies64(now);
update_cpu_load_nohz();

-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- /*
- * We stopped the tick in idle. Update process times would miss the
- * time we slept as update_process_times does only a 1 tick
- * accounting. Enforce that this is accounted to idle !
- */
- ticks = jiffies - ts->idle_jiffies;
- /*
- * We might be one off. Do not randomly account a huge number of ticks!
- */
- if (ticks && ticks < LONG_MAX)
- account_idle_ticks(ticks);
-#endif
+ if (!accounting_vtime()) {
+ /*
+ * We stopped the tick in idle. Update process times would miss the
+ * time we slept as update_process_times does only a 1 tick
+ * accounting. Enforce that this is accounted to idle !
+ */
+ ticks = jiffies - ts->idle_jiffies;
+ /*
+ * We might be one off. Do not randomly account a huge number of ticks!
+ */
+ if (ticks && ticks < LONG_MAX)
+ account_idle_ticks(ticks);
+ }

calc_load_exit_idle();
touch_softlockup_watchdog();
--
1.7.5.4
