Re: [PATCH] scheduler: Extract cgroups_cpuaccount code from sched.c into own file

From: Mike Chan
Date: Wed May 19 2010 - 15:06:16 EST


2010/5/19 Thomas Renninger <trenn@xxxxxxx>:
> Hi,
>
> thread topic was:
> Re: [PATCH 3/4] scheduler: cpuacct: Enable platform callbacks for cpuacct power tracking
>
> I mingled this patch together with a minor comment for Mike's patch.
> This way, the interested people in CC are kept in the loop.
>
> Peter/Ingo: Can you pick up this cleanup if appropriate, please.
> Shall I resend separately or could you cut out comments below?
>
> On Wednesday 19 May 2010 03:30:19 Mike Chan wrote:
>> The platform must register a cpu power function that returns power in
>> milliWatt seconds.
>>
>> Signed-off-by: Mike Chan <mike@xxxxxxxxxxx>
>> ---
>>  Documentation/cgroups/cpuacct.txt |    3 +++
>>  include/linux/cpuacct.h           |    4 +++-
>>  kernel/sched.c                    |   24 ++++++++++++++++++++++--
>>  3 files changed, 28 insertions(+), 3 deletions(-)
> ...
>> diff --git a/include/linux/cpuacct.h b/include/linux/cpuacct.h
>> index 9ff479e..effe842 100644
>> --- a/include/linux/cpuacct.h
>> +++ b/include/linux/cpuacct.h
>> @@ -31,7 +31,9 @@ struct cpuacct_cpufreq_calls {
> This is a general cpuacct_charge interface, not cpufreq specific?
> I'd call it "struct cpuacct_charge_calls".
> Platforms can account C-states, frequency, power, whatever they like?
> The latter two are implemented with your patches.

I'm ok with the name change if that's what people prefer.

>>        */
>>       void (*init) (void **cpuacct_data);
>>       void (*charge) (void *cpuacct_data,  u64 cputime, unsigned int cpu);
>> -     void (*show) (void *cpuacct_data, struct cgroup_map_cb *cb);
>> +     void (*cpufreq_show) (void *cpuacct_data, struct cgroup_map_cb *cb);
>> +     /* Returns power consumed in milliWatt seconds */
>> +     u64 (*power_usage) (void *cpuacct_data);
>>  };
>>  int cpuacct_register_cpufreq(struct cpuacct_cpufreq_calls *fn);
> Same here, why not name it cpuacct_register_charge?
> Eventually at other places.
>
>> diff --git a/kernel/sched.c b/kernel/sched.c
>> index 6b6c45a..d55d8af 100644
>> --- a/kernel/sched.c
>> +++ b/kernel/sched.c
> Nothing to do with this patch, but I wonder why this is all in kernel/sched.c.
> Try a quick cleanup... works.
> What about the cleanup below?
> Not sure through which tree this should go in, but if below is accepted,
> would you mind rebasing your things against it.
> Then it would already show up in cgroup_cpuaccount.c git history.

Sounds reasonable

-- Mike

>
>       Thomas
>
> This is a cleanup against current linux-2.6 Linus tree.
>
> Having CONFIG_CGROUP_CPUACCT code in kernel/sched.c looks wrong.
> Move this out to kernel/cgroup_cpuaccount.c
>
> Test compiled with and without CONFIG_CGROUP_CPUACCT set on x86_64.
>
> Signed-off-by: Thomas Renninger <trenn@xxxxxxx>
> CC: linux-kernel@xxxxxxxxxxxxxxx
> CC: mike@xxxxxxxxxxx
> CC: menage@xxxxxxxxxx
> CC: lizf@xxxxxxxxxxxxxx
> CC: containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
> CC: mingo@xxxxxxx
> CC: peterz@xxxxxxxxxxxxx
>
> ---
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 8f78073..6e2c88a 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -609,6 +609,24 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
>  unsigned short css_id(struct cgroup_subsys_state *css);
>  unsigned short css_depth(struct cgroup_subsys_state *css);
>
> +/* Time spent by the tasks of the cpu accounting group executing in ... */
> +enum cpuacct_stat_index {
> +       CPUACCT_STAT_USER,      /* ... user mode */
> +       CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
> +
> +       CPUACCT_STAT_NSTATS,
> +};
> +
> +#ifdef CONFIG_CGROUP_CPUACCT
> +void cpuacct_charge(struct task_struct *tsk, u64 cputime);
> +void cpuacct_update_stats(struct task_struct *tsk,
> +               enum cpuacct_stat_index idx, cputime_t val);
> +#else
> +static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
> +static inline void cpuacct_update_stats(struct task_struct *tsk,
> +                       enum cpuacct_stat_index idx, cputime_t val) {}
> +#endif
> +
>  #else /* !CONFIG_CGROUPS */
>
>  static inline int cgroup_init_early(void) { return 0; }
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 149e18e..1df6e53 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -60,6 +60,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
>  obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
>  obj-$(CONFIG_COMPAT) += compat.o
>  obj-$(CONFIG_CGROUPS) += cgroup.o
> +obj-$(CONFIG_CGROUP_CPUACCT) += cgroup_cpuaccount.o
>  obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
>  obj-$(CONFIG_CPUSETS) += cpuset.o
>  obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
> diff --git a/kernel/cgroup_cpuaccount.c b/kernel/cgroup_cpuaccount.c
> new file mode 100644
> index 0000000..d32b927
> --- /dev/null
> +++ b/kernel/cgroup_cpuaccount.c
> @@ -0,0 +1,284 @@
> +#include <linux/kernel.h>
> +#include <linux/percpu.h>
> +#include <linux/spinlock.h>
> +#include <linux/sched.h>
> +#include <linux/cgroup.h>
> +#include <linux/srcu.h>
> +#include <linux/slab.h>
> +#include <linux/seq_file.h>
> +
> +#include <asm/cputime.h>
> +
> +/*
> + * CPU accounting code for task groups.
> + *
> + * Based on the work by Paul Menage (menage@xxxxxxxxxx) and Balbir Singh
> + * (balbir@xxxxxxxxxx).
> + */
> +
> +/* track cpu usage of a group of tasks and its child groups */
> +struct cpuacct {
> +       struct cgroup_subsys_state css;
> +       /* cpuusage holds pointer to a u64-type object on every cpu */
> +       u64 __percpu *cpuusage;
> +       struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
> +       struct cpuacct *parent;
> +};
> +
> +struct cgroup_subsys cpuacct_subsys;
> +
> +/* return cpu accounting group corresponding to this container */
> +static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
> +{
> +       return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
> +                           struct cpuacct, css);
> +}
> +
> +/* return cpu accounting group to which this task belongs */
> +static inline struct cpuacct *task_ca(struct task_struct *tsk)
> +{
> +       return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
> +                           struct cpuacct, css);
> +}
> +
> +/* create a new cpu accounting group */
> +static struct cgroup_subsys_state *cpuacct_create(
> +       struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
> +       int i;
> +
> +       if (!ca)
> +               goto out;
> +
> +       ca->cpuusage = alloc_percpu(u64);
> +       if (!ca->cpuusage)
> +               goto out_free_ca;
> +
> +       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> +               if (percpu_counter_init(&ca->cpustat[i], 0))
> +                       goto out_free_counters;
> +
> +       if (cgrp->parent)
> +               ca->parent = cgroup_ca(cgrp->parent);
> +
> +       return &ca->css;
> +
> +out_free_counters:
> +       while (--i >= 0)
> +               percpu_counter_destroy(&ca->cpustat[i]);
> +       free_percpu(ca->cpuusage);
> +out_free_ca:
> +       kfree(ca);
> +out:
> +       return ERR_PTR(-ENOMEM);
> +}
> +
> +/* destroy an existing cpu accounting group */
> +static void
> +cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       struct cpuacct *ca = cgroup_ca(cgrp);
> +       int i;
> +
> +       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> +               percpu_counter_destroy(&ca->cpustat[i]);
> +       free_percpu(ca->cpuusage);
> +       kfree(ca);
> +}
> +
> +static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
> +{
> +       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> +       u64 data;
> +
> +#ifndef CONFIG_64BIT
> +       /*
> +        * Take rq->lock to make 64-bit read safe on 32-bit platforms.
> +        */
> +       raw_spin_lock_irq(&cpu_rq(cpu)->lock);
> +       data = *cpuusage;
> +       raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
> +#else
> +       data = *cpuusage;
> +#endif
> +
> +       return data;
> +}
> +
> +static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
> +{
> +       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> +
> +#ifndef CONFIG_64BIT
> +       /*
> +        * Take rq->lock to make 64-bit write safe on 32-bit platforms.
> +        */
> +       raw_spin_lock_irq(&cpu_rq(cpu)->lock);
> +       *cpuusage = val;
> +       raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
> +#else
> +       *cpuusage = val;
> +#endif
> +}
> +
> +/* return total cpu usage (in nanoseconds) of a group */
> +static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
> +{
> +       struct cpuacct *ca = cgroup_ca(cgrp);
> +       u64 totalcpuusage = 0;
> +       int i;
> +
> +       for_each_present_cpu(i)
> +               totalcpuusage += cpuacct_cpuusage_read(ca, i);
> +
> +       return totalcpuusage;
> +}
> +
> +static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
> +                                                               u64 reset)
> +{
> +       struct cpuacct *ca = cgroup_ca(cgrp);
> +       int err = 0;
> +       int i;
> +
> +       if (reset) {
> +               err = -EINVAL;
> +               goto out;
> +       }
> +
> +       for_each_present_cpu(i)
> +               cpuacct_cpuusage_write(ca, i, 0);
> +
> +out:
> +       return err;
> +}
> +
> +static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
> +                                  struct seq_file *m)
> +{
> +       struct cpuacct *ca = cgroup_ca(cgroup);
> +       u64 percpu;
> +       int i;
> +
> +       for_each_present_cpu(i) {
> +               percpu = cpuacct_cpuusage_read(ca, i);
> +               seq_printf(m, "%llu ", (unsigned long long) percpu);
> +       }
> +       seq_printf(m, "\n");
> +       return 0;
> +}
> +
> +static const char *cpuacct_stat_desc[] = {
> +       [CPUACCT_STAT_USER] = "user",
> +       [CPUACCT_STAT_SYSTEM] = "system",
> +};
> +
> +static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
> +               struct cgroup_map_cb *cb)
> +{
> +       struct cpuacct *ca = cgroup_ca(cgrp);
> +       int i;
> +
> +       for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
> +               s64 val = percpu_counter_read(&ca->cpustat[i]);
> +               val = cputime64_to_clock_t(val);
> +               cb->fill(cb, cpuacct_stat_desc[i], val);
> +       }
> +       return 0;
> +}
> +
> +static struct cftype files[] = {
> +       {
> +               .name = "usage",
> +               .read_u64 = cpuusage_read,
> +               .write_u64 = cpuusage_write,
> +       },
> +       {
> +               .name = "usage_percpu",
> +               .read_seq_string = cpuacct_percpu_seq_read,
> +       },
> +       {
> +               .name = "stat",
> +               .read_map = cpuacct_stats_show,
> +       },
> +};
> +
> +static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
> +}
> +
> +/*
> + * charge this task's execution time to its accounting group.
> + *
> + * called with rq->lock held.
> + */
> +void cpuacct_charge(struct task_struct *tsk, u64 cputime)
> +{
> +       struct cpuacct *ca;
> +       int cpu;
> +
> +       if (unlikely(!cpuacct_subsys.active))
> +               return;
> +
> +       cpu = task_cpu(tsk);
> +
> +       rcu_read_lock();
> +
> +       ca = task_ca(tsk);
> +
> +       for (; ca; ca = ca->parent) {
> +               u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> +               *cpuusage += cputime;
> +       }
> +
> +       rcu_read_unlock();
> +}
> +
> +/*
> + * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
> + * in cputime_t units. As a result, cpuacct_update_stats calls
> + * percpu_counter_add with values large enough to always overflow the
> + * per cpu batch limit causing bad SMP scalability.
> + *
> + * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
> + * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
> + * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
> + */
> +#ifdef CONFIG_SMP
> +#define CPUACCT_BATCH  \
> +       min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
> +#else
> +#define CPUACCT_BATCH  0
> +#endif
> +
> +/*
> + * Charge the system/user time to the task's accounting group.
> + */
> +void cpuacct_update_stats(struct task_struct *tsk,
> +                         enum cpuacct_stat_index idx, cputime_t val)
> +{
> +       struct cpuacct *ca;
> +       int batch = CPUACCT_BATCH;
> +
> +       if (unlikely(!cpuacct_subsys.active))
> +               return;
> +
> +       rcu_read_lock();
> +       ca = task_ca(tsk);
> +
> +       do {
> +               __percpu_counter_add(&ca->cpustat[idx], val, batch);
> +               ca = ca->parent;
> +       } while (ca);
> +       rcu_read_unlock();
> +}
> +
> +struct cgroup_subsys cpuacct_subsys = {
> +       .name = "cpuacct",
> +       .create = cpuacct_create,
> +       .destroy = cpuacct_destroy,
> +       .populate = cpuacct_populate,
> +       .subsys_id = cpuacct_subsys_id,
> +};
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 1d93cd0..45d60dd 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -1394,24 +1394,6 @@ static const u32 prio_to_wmult[40] = {
>  /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
>  };
>
> -/* Time spent by the tasks of the cpu accounting group executing in ... */
> -enum cpuacct_stat_index {
> -       CPUACCT_STAT_USER,      /* ... user mode */
> -       CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
> -
> -       CPUACCT_STAT_NSTATS,
> -};
> -
> -#ifdef CONFIG_CGROUP_CPUACCT
> -static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
> -static void cpuacct_update_stats(struct task_struct *tsk,
> -               enum cpuacct_stat_index idx, cputime_t val);
> -#else
> -static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
> -static inline void cpuacct_update_stats(struct task_struct *tsk,
> -               enum cpuacct_stat_index idx, cputime_t val) {}
> -#endif
> -
>  static inline void inc_cpu_load(struct rq *rq, unsigned long load)
>  {
>        update_load_add(&rq->load, load);
> @@ -8617,283 +8599,6 @@ struct cgroup_subsys cpu_cgroup_subsys = {
>
>  #endif /* CONFIG_CGROUP_SCHED */
>
> -#ifdef CONFIG_CGROUP_CPUACCT
> -
> -/*
> - * CPU accounting code for task groups.
> - *
> - * Based on the work by Paul Menage (menage@xxxxxxxxxx) and Balbir Singh
> - * (balbir@xxxxxxxxxx).
> - */
> -
> -/* track cpu usage of a group of tasks and its child groups */
> -struct cpuacct {
> -       struct cgroup_subsys_state css;
> -       /* cpuusage holds pointer to a u64-type object on every cpu */
> -       u64 __percpu *cpuusage;
> -       struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
> -       struct cpuacct *parent;
> -};
> -
> -struct cgroup_subsys cpuacct_subsys;
> -
> -/* return cpu accounting group corresponding to this container */
> -static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
> -{
> -       return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
> -                           struct cpuacct, css);
> -}
> -
> -/* return cpu accounting group to which this task belongs */
> -static inline struct cpuacct *task_ca(struct task_struct *tsk)
> -{
> -       return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
> -                           struct cpuacct, css);
> -}
> -
> -/* create a new cpu accounting group */
> -static struct cgroup_subsys_state *cpuacct_create(
> -       struct cgroup_subsys *ss, struct cgroup *cgrp)
> -{
> -       struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
> -       int i;
> -
> -       if (!ca)
> -               goto out;
> -
> -       ca->cpuusage = alloc_percpu(u64);
> -       if (!ca->cpuusage)
> -               goto out_free_ca;
> -
> -       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> -               if (percpu_counter_init(&ca->cpustat[i], 0))
> -                       goto out_free_counters;
> -
> -       if (cgrp->parent)
> -               ca->parent = cgroup_ca(cgrp->parent);
> -
> -       return &ca->css;
> -
> -out_free_counters:
> -       while (--i >= 0)
> -               percpu_counter_destroy(&ca->cpustat[i]);
> -       free_percpu(ca->cpuusage);
> -out_free_ca:
> -       kfree(ca);
> -out:
> -       return ERR_PTR(-ENOMEM);
> -}
> -
> -/* destroy an existing cpu accounting group */
> -static void
> -cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> -{
> -       struct cpuacct *ca = cgroup_ca(cgrp);
> -       int i;
> -
> -       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> -               percpu_counter_destroy(&ca->cpustat[i]);
> -       free_percpu(ca->cpuusage);
> -       kfree(ca);
> -}
> -
> -static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
> -{
> -       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> -       u64 data;
> -
> -#ifndef CONFIG_64BIT
> -       /*
> -        * Take rq->lock to make 64-bit read safe on 32-bit platforms.
> -        */
> -       raw_spin_lock_irq(&cpu_rq(cpu)->lock);
> -       data = *cpuusage;
> -       raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
> -#else
> -       data = *cpuusage;
> -#endif
> -
> -       return data;
> -}
> -
> -static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
> -{
> -       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> -
> -#ifndef CONFIG_64BIT
> -       /*
> -        * Take rq->lock to make 64-bit write safe on 32-bit platforms.
> -        */
> -       raw_spin_lock_irq(&cpu_rq(cpu)->lock);
> -       *cpuusage = val;
> -       raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
> -#else
> -       *cpuusage = val;
> -#endif
> -}
> -
> -/* return total cpu usage (in nanoseconds) of a group */
> -static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
> -{
> -       struct cpuacct *ca = cgroup_ca(cgrp);
> -       u64 totalcpuusage = 0;
> -       int i;
> -
> -       for_each_present_cpu(i)
> -               totalcpuusage += cpuacct_cpuusage_read(ca, i);
> -
> -       return totalcpuusage;
> -}
> -
> -static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
> -                                                               u64 reset)
> -{
> -       struct cpuacct *ca = cgroup_ca(cgrp);
> -       int err = 0;
> -       int i;
> -
> -       if (reset) {
> -               err = -EINVAL;
> -               goto out;
> -       }
> -
> -       for_each_present_cpu(i)
> -               cpuacct_cpuusage_write(ca, i, 0);
> -
> -out:
> -       return err;
> -}
> -
> -static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
> -                                  struct seq_file *m)
> -{
> -       struct cpuacct *ca = cgroup_ca(cgroup);
> -       u64 percpu;
> -       int i;
> -
> -       for_each_present_cpu(i) {
> -               percpu = cpuacct_cpuusage_read(ca, i);
> -               seq_printf(m, "%llu ", (unsigned long long) percpu);
> -       }
> -       seq_printf(m, "\n");
> -       return 0;
> -}
> -
> -static const char *cpuacct_stat_desc[] = {
> -       [CPUACCT_STAT_USER] = "user",
> -       [CPUACCT_STAT_SYSTEM] = "system",
> -};
> -
> -static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
> -               struct cgroup_map_cb *cb)
> -{
> -       struct cpuacct *ca = cgroup_ca(cgrp);
> -       int i;
> -
> -       for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
> -               s64 val = percpu_counter_read(&ca->cpustat[i]);
> -               val = cputime64_to_clock_t(val);
> -               cb->fill(cb, cpuacct_stat_desc[i], val);
> -       }
> -       return 0;
> -}
> -
> -static struct cftype files[] = {
> -       {
> -               .name = "usage",
> -               .read_u64 = cpuusage_read,
> -               .write_u64 = cpuusage_write,
> -       },
> -       {
> -               .name = "usage_percpu",
> -               .read_seq_string = cpuacct_percpu_seq_read,
> -       },
> -       {
> -               .name = "stat",
> -               .read_map = cpuacct_stats_show,
> -       },
> -};
> -
> -static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> -{
> -       return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
> -}
> -
> -/*
> - * charge this task's execution time to its accounting group.
> - *
> - * called with rq->lock held.
> - */
> -static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
> -{
> -       struct cpuacct *ca;
> -       int cpu;
> -
> -       if (unlikely(!cpuacct_subsys.active))
> -               return;
> -
> -       cpu = task_cpu(tsk);
> -
> -       rcu_read_lock();
> -
> -       ca = task_ca(tsk);
> -
> -       for (; ca; ca = ca->parent) {
> -               u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
> -               *cpuusage += cputime;
> -       }
> -
> -       rcu_read_unlock();
> -}
> -
> -/*
> - * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
> - * in cputime_t units. As a result, cpuacct_update_stats calls
> - * percpu_counter_add with values large enough to always overflow the
> - * per cpu batch limit causing bad SMP scalability.
> - *
> - * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
> - * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
> - * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
> - */
> -#ifdef CONFIG_SMP
> -#define CPUACCT_BATCH  \
> -       min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
> -#else
> -#define CPUACCT_BATCH  0
> -#endif
> -
> -/*
> - * Charge the system/user time to the task's accounting group.
> - */
> -static void cpuacct_update_stats(struct task_struct *tsk,
> -               enum cpuacct_stat_index idx, cputime_t val)
> -{
> -       struct cpuacct *ca;
> -       int batch = CPUACCT_BATCH;
> -
> -       if (unlikely(!cpuacct_subsys.active))
> -               return;
> -
> -       rcu_read_lock();
> -       ca = task_ca(tsk);
> -
> -       do {
> -               __percpu_counter_add(&ca->cpustat[idx], val, batch);
> -               ca = ca->parent;
> -       } while (ca);
> -       rcu_read_unlock();
> -}
> -
> -struct cgroup_subsys cpuacct_subsys = {
> -       .name = "cpuacct",
> -       .create = cpuacct_create,
> -       .destroy = cpuacct_destroy,
> -       .populate = cpuacct_populate,
> -       .subsys_id = cpuacct_subsys_id,
> -};
> -#endif /* CONFIG_CGROUP_CPUACCT */
> -
>  #ifndef CONFIG_SMP
>
>  void synchronize_sched_expedited(void)
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/