Re: [PATCH] sched/fair: static cpumasks for load balance

From: Peter Zijlstra
Date: Wed May 25 2022 - 13:05:15 EST


On Mon, May 23, 2022 at 05:42:16PM +0800, Bing Huang wrote:
> Both cpumasks, load_balance_mask and select_idle_mask, are used only
> in fair.c, but they are allocated in core.c in the CONFIG_CPUMASK_OFFSTACK=y
> case and made global via per-CPU variable declarations. More or less, it looks weird.
>
> Signed-off-by: Bing Huang <huangbing@xxxxxxxxxx>
> ---
> kernel/sched/core.c | 13 +++----------
> kernel/sched/fair.c | 18 ++++++++++++++++--
> kernel/sched/sched.h | 4 ++++
> 3 files changed, 23 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d58c0389eb23..64934c5e236d 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -9448,9 +9448,6 @@ LIST_HEAD(task_groups);
> static struct kmem_cache *task_group_cache __read_mostly;
> #endif
>
> -DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
> -DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
> -
> void __init sched_init(void)
> {
> unsigned long ptr = 0;
> @@ -9494,14 +9491,10 @@ void __init sched_init(void)
>
> #endif /* CONFIG_RT_GROUP_SCHED */
> }
> +
> #ifdef CONFIG_CPUMASK_OFFSTACK
> - for_each_possible_cpu(i) {
> - per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
> - cpumask_size(), GFP_KERNEL, cpu_to_node(i));
> - per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
> - cpumask_size(), GFP_KERNEL, cpu_to_node(i));
> - }
> -#endif /* CONFIG_CPUMASK_OFFSTACK */
> + init_working_cpumask();
> +#endif

We already have init_sched_fair_class(), can't you use that?

>
> init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a68482d66535..3a4b0dd1beb5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5831,8 +5831,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> #ifdef CONFIG_SMP
>
> /* Working cpumask for: load_balance, load_balance_newidle. */
> -DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
> -DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
> +static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
> +static DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
>
> #ifdef CONFIG_NO_HZ_COMMON
>
> @@ -7052,6 +7052,20 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> }
> #endif /* CONFIG_SMP */
>
> +#ifdef CONFIG_CPUMASK_OFFSTACK
> +void __init init_working_cpumask(void)
> +{
> + int i;
> +
> + for_each_possible_cpu(i) {
> + per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
> + cpumask_size(), GFP_KERNEL, cpu_to_node(i));
> + per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
> + cpumask_size(), GFP_KERNEL, cpu_to_node(i));
> + }
> +}
> +#endif

If you stick that #ifdef inside the function, you can remove it
everywhere else. Less #ifdef is more better.