Re: [PATCH 10/19] sched/fair: Prioritize tasks preferring destination LLC during balancing
From: Tim Chen
Date: Wed Oct 15 2025 - 17:28:50 EST
On Wed, 2025-10-15 at 17:08 +0200, Peter Zijlstra wrote:
> On Sat, Oct 11, 2025 at 11:24:47AM -0700, Tim Chen wrote:
> > During LLC load balancing, first check for tasks that prefer the
> > destination LLC and balance them to it before others.
> >
> > Mark source sched groups containing tasks preferring non-local LLCs
> > with the group_llc_balance flag. This ensures the load balancer later
> > pulls or pushes these tasks toward their preferred LLCs.
> >
> > Co-developed-by: Chen Yu <yu.c.chen@xxxxxxxxx>
> > Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
> > Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> > ---
>
> For me this patch is cut too fine; it only sets group_llc_balance but
> then we don't see how it is used.
Okay, will combine this patch with the following one.
>
> > kernel/sched/fair.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 41 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index cbd1e97bca4b..af7b578eaa06 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -9822,8 +9822,7 @@ static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu
> > else
> > return mig_unrestricted;
> >
> > - return can_migrate_llc(src_cpu, dst_cpu,
> > - task_util(p), to_pref);
> > + return can_migrate_llc(src_cpu, dst_cpu, task_util(p), to_pref);
> > }
> >
> > #else
> > @@ -10394,6 +10393,7 @@ struct sg_lb_stats {
> > enum group_type group_type;
> > unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
> > unsigned int group_smt_balance; /* Task on busy SMT be moved */
> > + unsigned int group_llc_balance; /* Tasks should be moved to preferred LLC */
> > unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
> > #ifdef CONFIG_NUMA_BALANCING
> > unsigned int nr_numa_running;
> > @@ -10849,11 +10849,45 @@ static void record_sg_llc_stats(struct lb_env *env,
> > if (unlikely(READ_ONCE(sd_share->capacity) != sgs->group_capacity))
> > WRITE_ONCE(sd_share->capacity, sgs->group_capacity);
> > }
> > +
> > +/*
> > + * Do LLC balancing on a sched group that contains an LLC and has tasks that
> > + * prefer to run on the LLC of the idle dst_cpu.
> > + */
> > +static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
> > + struct sched_group *group)
> > +{
> > + struct sched_domain *child = env->sd->child;
> > + int llc;
> > +
> > + if (!sched_cache_enabled())
> > + return false;
> > +
> > + if (env->sd->flags & SD_SHARE_LLC)
> > + return false;
> > +
> > + /* only care about task migration among LLCs */
> > + if (child && !(child->flags & SD_SHARE_LLC))
> > + return false;
> > +
> > + llc = llc_idx(env->dst_cpu);
> > + if (sgs->nr_pref_llc[llc] > 0 &&
>
> Nit: s/> 0// would be the same, right?
Sure.
>
> > + can_migrate_llc(env->src_cpu, env->dst_cpu, 0, true) == mig_llc)
> > + return true;
> > +
> > + return false;
> > +}
> > #else
> > static inline void record_sg_llc_stats(struct lb_env *env, struct sg_lb_stats *sgs,
> > struct sched_group *group)
> > {
> > }
> > +
> > +static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
> > + struct sched_group *group)
> > +{
> > + return false;
> > +}
> > #endif
> >
> > /**
> > @@ -10954,6 +10988,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> > sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
> >
> > record_sg_llc_stats(env, sgs, group);
> > +
> > + /* Check whether tasks in this group can be moved to their preferred LLC */
> > + if (!local_group && llc_balance(env, sgs, group))
> > + sgs->group_llc_balance = 1;
>
> We now have 3 (or so) branches that start with:
>
> if (!local_group &&
>
> perhaps collate that some?
Sure.
>
> > +
> > /* Computing avg_load makes sense only when group is overloaded */
> > if (sgs->group_type == group_overloaded)
> > sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
> > --
> > 2.32.0
> >