Re: [External] Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list

From: Chengming Zhou
Date: Tue May 24 2022 - 05:35:35 EST


On 2022/5/24 00:23, Vincent Guittot wrote:
> On Wed, 27 Apr 2022 at 18:07, Chengming Zhou
> <zhouchengming@xxxxxxxxxxxxx> wrote:
[...]
>> kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>> 1 file changed, 16 insertions(+), 56 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 1ad18b5cc1b8..083c3d32c899 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>>
>> #ifdef CONFIG_FAIR_GROUP_SCHED
>>
>> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>> +
>> /* Walk up scheduling entities hierarchy */
>> #define for_each_sched_entity(se) \
>> for (; se; se = se->parent)
>> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>> struct rq *rq = rq_of(cfs_rq);
>> int cpu = cpu_of(rq);
>>
>> - if (cfs_rq->on_list)
>> + if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
>
> Please move throttled_hierarchy() outside list_add_leaf_cfs_rq()
> because the task will not be added in this case which is quite
> misleading

Ok, will do. I will move throttled_hierarchy() outside.

>
> I will continue to check the various corner cases but I haven't seen
> problem so far with your method

Thanks!

>
>> return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>>
>> cfs_rq->on_list = 1;
>> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>> }
>> #endif /* CONFIG_SMP */
>>
>> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>> -
>> /*
>> * Recomputes the group entity based on the current state of its group
>> * runqueue.
>> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>> __enqueue_entity(cfs_rq, se);
>> se->on_rq = 1;
>>
>> - /*
>> - * When bandwidth control is enabled, cfs might have been removed
>> - * because of a parent been throttled but cfs->nr_running > 1. Try to
>> - * add it unconditionally.
>> - */
>> - if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
>> + if (cfs_rq->nr_running == 1) {
>> list_add_leaf_cfs_rq(cfs_rq);
>> -
>> - if (cfs_rq->nr_running == 1)
>> check_enqueue_throttle(cfs_rq);
>> + }
>> }
>>
>> static void __clear_buddies_last(struct sched_entity *se)
>> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>> /* update hierarchical throttle state */
>> walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>>
>> - /* Nothing to run but something to decay (on_list)? Complete the branch */
>> if (!cfs_rq->load.weight) {
>> - if (cfs_rq->on_list)
>> - goto unthrottle_throttle;
>> - return;
>> + if (!cfs_rq->on_list)
>> + return;
>> + /*
>> + * Nothing to run but something to decay (on_list)?
>> + * Complete the branch.
>> + */
>> + for_each_sched_entity(se) {
>> + if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
>> + break;
>> + }
>> + goto unthrottle_throttle;
>> }
>>
>> task_delta = cfs_rq->h_nr_running;
>> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>> /* end evaluation on encountering a throttled cfs_rq */
>> if (cfs_rq_throttled(qcfs_rq))
>> goto unthrottle_throttle;
>> -
>> - /*
>> - * One parent has been throttled and cfs_rq removed from the
>> - * list. Add it back to not break the leaf list.
>> - */
>> - if (throttled_hierarchy(qcfs_rq))
>> - list_add_leaf_cfs_rq(qcfs_rq);
>> }
>>
>> /* At this point se is NULL and we are at root level*/
>> add_nr_running(rq, task_delta);
>>
>> unthrottle_throttle:
>> - /*
>> - * The cfs_rq_throttled() breaks in the above iteration can result in
>> - * incomplete leaf list maintenance, resulting in triggering the
>> - * assertion below.
>> - */
>> - for_each_sched_entity(se) {
>> - struct cfs_rq *qcfs_rq = cfs_rq_of(se);
>> -
>> - if (list_add_leaf_cfs_rq(qcfs_rq))
>> - break;
>> - }
>> -
>> assert_list_leaf_cfs_rq(rq);
>>
>> /* Determine whether we need to wake up potentially idle CPU: */
>> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> /* end evaluation on encountering a throttled cfs_rq */
>> if (cfs_rq_throttled(cfs_rq))
>> goto enqueue_throttle;
>> -
>> - /*
>> - * One parent has been throttled and cfs_rq removed from the
>> - * list. Add it back to not break the leaf list.
>> - */
>> - if (throttled_hierarchy(cfs_rq))
>> - list_add_leaf_cfs_rq(cfs_rq);
>> }
>>
>> /* At this point se is NULL and we are at root level*/
>> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> update_overutilized_status(rq);
>>
>> enqueue_throttle:
>> - if (cfs_bandwidth_used()) {
>> - /*
>> - * When bandwidth control is enabled; the cfs_rq_throttled()
>> - * breaks in the above iteration can result in incomplete
>> - * leaf list maintenance, resulting in triggering the assertion
>> - * below.
>> - */
>> - for_each_sched_entity(se) {
>> - cfs_rq = cfs_rq_of(se);
>> -
>> - if (list_add_leaf_cfs_rq(cfs_rq))
>> - break;
>> - }
>> - }
>> -
>> assert_list_leaf_cfs_rq(rq);
>>
>> hrtick_update(rq);
>> --
>> 2.35.2
>>