diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a0..5b2ea5a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1043,6 +1043,7 @@ struct sched_domain;
 #else
 #define ENQUEUE_WAKING          0
 #endif
+#define ENQUEUE_BOOST           8
 
 #define DEQUEUE_SLEEP           1
 
@@ -1089,6 +1090,8 @@ struct sched_class {
 #ifdef CONFIG_FAIR_GROUP_SCHED
        void (*task_move_group) (struct task_struct *p, int on_rq);
 #endif
+
+       void (*task_scheduled) (struct rq *rq, struct task_struct *p);
 };
 
 struct load_weight {
@@ -1137,6 +1140,11 @@ struct sched_entity {
        struct list_head        group_node;
        unsigned int            on_rq;
 
+#ifdef CONFIG_CFS_BANDWIDTH
+       unsigned int            boosted;
+       struct list_head        boost_node;
+#endif
+
        u64                     exec_start;
        u64                     sum_exec_runtime;
        u64                     vruntime;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927f..5fe4fd5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1783,6 +1783,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        }
 }
 
+static inline void task_scheduled(struct rq *rq, struct task_struct *p)
+{
+       if (p->sched_class->task_scheduled)
+               p->sched_class->task_scheduled(rq, p);
+}
+
 #ifdef CONFIG_SMP
 
 /* assumes rq->lock is held */
@@ -2888,6 +2894,8 @@ need_resched:
        sched_preempt_enable_no_resched();
        if (need_resched())
                goto need_resched;
+
+       task_scheduled(rq, current);
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a1..a723a7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -772,6 +772,110 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
        se->exec_start = rq_of(cfs_rq)->clock_task;
 }
 
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline int cfs_rq_has_boosted_entities(struct cfs_rq *cfs_rq)
+{
+       return !list_empty(&cfs_rq->boosted_entities);
+}
+
+static inline int entity_boosted(struct sched_entity *se)
+{
+       return se->boosted;
+}
+
+static inline void update_entity_boost(struct sched_entity *se)
+{
+       if (!entity_is_task(se))
+               se->boosted = cfs_rq_has_boosted_entities(group_cfs_rq(se));
+}
+
+static int check_enqueue_boost(struct rq *rq, struct task_struct *p, int flags)
+{
+       if (sched_feat(BOOST_WAKEUPS) && (flags & ENQUEUE_WAKEUP))
+               p->se.boosted = 1;
+       return p->se.boosted;
+}
+
+static inline void __enqueue_boosted_entity(struct cfs_rq *cfs_rq,
+                                           struct sched_entity *se)
+{
+       list_add(&se->boost_node, &cfs_rq->boosted_entities);
+}
+
+static inline void __dequeue_boosted_entity(struct cfs_rq *cfs_rq,
+                                           struct sched_entity *se)
+{
+       list_del(&se->boost_node);
+}
+
+static int enqueue_boosted_entity(struct cfs_rq *cfs_rq,
+                                 struct sched_entity *se)
+{
+       if (se == cfs_rq->curr)
+               return 0;
+
+       if (entity_is_task(se) || !entity_boosted(se)) {
+               __enqueue_boosted_entity(cfs_rq, se);
+               se->boosted = 1;
+               return 1;
+       }
+
+       return 0;
+}
+
+static int dequeue_boosted_entity(struct cfs_rq *cfs_rq,
+                                 struct sched_entity *se)
+{
+       if (se == cfs_rq->curr)
+               return 0;
+
+       if (entity_is_task(se) ||
+           !cfs_rq_has_boosted_entities(group_cfs_rq(se))) {
+               __dequeue_boosted_entity(cfs_rq, se);
+               if (!entity_is_task(se))
+                       se->boosted = 0;
+               return 1;
+       }
+
+       return 0;
+}
+#else
+static inline int cfs_rq_has_boosted_entities(struct cfs_rq *cfs_rq)
+{
+       return 0;
+}
+
+static inline int entity_boosted(struct sched_entity *se)
+{
+       return 0;
+}
+
+static inline void update_entity_boost(struct sched_entity *se) {}
+
+static inline int check_enqueue_boost(struct rq *rq,
+                                     struct task_struct *p, int flags)
+{
+       return 0;
+}
+
+static inline void __enqueue_boosted_entity(struct cfs_rq *cfs_rq,
+                                           struct sched_entity *se) {}
+static inline void __dequeue_boosted_entity(struct cfs_rq *cfs_rq,
+                                           struct sched_entity *se) {}
+
+static inline int enqueue_boosted_entity(struct cfs_rq *cfs_rq,
+                                        struct sched_entity *se)
+{
+       return 0;
+}
+
+static inline int dequeue_boosted_entity(struct cfs_rq *cfs_rq,
+                                        struct sched_entity *se)
+{
+       return 0;
+}
+#endif
+
 /**************************************************
  * Scheduling class queueing methods:
  */
@@ -1113,7 +1217,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
        if (cfs_rq->nr_running == 1) {
                list_add_leaf_cfs_rq(cfs_rq);
-               check_enqueue_throttle(cfs_rq);
+               if (!(flags & ENQUEUE_BOOST))
+                       check_enqueue_throttle(cfs_rq);
        }
 }
 
@@ -1261,6 +1366,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 */
                update_stats_wait_end(cfs_rq, se);
                __dequeue_entity(cfs_rq, se);
+               if (entity_boosted(se))
+                       __dequeue_boosted_entity(cfs_rq, se);
        }
 
        update_stats_curr_start(cfs_rq, se);
@@ -1289,7 +1396,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
  * 3) pick the "last" process, for cache locality
  * 4) do not run the "skip" process, if something else is available
  */
-static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *do_pick_next_entity(struct cfs_rq *cfs_rq)
 {
        struct sched_entity *se = __pick_first_entity(cfs_rq);
        struct sched_entity *left = se;
@@ -1321,6 +1428,42 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
        return se;
 }
 
+#ifdef CONFIG_CFS_BANDWIDTH
+static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
+{
+       struct rq *rq = rq_of(cfs_rq);
+       struct sched_entity *se = NULL;
+
+       if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) {
+               rq->cfs_quota_exceeded = 1;
+               if (cfs_rq_has_boosted_entities(cfs_rq)) {
+                       se = list_first_entry(&cfs_rq->boosted_entities,
+                                       struct sched_entity, boost_node);
+                       clear_buddies(cfs_rq, se);
+               }
+       }
+
+       if (!se)
+               se = do_pick_next_entity(cfs_rq);
+
+       if (entity_is_task(se) &&
+           !entity_boosted(se) && rq->cfs_quota_exceeded)
+               resched_task(task_of(se));
+
+       return se;
+}
+
+static void task_scheduled_fair(struct rq *rq, struct task_struct *p)
+{
+       if (rq->cfs_quota_exceeded)
+               set_tsk_need_resched(p);
+       rq->cfs_quota_exceeded = 0;
+       p->se.boosted = 0;
+}
+#else
+# define pick_next_entity do_pick_next_entity
+#endif
+
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
@@ -1332,6 +1475,10 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
        if (prev->on_rq)
                update_curr(cfs_rq);
 
+       update_entity_boost(prev);
+       if (entity_boosted(prev) && prev->on_rq)
+               __enqueue_boosted_entity(cfs_rq, prev);
+
        /* throttle cfs_rqs exceeding runtime */
        check_cfs_rq_runtime(cfs_rq);
 
@@ -1965,6 +2112,9 @@ static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
        if (cfs_rq_throttled(cfs_rq))
                return;
 
+       if (cfs_rq_has_boosted_entities(cfs_rq))
+               return;
+
        throttle_cfs_rq(cfs_rq);
 }
 
@@ -2020,6 +2170,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
        cfs_rq->runtime_enabled = 0;
        INIT_LIST_HEAD(&cfs_rq->throttled_list);
+       INIT_LIST_HEAD(&cfs_rq->boosted_entities);
 }
 
 /* requires cfs_b->lock, may release to reprogram timer */
@@ -2180,11 +2331,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
+       int boost = check_enqueue_boost(rq, p, flags);
 
        for_each_sched_entity(se) {
                if (se->on_rq)
                        break;
                cfs_rq = cfs_rq_of(se);
+               if (boost)
+                       flags |= ENQUEUE_BOOST;
                enqueue_entity(cfs_rq, se, flags);
 
                /*
@@ -2197,6 +2351,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                        break;
                cfs_rq->h_nr_running++;
 
+               if (boost)
+                       boost = enqueue_boosted_entity(cfs_rq, se);
+
                flags = ENQUEUE_WAKEUP;
        }
 
@@ -2207,12 +2364,23 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
 
+               if (boost)
+                       boost = enqueue_boosted_entity(cfs_rq, se);
+
                update_cfs_load(cfs_rq, 0);
                update_cfs_shares(cfs_rq);
        }
 
        if (!se)
                inc_nr_running(rq);
+       else if (boost)
+               for_each_sched_entity(se) {
+                       cfs_rq = cfs_rq_of(se);
+                       if (!enqueue_boosted_entity(cfs_rq, se))
+                               break;
+                       if (cfs_rq_throttled(cfs_rq))
+                               unthrottle_cfs_rq(cfs_rq);
+               }
+
        hrtick_update(rq);
 }
 
@@ -2227,6 +2395,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
+       int boosted = entity_boosted(se);
        int task_sleep = flags & DEQUEUE_SLEEP;
 
        for_each_sched_entity(se) {
@@ -2243,6 +2412,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                        break;
                cfs_rq->h_nr_running--;
 
+               if (boosted)
+                       boosted = dequeue_boosted_entity(cfs_rq, se);
+
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight) {
                        /*
@@ -2266,6 +2438,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                if (cfs_rq_throttled(cfs_rq))
                        break;
 
+               if (boosted)
+                       boosted = dequeue_boosted_entity(cfs_rq, se);
+
                update_cfs_load(cfs_rq, 0);
                update_cfs_shares(cfs_rq);
        }
 
@@ -5339,6 +5514,9 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
        .task_move_group        = task_move_group_fair,
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+       .task_scheduled         = task_scheduled_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefca..fbc59cd 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,3 +61,5 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+
+SCHED_FEAT(BOOST_WAKEUPS, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7a7db09..681be1d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -267,6 +267,8 @@ struct cfs_rq {
        u64 throttled_timestamp;
        int throttled, throttle_count;
        struct list_head throttled_list;
+
+       struct list_head boosted_entities;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 };
@@ -446,6 +448,10 @@ struct rq {
        struct hrtimer hrtick_timer;
 #endif
 
+#ifdef CONFIG_CFS_BANDWIDTH
+       int cfs_quota_exceeded;
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
        /* latency stats */
        struct sched_info rq_sched_info;
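
Not part of the patch: below is a minimal userspace sketch of how the new behaviour could be exercised. It assumes the cgroup v1 "cpu" controller is mounted at /sys/fs/cgroup/cpu and uses a hypothetical group name "boosttest"; the cpu.cfs_period_us/cpu.cfs_quota_us knobs are the existing CONFIG_CFS_BANDWIDTH interface the patch builds on, and the BOOST_WAKEUPS feature added above should be toggleable through /sys/kernel/debug/sched_features when CONFIG_SCHED_DEBUG is enabled. A task in such a group that sleeps and wakes frequently should hit the check_enqueue_boost()/pick_next_entity() paths once its quota is exhausted.

/*
 * Hypothetical test sketch (not part of the patch): put the current task
 * into a bandwidth-limited cgroup so that wakeups under an exhausted quota
 * exercise the boost path.  The group name and mount point are assumptions.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

static void write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return;
        }
        fputs(val, f);
        fclose(f);
}

int main(void)
{
        const char *grp = "/sys/fs/cgroup/cpu/boosttest";
        char path[128], pid[16];

        mkdir(grp, 0755);

        /* 10ms of quota per 100ms period: the group throttles easily. */
        snprintf(path, sizeof(path), "%s/cpu.cfs_period_us", grp);
        write_str(path, "100000");
        snprintf(path, sizeof(path), "%s/cpu.cfs_quota_us", grp);
        write_str(path, "10000");

        /* Move ourselves into the limited group. */
        snprintf(path, sizeof(path), "%s/tasks", grp);
        snprintf(pid, sizeof(pid), "%d", (int)getpid());
        write_str(path, pid);

        /* Sleep/wake loop: each wakeup is a candidate for se.boosted = 1. */
        for (;;)
                usleep(1000);

        return 0;
}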