[PATCH] tick/sched: fix data races at tick_do_timer_cpu

From: Qian Cai
Date: Mon Feb 24 2020 - 22:08:29 EST


tick_do_timer_cpu can be accessed concurrently by plain reads and plain
writes that are not protected by any lock, which results in data races.
Fix them by marking the accesses with READ_ONCE()/WRITE_ONCE(). The data
races were reported by KCSAN:

write to 0xffffffffb2dc94ac of 4 bytes by interrupt on cpu 98:
tick_sched_do_timer+0x77/0x90
tick_sched_do_timer at kernel/time/tick-sched.c:136
tick_sched_timer+0x35/0xc0
__hrtimer_run_queues+0x217/0x7c0
hrtimer_interrupt+0x1d4/0x3e0
smp_apic_timer_interrupt+0x107/0x460
apic_timer_interrupt+0xf/0x20
cpuidle_enter_state+0x15e/0x980
cpuidle_enter+0x69/0xc0
call_cpuidle+0x23/0x40
do_idle+0x248/0x280
cpu_startup_entry+0x1d/0x1f
start_secondary+0x1b2/0x230
secondary_startup_64+0xb6/0xc0

read to 0xffffffffb2dc94ac of 4 bytes by interrupt on cpu 67:
tick_sched_do_timer+0x31/0x90
tick_sched_do_timer at kernel/time/tick-sched.c:132
tick_sched_timer+0x35/0xc0
__hrtimer_run_queues+0x217/0x7c0
hrtimer_interrupt+0x1d4/0x3e0
smp_apic_timer_interrupt+0x107/0x460
apic_timer_interrupt+0xf/0x20
cpuidle_enter_state+0x15e/0x980
cpuidle_enter+0x69/0xc0
call_cpuidle+0x23/0x40
do_idle+0x248/0x280
cpu_startup_entry+0x1d/0x1f
start_secondary+0x1b2/0x230
secondary_startup_64+0xb6/0xc0

read to 0xffffffffb2dc94ac of 4 bytes by task 0 on cpu 107:
tick_nohz_idle_stop_tick+0x149/0x5f0
tick_nohz_stop_tick at kernel/time/tick-sched.c:774
(inlined by) __tick_nohz_idle_stop_tick at kernel/time/tick-sched.c:967
(inlined by) tick_nohz_idle_stop_tick at kernel/time/tick-sched.c:988
do_idle+0x235/0x280
cpu_startup_entry+0x1d/0x1f
start_secondary+0x1b2/0x230
secondary_startup_64+0xb6/0xc0

read to 0xffffffffb2dc94ac of 4 bytes by task 0 on cpu 21:
tick_nohz_next_event+0x19b/0x2c0
tick_nohz_next_event at kernel/time/tick-sched.c:740
tick_nohz_get_sleep_length+0xae/0xe0
menu_select+0x8b/0xc29
cpuidle_select+0x50/0x70
do_idle+0x214/0x280
cpu_startup_entry+0x1d/0x1f
start_secondary+0x1b2/0x230
secondary_startup_64+0xb6/0xc0

Signed-off-by: Qian Cai <cai@xxxxxx>
---
kernel/time/tick-sched.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a792d21cac64..54bcf4eff238 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -129,16 +129,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* tick_do_timer_cpu never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ if (unlikely(READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
}
#endif

/* Check, if the jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (READ_ONCE(tick_do_timer_cpu) == cpu)
tick_do_update_jiffies64(now);

if (ts->inidle)
@@ -737,8 +737,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+ if (cpu != READ_ONCE(tick_do_timer_cpu) &&
+ (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE ||
+ !ts->do_timer_last))
delta = KTIME_MAX;

/* Calculate the next expiry time */
@@ -771,10 +772,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ if (cpu == READ_ONCE(tick_do_timer_cpu)) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
ts->do_timer_last = 1;
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE) {
ts->do_timer_last = 0;
}

--
2.21.0 (Apple Git-122.2)