[PATCH] timer: use raw_spin_unlock_irqrestore and raw_spin_lock_irqsave instead of raw_spin_{lock|unlock}

From: Wang Long
Date: Wed Aug 19 2020 - 22:59:53 EST


The current code in the function __mod_timer() (https://github.com/torvalds/linux/blob/master/kernel/time/timer.c#L961):

        if (timer_pending(timer)) {
                ...
                base = lock_timer_base(timer, &flags); ----------------------(1)
                forward_timer_base(base);

                if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
                    time_before_eq(timer->expires, expires)) {
                        ret = 1;
                        goto out_unlock;
                }

                clk = base->clk;
                idx = calc_wheel_index(expires, clk, &bucket_expiry);

                /*
                 * Retrieve and compare the array index of the pending
                 * timer. If it matches set the expiry to the new value so a
                 * subsequent call will exit in the expires check above.
                 */
                if (idx == timer_get_idx(timer)) {
                        if (!(options & MOD_TIMER_REDUCE))
                                timer->expires = expires;
                        else if (time_after(timer->expires, expires))
                                timer->expires = expires;
                        ret = 1;
                        goto out_unlock;
                }
        } else {
                base = lock_timer_base(timer, &flags); ------------------------(2)
                forward_timer_base(base);
        }

        ret = detach_if_pending(timer, base, false);
        if (!ret && (options & MOD_TIMER_PENDING_ONLY))
                goto out_unlock;

        new_base = get_target_base(base, timer->flags);

        if (base != new_base) {
                /*
                 * We are trying to schedule the timer on the new base.
                 * However we can't change timer's base while it is running,
                 * otherwise del_timer_sync() can't detect that the timer's
                 * handler yet has not finished. This also guarantees that the
                 * timer is serialized wrt itself.
                 */
                if (likely(base->running_timer != timer)) {
                        /* See the comment in lock_timer_base() */
                        timer->flags |= TIMER_MIGRATING;

                        raw_spin_unlock(&base->lock); -----------------------(3)
                        base = new_base;
                        raw_spin_lock(&base->lock); -------------------------(4)
                        WRITE_ONCE(timer->flags,
                                   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
                        forward_timer_base(base);
                }
        }

        debug_timer_activate(timer);

        timer->expires = expires;
        /*
         * If 'idx' was calculated above and the base time did not advance
         * between calculating 'idx' and possibly switching the base, only
         * enqueue_timer() is required. Otherwise we need to (re)calculate
         * the wheel index via internal_add_timer().
         */
        if (idx != UINT_MAX && clk == base->clk)
                enqueue_timer(base, timer, idx, bucket_expiry);
        else
                internal_add_timer(base, timer);

out_unlock:
        raw_spin_unlock_irqrestore(&base->lock, flags); ---------------------(5)

        return ret;
}

The code at (1) and (2) locks the base via lock_timer_base(), which uses
raw_spin_lock_irqsave(&base->lock, *flags). If base != new_base, (3) unlocks
the old base with raw_spin_unlock(&base->lock) and (4) locks the new base with
raw_spin_lock(&base->lock). At the end of the function, (5) unlocks the new
base with raw_spin_unlock_irqrestore(&base->lock, flags).
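
For reference, the flags are saved inside lock_timer_base() itself; the
following is condensed from the same kernel/time/timer.c (the exact body may
differ slightly between kernel versions):

        static struct timer_base *lock_timer_base(struct timer_list *timer,
                                                  unsigned long *flags)
        {
                for (;;) {
                        struct timer_base *base;
                        u32 tf = READ_ONCE(timer->flags);

                        if (!(tf & TIMER_MIGRATING)) {
                                base = get_timer_base(tf);
                                /* local IRQ state is saved into *flags here */
                                raw_spin_lock_irqsave(&base->lock, *flags);
                                if (timer->flags == tf)
                                        return base;
                                raw_spin_unlock_irqrestore(&base->lock, *flags);
                        }
                        cpu_relax();
                }
        }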

Consider the following situation, in which the timer migrates from CPU0's base
to CPU1's base (the whole sequence runs in one call to __mod_timer(); the
columns only show which CPU's base lock is involved):

        CPU0's base                                     CPU1's base (new_base)

        base = lock_timer_base(timer, &flags);  (1)(2)
        raw_spin_unlock(&base->lock);           (3)
        base = new_base;
                                                        raw_spin_lock(&base->lock);                     (4)
                                                        raw_spin_unlock_irqrestore(&base->lock, flags); (5)

The flags are saved while locking CPU0's base at (1)/(2), but restored while
unlocking CPU1's base at (5). Is this wrong?
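
To make the question concrete, the hand-off at (1)-(5) follows this pattern in
isolation (a minimal sketch, with hypothetical locks a_lock and b_lock standing
in for the old and new base->lock; this is not the actual timer code):

        #include <linux/spinlock.h>

        static DEFINE_RAW_SPINLOCK(a_lock);     /* stands in for CPU0's base->lock */
        static DEFINE_RAW_SPINLOCK(b_lock);     /* stands in for CPU1's base->lock */

        static void lock_handoff(void)
        {
                unsigned long flags;

                /* (1)/(2): disable local IRQs, remember the previous IRQ state in flags */
                raw_spin_lock_irqsave(&a_lock, flags);
                /* (3): release a_lock; local IRQs remain disabled */
                raw_spin_unlock(&a_lock);
                /* (4): acquire b_lock, still with local IRQs disabled */
                raw_spin_lock(&b_lock);
                /* (5): release b_lock and restore the IRQ state saved at (1)/(2) */
                raw_spin_unlock_irqrestore(&b_lock, flags);
        }

Here 'flags' records only the local CPU's previous interrupt state; whether it
is legitimate to restore it while releasing a different lock is exactly the
question above.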

We encountered a kernel panic, and we suspect that this is the cause. How about the following patch as a fix?

Signed-off-by: Wang Long <w@xxxxxxxxxxxxx>
---
kernel/time/timer.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a16764b..4153766 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1039,9 +1039,9 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
/* See the comment in lock_timer_base() */
timer->flags |= TIMER_MIGRATING;

- raw_spin_unlock(&base->lock);
+ raw_spin_unlock_irqrestore(&base->lock, flags);
base = new_base;
- raw_spin_lock(&base->lock);
+ raw_spin_lock_irqsave(&base->lock, flags);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
forward_timer_base(base);
--
1.8.3.1