[PATCH RFC] mailbox: fix hrtimer_forward() warning

From: Alexey Klimov
Date: Thu May 25 2017 - 13:30:13 EST


When mailbox controller provides two or more channels and
they are actively used by mailbox client(s) it's very easy
to trigger the warning in hrtimer_forward():

[ 247.853060] WARNING: CPU: 6 PID: 0 at kernel/time/hrtimer.c:805 hrtimer_forward+0x88/0xd8
[ 247.853549] Modules linked in:
[ 247.853907] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G W 4.11.0-rc2-00362-g93afaa4513bb-dirty #13
[ 247.854472] Hardware name: linux,dummy-virt (DT)
[ 247.854699] task: ffff80001d89d780 task.stack: ffff80001d8c4000
[ 247.854999] PC is at hrtimer_forward+0x88/0xd8
[ 247.855280] LR is at txdone_hrtimer+0xd4/0xf8
[ 247.855551] pc : [<ffff0000081039f0>] lr : [<ffff00000881b874>] pstate: 200001c5
[ 247.855857] sp : ffff80001efbdeb0
[ 247.856072] x29: ffff80001efbdeb0 x28: ffff80001efc3140
[ 247.856358] x27: ffff00000881b7a0 x26: 00000039ac93e8b6
[ 247.856604] x25: ffff000008e756be x24: ffff80001c4a1348
[ 247.856882] x23: 0000000000000001 x22: 00000000000000f8
[ 247.857189] x21: ffff80001c4a1318 x20: ffff80001d327110
[ 247.857509] x19: 00000000000f4240 x18: 0000000000000030
[ 247.857808] x17: 0000ffffaecdf370 x16: ffff0000081ccc80
[ 247.858000] x15: 0000000000000010 x14: 00000000fffffff0
[ 247.858186] x13: ffff000008f488e0 x12: 000000000002e3eb
[ 247.858381] x11: ffff000008979690 x10: 0000000000000000
[ 247.858573] x9 : 0000000000000001 x8 : ffff80001efc66e0
[ 247.858758] x7 : ffff80001efc6708 x6 : 00000005be7732f2
[ 247.858943] x5 : 0000000000000001 x4 : ffff80001c4a1348
[ 247.859130] x3 : 00000039ac94952a x2 : 00000000000f4240
[ 247.859315] x1 : 00000039ac98243c x0 : 0000000000038f12
[ 247.859582] ---[ end trace d61812426ec3c30b ]---

To fix it this patch introduces one more mbox spinlock for hr-timer and
hrt_active variable in controller structure.
The hr-timer callback restarts timer unless there no active requests
on some channel.

Signed-off-by: Alexey Klimov <alexey.klimov@xxxxxxx>
---
drivers/mailbox/mailbox.c | 66 ++++++++++++++++++++++++++++++++------
include/linux/mailbox_controller.h | 2 ++
2 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 9dfbf7e..0334676 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -85,9 +85,17 @@ static void msg_submit(struct mbox_chan *chan)
exit:
spin_unlock_irqrestore(&chan->lock, flags);

- if (!err && (chan->txdone_method & TXDONE_BY_POLL))
- /* kick start the timer immediately to avoid delays */
- hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
+ if (!err && (chan->txdone_method & TXDONE_BY_POLL)) {
+
+ spin_lock_irqsave(&chan->mbox->lock_hrt, flags);
+ /* try to start the timer immediately to avoid delays */
+ if (!chan->mbox->hrt_active) {
+ chan->mbox->hrt_active = 1;
+ hrtimer_start(&chan->mbox->poll_hrt, 0,
+ HRTIMER_MODE_REL);
+ }
+ spin_unlock_irqrestore(&chan->mbox->lock_hrt, flags);
+ }
}

static void tx_tick(struct mbox_chan *chan, int r)
@@ -118,26 +126,65 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
{
struct mbox_controller *mbox =
container_of(hrtimer, struct mbox_controller, poll_hrt);
+ unsigned long flags;
bool txdone, resched = false;
int i;

+ spin_lock(&mbox->lock_hrt);
+
for (i = 0; i < mbox->num_chans; i++) {
struct mbox_chan *chan = &mbox->chans[i];

+ /*
+ * While iterating over channels we take the channel locks
+ * to be sure that reading of active_req is not racy and
+ * there are no randomly queued requests.
+ */
+ spin_lock_irqsave(&chan->lock, flags);
+
if (chan->active_req && chan->cl) {
txdone = chan->mbox->ops->last_tx_done(chan);
- if (txdone)
+
+ if (txdone) {
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ /* We have to unlock HR timer lock to avoid
+ * deadlock: tx_tick() acquires HR-timer
+ * spinlock to run timer. */
+ spin_unlock(&mbox->lock_hrt);
+
tx_tick(chan, 0);
- else
+
+ spin_lock(&mbox->lock_hrt);
+
+ /*
+ * It's possible that while we unlocked
+ * HR-timer lock here then someone can queue
+ * another request on another channel or
+ * tx_tick() above can queue a request. Such
+ * sequence will observe that timer is active
+ * and won't start it and leave. Therefore, we
+ * have to resched timer here in all cases
+ * manually.
+ */
resched = true;
+ continue;
+ }
+
+ resched = true;
}
+
+ spin_unlock_irqrestore(&chan->lock, flags);
}

- if (resched) {
+ if (resched)
hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period));
- return HRTIMER_RESTART;
- }
- return HRTIMER_NORESTART;
+ else
+ mbox->hrt_active = 0;
+ spin_unlock(&mbox->lock_hrt);
+
+ return resched ? HRTIMER_RESTART : HRTIMER_NORESTART;
}

/**
@@ -462,6 +509,7 @@ int mbox_controller_register(struct mbox_controller *mbox)
return -EINVAL;
}

+ spin_lock_init(&mbox->lock_hrt);
hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
mbox->poll_hrt.function = txdone_hrtimer;
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index 74deadb..1dd4c47 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -83,6 +83,8 @@ struct mbox_controller {
const struct of_phandle_args *sp);
/* Internal to API */
struct hrtimer poll_hrt;
+ spinlock_t lock_hrt;
+ unsigned int hrt_active;
struct list_head node;
};

--
1.9.1