[PATCH 4/6] ipc/msg.c: Update and document memory barriers.

From: Manfred Spraul
Date: Sat Oct 12 2019 - 01:50:32 EST


Transfer findings from ipc/mqueue.c:
- A control barrier was missing for the lockless receive case.
So in theory, not yet initialized data may have been copied
to user space - obviously only on architectures where
control barriers are not NOPs.

- Use smp_store_release(). In theory, the refcount
may have been decreased to 0 already when wake_q_add()
tries to get a reference.

Signed-off-by: Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx>
---
ipc/msg.c | 44 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 8dec945fa030..e6b20a7e6341 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -184,6 +184,10 @@ static inline void ss_add(struct msg_queue *msq,
{
mss->tsk = current;
mss->msgsz = msgsz;
+ /*
+ * No memory barrier required: we did ipc_lock_object(),
+ * and the waker obtains that lock before calling wake_q_add().
+ */
__set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders);
}
@@ -238,7 +242,14 @@ static void expunge_all(struct msg_queue *msq, int res,

list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(res));
+
+ /*
+ * The barrier is required to ensure that the refcount increase
+ * inside wake_q_add() is completed before the state is updated.
+ *
+ * The barrier pairs with READ_ONCE()+smp_acquire__after_ctrl_dep().
+ */
+ smp_store_release(&msr->r_msg, ERR_PTR(res));
}
}

@@ -798,13 +809,17 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, ERR_PTR(-E2BIG));
} else {
ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
msq->q_rtime = ktime_get_real_seconds();

wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, msg);
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, msg);
return 1;
}
}
@@ -1154,7 +1169,11 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = bufsz;
- msr_d.r_msg = ERR_PTR(-EAGAIN);
+
+ /* memory barrier not required due to ipc_lock_object() */
+ WRITE_ONCE(msr_d.r_msg, ERR_PTR(-EAGAIN));
+
+ /* memory barrier not required, we own ipc_lock_object() */
__set_current_state(TASK_INTERRUPTIBLE);

ipc_unlock_object(&msq->q_perm);
@@ -1183,8 +1202,21 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
* signal) it will either see the message and continue ...
*/
msg = READ_ONCE(msr_d.r_msg);
- if (msg != ERR_PTR(-EAGAIN))
+ if (msg != ERR_PTR(-EAGAIN)) {
+ /*
+ * Memory barrier for msr_d.r_msg
+ * The smp_acquire__after_ctrl_dep(), together with the
+ * READ_ONCE() above pairs with the barrier inside
+ * wake_q_add().
+ * The barrier protects the accesses to the message in
+ * do_msg_fill(). In addition, the barrier protects user
+ * space, too: User space may assume that all data from
+ * the CPU that sent the message is visible.
+ */
+ smp_acquire__after_ctrl_dep();
+
goto out_unlock1;
+ }

/*
* ... or see -EAGAIN, acquire the lock to check the message
@@ -1192,7 +1224,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
*/
ipc_lock_object(&msq->q_perm);

- msg = msr_d.r_msg;
+ msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0;

--
2.21.0