[tip: sched/core] rseq: Extend struct rseq with per-memory-map concurrency ID

From: tip-bot2 for Mathieu Desnoyers
Date: Tue Dec 27 2022 - 07:16:12 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: f7b01bb0b57f994a44ea6368536b59062b796381
Gitweb: https://git.kernel.org/tip/f7b01bb0b57f994a44ea6368536b59062b796381
Author: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
AuthorDate: Tue, 22 Nov 2022 15:39:10 -05:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 27 Dec 2022 12:52:12 +01:00

rseq: Extend struct rseq with per-memory-map concurrency ID

If a memory map has fewer threads than there are cores on the system, or
is limited to run on only a few cores concurrently through sched affinity
or cgroup cpusets, the concurrency IDs will be values close to 0, thus
allowing efficient use of user-space memory for per-cpu data structures.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20221122203932.231377-9-mathieu.desnoyers@xxxxxxxxxxxx
---
include/uapi/linux/rseq.h | 9 +++++++++
kernel/rseq.c | 11 ++++++++++-
2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 1cb90a4..c233aae 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -140,6 +140,15 @@ struct rseq {
__u32 node_id;

/*
+ * Restartable sequences mm_cid field. Updated by the kernel. Read by
+ * user-space with single-copy atomicity semantics. This field should
+ * only be read by the thread which registered this data structure.
+ * Aligned on 32-bit. Contains the current thread's concurrency ID
+ * (allocated uniquely within a memory map).
+ */
+ __u32 mm_cid;
+
+ /*
* Flexible array member at end of structure, after last feature field.
*/
char end[];
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 193cfcc..9de6e35 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -90,12 +90,15 @@ static int rseq_update_cpu_node_id(struct task_struct *t)
struct rseq __user *rseq = t->rseq;
u32 cpu_id = raw_smp_processor_id();
u32 node_id = cpu_to_node(cpu_id);
+ u32 mm_cid = task_mm_cid(t);

+ WARN_ON_ONCE((int) mm_cid < 0);
if (!user_write_access_begin(rseq, t->rseq_len))
goto efault;
unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
unsafe_put_user(node_id, &rseq->node_id, efault_end);
+ unsafe_put_user(mm_cid, &rseq->mm_cid, efault_end);
/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally updated only if
@@ -113,7 +116,8 @@ efault:

static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
{
- u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0;
+ u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
+ mm_cid = 0;

/*
* Reset cpu_id_start to its initial state (0).
@@ -133,6 +137,11 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
if (put_user(node_id, &t->rseq->node_id))
return -EFAULT;
/*
+ * Reset mm_cid to its initial state (0).
+ */
+ if (put_user(mm_cid, &t->rseq->mm_cid))
+ return -EFAULT;
+ /*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally reset only if
* t->rseq_len != ORIG_RSEQ_SIZE.