[tip:sched/core] sched/debug: Use task_pid_nr_ns in /proc/$pid/sched

From: tip-bot for Aleksa Sarai
Date: Thu Aug 10 2017 - 08:14:35 EST


Commit-ID: 74dc3384fc7983b78cc46ebb1824968a3db85eb1
Gitweb: http://git.kernel.org/tip/74dc3384fc7983b78cc46ebb1824968a3db85eb1
Author: Aleksa Sarai <asarai@xxxxxxxx>
AuthorDate: Sun, 6 Aug 2017 14:41:41 +1000
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Thu, 10 Aug 2017 12:18:19 +0200

sched/debug: Use task_pid_nr_ns in /proc/$pid/sched

It appears as though the addition of the PID namespace did not update
the output code for /proc/*/sched, which resulted in it providing PIDs
that were not self-consistent with the /proc mount. This additionally
made it trivial to detect whether a process was inside &init_pid_ns from
userspace, making container detection trivial:

https://github.com/jessfraz/amicontained

This leads to situations such as:

% unshare -pmf
% mount -t proc proc /proc
% head -n1 /proc/1/sched
head (10047, #threads: 1)

Fix this by just using task_pid_nr_ns for the output of /proc/*/sched.
All of the other uses of task_pid_nr in kernel/sched/debug.c are from a
sysctl context and thus don't need to be namespaced.

Signed-off-by: Aleksa Sarai <asarai@xxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Acked-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
Cc: Jess Frazelle <acidburn@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: cyphar@xxxxxxxxxx
Link: http://lkml.kernel.org/r/20170806044141.5093-1-asarai@xxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
fs/proc/base.c | 3 ++-
include/linux/sched/debug.h | 4 +++-
kernel/sched/debug.c | 5 +++--
3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 719c2e9..98fd8f6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1408,12 +1408,13 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
struct task_struct *p;

p = get_proc_task(inode);
if (!p)
return -ESRCH;
- proc_sched_show_task(p, m);
+ proc_sched_show_task(p, ns, m);

put_task_struct(p);

diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index e0eaee5..5d58d49 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -6,6 +6,7 @@
*/

struct task_struct;
+struct pid_namespace;

extern void dump_cpu_task(int cpu);

@@ -34,7 +35,8 @@ extern void sched_show_task(struct task_struct *p);

#ifdef CONFIG_SCHED_DEBUG
struct seq_file;
-extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_show_task(struct task_struct *p,
+ struct pid_namespace *ns, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p);
#endif

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4fa66de..ac34511 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -872,11 +872,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
#endif
}

-void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ struct seq_file *m)
{
unsigned long nr_switches;

- SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p),
+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
get_nr_threads(p));
SEQ_printf(m,
"---------------------------------------------------------"