[PATCH] fork: report pid reservation failure properly

From: Michal Hocko
Date: Fri Jan 30 2015 - 08:50:15 EST


copy_process will report any failure in alloc_pid as ENOMEM currently
which is misleading because the pid allocation might fail not only when
the memory is short but also when the pid space is consumed already.

The current man page even mentions this case:
"
EAGAIN

A system-imposed limit on the number of threads was encountered.
There are a number of limits that may trigger this error: the
RLIMIT_NPROC soft resource limit (set via setrlimit(2)), which
limits the number of processes and threads for a real user ID, was
reached; the kernel's system-wide limit on the number of processes
and threads, /proc/sys/kernel/threads-max, was reached (see
proc(5)); or the maximum number of PIDs, /proc/sys/kernel/pid_max,
was reached (see proc(5)).
"

so the current behavior is also incorrect wrt. documentation. This patch
simply propagates error code from alloc_pid and makes sure we return
-EAGAIN due to reservation failure.

There is one side effect of the change which might be unexpected.
alloc_pid might fail even when the repear in the pid namespace is dead
(the namespace basically disallows all new processes) and there is no
good error code which would match documented ones. I've taken EBUSY
because it felt like a best fit for the condition - namespace is busy
tearing down all the infrastructure.

Signed-off-by: Michal Hocko <mhocko@xxxxxxx>
---
kernel/fork.c | 5 +++--
kernel/pid.c | 19 +++++++++++--------
2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 3ce8d57cff09..c37b88a162c5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1405,10 +1405,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_io;

if (pid != &init_struct_pid) {
- retval = -ENOMEM;
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
- if (!pid)
+ if (IS_ERR(pid)) {
+ retval = PTR_ERR(pid);
goto bad_fork_cleanup_io;
+ }
}

p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
diff --git a/kernel/pid.c b/kernel/pid.c
index 82430c858d69..c1a5875bd1ab 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -207,7 +207,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
}
pid = mk_pid(pid_ns, map, offset);
}
- return -1;
+ return -EAGAIN;
}

int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
@@ -298,17 +298,20 @@ struct pid *alloc_pid(struct pid_namespace *ns)
int i, nr;
struct pid_namespace *tmp;
struct upid *upid;
+ int retval;

pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
if (!pid)
- goto out;
+ return ERR_PTR(-ENOMEM);

tmp = ns;
pid->level = ns->level;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(tmp);
- if (nr < 0)
+ if (IS_ERR_VALUE(nr)) {
+ retval = nr;
goto out_free;
+ }

pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
@@ -316,7 +319,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
}

if (unlikely(is_child_reaper(pid))) {
- if (pid_ns_prepare_proc(ns))
+ if ((retval = pid_ns_prepare_proc(ns)))
goto out_free;
}

@@ -327,8 +330,10 @@ struct pid *alloc_pid(struct pid_namespace *ns)

upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
+ if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) {
+ retval = -EBUSY;
goto out_unlock;
+ }
for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
@@ -336,7 +341,6 @@ struct pid *alloc_pid(struct pid_namespace *ns)
}
spin_unlock_irq(&pidmap_lock);

-out:
return pid;

out_unlock:
@@ -348,8 +352,7 @@ out_free:
free_pidmap(pid->numbers + i);

kmem_cache_free(ns->pid_cachep, pid);
- pid = NULL;
- goto out;
+ return ERR_PTR(retval);
}

void disable_pid_allocation(struct pid_namespace *ns)
--
2.1.4

--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/