[PATCH -mm]pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped-v2-fix-fix

From: Oleg Nesterov
Date: Fri May 25 2012 - 11:16:52 EST


Eric, Andrew, do you agree with this cleanup on top of
pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped-v2-fix.patch?

1. Update the comments in zap_pid_ns_processes() and __unhash_process()
   so that each one refers to the other.

2. Move the wake-up-reaper code in __unhash_process() under
   IS_ENABLED(CONFIG_PID_NS), and declare the "parent" local inside that
   block.

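3. Re-structure the wait-for-empty-children code in zap_pid_ns_processes():
   take tasklist_lock only around the list_empty(&current->children) check,
   and set TASK_UNINTERRUPTIBLE only when we are actually going to call
   schedule(). The trailing set_current_state(TASK_RUNNING) is no longer
   needed and is removed.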

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
---
kernel/exit.c | 17 +++++++++--------
kernel/pid_namespace.c | 21 ++++++++++++++-------
2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 231decb..b3e6e0e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -65,8 +65,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
{
nr_threads--;
if (group_dead) {
- struct task_struct *parent;
-
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);

@@ -76,13 +74,16 @@ static void __unhash_process(struct task_struct *p, bool group_dead)

/*
* If we are the last child process in a pid namespace to be
- * reaped, notify the child_reaper.
+ * reaped, notify the child_reaper, see zap_pid_ns_processes().
*/
- parent = p->real_parent;
- if ((task_active_pid_ns(p)->child_reaper == parent) &&
- list_empty(&parent->children) &&
- (parent->flags & PF_EXITING))
- wake_up_process(parent);
+ if (IS_ENABLED(CONFIG_PID_NS)) {
+ struct task_struct *parent = p->real_parent;
+
+ if ((task_active_pid_ns(p)->child_reaper == parent) &&
+ list_empty(&parent->children) &&
+ (parent->flags & PF_EXITING))
+ wake_up_process(parent);
+ }
}
detach_pid(p, PIDTYPE_PID);
list_del_rcu(&p->thread_group);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 723c948..c2b0df3 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,17 +184,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);

- read_lock(&tasklist_lock);
+ /*
+ * sys_wait4() above can't reap the TASK_DEAD children we may
+ * have. Make sure they all go away, see __unhash_process().
+ */
for (;;) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
- if (list_empty(&current->children))
- break;
+ bool need_wait = false;
+
+ read_lock(&tasklist_lock);
+ if (!list_empty(&current->children)) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ need_wait = true;
+ }
read_unlock(&tasklist_lock);
+
+ if (!need_wait)
+ break;
schedule();
- read_lock(&tasklist_lock);
}
- read_unlock(&tasklist_lock);
- set_current_state(TASK_RUNNING);

if (pid_ns->reboot)
current->signal->group_exit_code = pid_ns->reboot;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/