[PATCH 2/2] coredump: exit_mm: clear ->mm first, then play with ->core_state

From: Oleg Nesterov
Date: Wed Jul 16 2008 - 09:51:31 EST


With the previous changes, the sub-threads which participate in the coredump
no longer need a valid ->mm while the coredump is in progress, so we can now
decouple exit_mm() from the coredumping code.

Change exit_mm() to clear ->mm first, then play with mm->core_state. This
simplifies the code because we can avoid the unlock/lock games with
->mmap_sem, and, more importantly, it makes the coredumping process visible
to oom_kill. Currently a PF_EXITING task can sleep with ->mm != NULL for an
"unpredictably" long time.
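
To make the new ordering concrete, here is a rough sketch of exit_mm() as it
ends up with this patch applied, condensed from the diff below. The lines the
patch does not touch (the lazy-TLB, freezer and mm_owner handling in the
middle) are filled in from the 2.6.26-rc2 source and are an assumption, not
part of this patch:

	static void exit_mm(struct task_struct *tsk)
	{
		struct mm_struct *mm = tsk->mm;
		struct core_state *core_state;

		mm_release(tsk, mm);
		if (!mm)
			return;

		down_read(&mm->mmap_sem);
		core_state = mm->core_state;	/* only remember it here */
		atomic_inc(&mm->mm_count);
		BUG_ON(mm != tsk->active_mm);
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;			/* ->mm is cleared first ... */
		up_read(&mm->mmap_sem);		/* ... with a single down_read/up_read */
		enter_lazy_tlb(mm, current);
		clear_freeze_flag(tsk);
		task_unlock(tsk);
		mm_update_next_owner(mm);
		mmput(mm);

		if (core_state)			/* ... and only then do we sleep */
			exit_coredump(tsk, core_state);
	}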

The patch moves the coredump-wait code into a new function, exit_coredump(),
for readability.
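
For reference, the other side of the handshake that exit_coredump() sleeps on
is coredump_finish() in fs/exec.c; this patch only updates a comment in it
(see the second hunk). A sketch of how that function is assumed to look after
the previous patch, reconstructed around that hunk and possibly differing in
detail:

	static void coredump_finish(struct mm_struct *mm)
	{
		struct core_thread *curr, *next;
		struct task_struct *task;

		next = mm->core_state->dumper.next;
		while ((curr = next) != NULL) {
			next = curr->next;
			task = curr->task;
			/*
			 * see exit_coredump(), curr->task must not
			 * see ->task == NULL before we read ->next.
			 */
			smp_mb();
			curr->task = NULL;
			wake_up_process(task);
		}

		mm->core_state = NULL;
	}

The smp_mb() ensures the dumper has read curr->next before the exiting thread
can observe ->task == NULL and let its on-stack core_thread go out of scope.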

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>

kernel/exit.c |   47 +++++++++++++++++++++++++----------------------
fs/exec.c     |    4 ++--
2 files changed, 27 insertions(+), 24 deletions(-)

--- 26-rc2/kernel/exit.c~7_CLEAR_MM_FIRST 2008-07-15 20:25:48.000000000 +0400
+++ 26-rc2/kernel/exit.c 2008-07-15 20:24:50.000000000 +0400
@@ -646,6 +646,28 @@ assign_new_owner:
 }
 #endif /* CONFIG_MM_OWNER */
 
+static void exit_coredump(struct task_struct * tsk,
+			struct core_state *core_state)
+{
+	struct core_thread self;
+
+	self.task = tsk;
+	self.next = xchg(&core_state->dumper.next, &self);
+	/*
+	 * Implies mb(), the result of xchg() must be visible
+	 * to core_state->dumper.
+	 */
+	if (atomic_dec_and_test(&core_state->nr_threads))
+		complete(&core_state->startup);
+
+	for (;;) {
+		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (!self.task) /* see coredump_finish() */
+			break;
+		schedule();
+	}
+	__set_task_state(tsk, TASK_RUNNING);
+}
 /*
  * Turn us into a lazy TLB process if we
  * aren't already..
@@ -667,28 +689,6 @@ static void exit_mm(struct task_struct *
 	 */
 	down_read(&mm->mmap_sem);
 	core_state = mm->core_state;
-	if (core_state) {
-		struct core_thread self;
-		up_read(&mm->mmap_sem);
-
-		self.task = tsk;
-		self.next = xchg(&core_state->dumper.next, &self);
-		/*
-		 * Implies mb(), the result of xchg() must be visible
-		 * to core_state->dumper.
-		 */
-		if (atomic_dec_and_test(&core_state->nr_threads))
-			complete(&core_state->startup);
-
-		for (;;) {
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-			if (!self.task) /* see coredump_finish() */
-				break;
-			schedule();
-		}
-		__set_task_state(tsk, TASK_RUNNING);
-		down_read(&mm->mmap_sem);
-	}
 	atomic_inc(&mm->mm_count);
 	BUG_ON(mm != tsk->active_mm);
 	/* more a memory barrier than a real lock */
@@ -701,6 +701,9 @@ static void exit_mm(struct task_struct *
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
+
+	if (core_state)
+		exit_coredump(tsk, core_state);
 }
 
 static void
--- 26-rc2/fs/exec.c~7_CLEAR_MM_FIRST 2008-07-15 17:54:45.000000000 +0400
+++ 26-rc2/fs/exec.c 2008-07-15 20:24:50.000000000 +0400
@@ -1632,8 +1632,8 @@ static void coredump_finish(struct mm_st
 		next = curr->next;
 		task = curr->task;
 		/*
-		 * see exit_mm(), curr->task must not see
-		 * ->task == NULL before we read ->next.
+		 * see exit_coredump(), curr->task must not
+		 * see ->task == NULL before we read ->next.
 		 */
 		smp_mb();
 		curr->task = NULL;
