[PATCH] mm,oom: Don't suppress dump_tasks() output from different OOM scopes.

From: Tetsuo Handa
Date: Sat Sep 14 2019 - 02:11:02 EST


Michal Hocko is complaining that "mm,oom: Defer dump_tasks() output."
needlessly suppresses concurrent OOM killer invocations from different
OOM scopes. This patch changes dump_tasks() not to suppress such output.

---
kernel/fork.c | 1 +
mm/oom_kill.c | 64 ++++++++++++++++++++++++++++++++++++++---------------------
2 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index f601168..c86a126d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1854,6 +1854,7 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ INIT_LIST_HEAD(&p->oom_task_info.list);

/*
* This _must_ happen before we call free_task(), i.e. before we jump
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index db62f50..7f57cea 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -379,6 +379,7 @@ static void select_bad_process(struct oom_control *oc)

static unsigned int oom_killer_seq; /* Serialized by oom_lock. */
static LIST_HEAD(oom_candidate_list); /* Serialized by oom_lock. */
+static LIST_HEAD(oom_tmp_candidate_list); /* Serialized by oom_lock. */

static int add_candidate_task(struct task_struct *p, void *arg)
{
@@ -395,8 +396,13 @@ static int add_candidate_task(struct task_struct *p, void *arg)
p = find_lock_task_mm(p);
if (!p)
return 0;
- get_task_struct(p);
oti = &p->oom_task_info;
+ /* p might be still waiting for dump_candidate_tasks(). */
+ if (!list_empty(&oti->list)) {
+ task_unlock(p);
+ return 1;
+ }
+ get_task_struct(p);
mm = p->mm;
oti->seq = oom_killer_seq;
memcpy(oti->comm, p->comm, sizeof(oti->comm));
@@ -409,14 +415,14 @@ static int add_candidate_task(struct task_struct *p, void *arg)
oti->swapents = get_mm_counter(mm, MM_SWAPENTS);
oti->score_adj = p->signal->oom_score_adj;
task_unlock(p);
- list_add_tail(&oti->list, &oom_candidate_list);
+ list_add_tail(&oti->list, &oom_tmp_candidate_list);
return 0;
}

static void dump_candidate_tasks(struct work_struct *work)
{
bool first = true;
- unsigned int seq;
+ unsigned int seq = 0;
struct oom_task_info *oti;

if (work) /* Serialize only if asynchronous. */
@@ -424,7 +430,10 @@ static void dump_candidate_tasks(struct work_struct *work)
while (!list_empty(&oom_candidate_list)) {
oti = list_first_entry(&oom_candidate_list,
struct oom_task_info, list);
- seq = oti->seq;
+ if (seq != oti->seq) {
+ seq = oti->seq;
+ first = true;
+ }
if (first) {
pr_info("OOM[%u]: Tasks state (memory values in pages):\n",
seq);
@@ -436,7 +445,7 @@ static void dump_candidate_tasks(struct work_struct *work)
seq, oti->pid, oti->uid, oti->tgid, oti->total_vm,
oti->mm_rss, oti->pgtables_bytes, oti->swapents,
oti->score_adj, oti->comm);
- list_del(&oti->list);
+ list_del_init(&oti->list);
if (work)
mutex_unlock(&oom_lock);
put_task_struct(container_of(oti, struct task_struct,
@@ -464,32 +473,41 @@ static void dump_candidate_tasks(struct work_struct *work)
static void dump_tasks(struct oom_control *oc)
{
struct task_struct *p;
+ int ret = 0;

/*
- * Suppress as long as there is any OOM victim candidate from past
- * rounds of OOM killer invocations. We could change this to suppress
- * only if there is an OOM victim candidate in the same OOM domain if
- * we want to see OOM victim candidates from different OOM domains.
- * But since dump_header() is already ratelimited, I don't know whether
- * it makes difference to suppress OOM victim candidates from different
- * OOM domains...
+ * Suppress if OOM victim candidates in the same OOM scope from past
+ * OOM killer invocations are still waiting for dump_candidate_tasks(),
+ * for it is possible that the OOM reaper or exit_mmap() sets
+ * MMF_OOM_SKIP before dump_candidate_tasks() completes. Otherwise,
+ * call dump_candidate_tasks() after SIGKILL is sent to OOM victims and
+ * the OOM reaper started reclaiming.
*/
- if (!list_empty(&oom_candidate_list))
- return;
if (is_memcg_oom(oc))
- mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, oc);
+ ret = mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, oc);
else {
rcu_read_lock();
- for_each_process(p)
- add_candidate_task(p, oc);
+ for_each_process(p) {
+ ret = add_candidate_task(p, oc);
+ if (ret)
+ break;
+ }
rcu_read_unlock();
}
- /*
- * Report OOM victim candidates after SIGKILL is sent to OOM victims
- * and the OOM reaper started reclaiming.
- */
- if (!list_empty(&oom_candidate_list))
- queue_work(system_long_wq, &oom_dump_candidates_work);
+ if (ret) {
+ while (!list_empty(&oom_tmp_candidate_list)) {
+ struct oom_task_info *oti =
+ list_first_entry(&oom_tmp_candidate_list,
+ struct oom_task_info, list);
+
+ list_del_init(&oti->list);
+ put_task_struct(container_of(oti, struct task_struct,
+ oom_task_info));
+ }
+ return;
+ }
+ list_splice_tail_init(&oom_tmp_candidate_list, &oom_candidate_list);
+ queue_work(system_unbound_wq, &oom_dump_candidates_work);
}

static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
--
1.8.3.1