[PATCH] psi: Don't account force reclaim as memory pressure

From: Xunlei Pang
Date: Sat Jun 15 2019 - 08:12:29 EST


There are several cases, such as resize and force_empty, that should
not be accounted to psi; doing so is misleading.

We also have a module that reclaims dying memcgs in the background to
avoid accumulating too many dead memcgs, which can cause a lot of
trouble; without this patch that background reclaim makes the psi
inaccuracy even worse.
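
With the new prototype, callers that perform deliberate, admin-triggered
reclaim pass force_reclaim=true so the stall time is not charged to psi,
while the normal charge path keeps passing false and is accounted as
before. A simplified sketch of the two calling conventions, summarizing
the hunks below:

	/* resize / force_empty / limit writes: not memory pressure */
	try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true, true);

	/* try_charge() hitting the limit: real pressure, still accounted */
	try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
				     gfp_mask, may_swap, false);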

Signed-off-by: Xunlei Pang <xlpang@xxxxxxxxxxxxxxxxx>
---
include/linux/swap.h | 3 ++-
mm/memcontrol.c | 13 +++++++------
mm/vmscan.c | 9 ++++++---
3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bfb5c4ac108..74b5443877d4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap);
+ bool may_swap,
+ bool force_reclaim);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f1dfa651f55d..f4ec57876ada 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2237,7 +2237,8 @@ static void reclaim_high(struct mem_cgroup *memcg,
if (page_counter_read(&memcg->memory) <= memcg->high)
continue;
memcg_memory_event(memcg, MEMCG_HIGH);
- try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ try_to_free_mem_cgroup_pages(memcg, nr_pages,
+ gfp_mask, true, false);
} while ((memcg = parent_mem_cgroup(memcg)));
}

@@ -2330,7 +2331,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
memcg_memory_event(mem_over_limit, MEMCG_MAX);

nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, may_swap);
+ gfp_mask, may_swap, false);

if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
goto retry;
@@ -2860,7 +2861,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}

if (!try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, !memsw)) {
+ GFP_KERNEL, !memsw, true)) {
ret = -EBUSY;
break;
}
@@ -2993,7 +2994,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;

progress = try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, true);
+ GFP_KERNEL, true, true);
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
@@ -5549,7 +5550,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
nr_pages = page_counter_read(&memcg->memory);
if (nr_pages > high)
try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, true);
+ GFP_KERNEL, true, true);

memcg_wb_domain_size_changed(memcg);
return nbytes;
@@ -5596,7 +5597,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,

if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, true))
+ GFP_KERNEL, true, true))
nr_reclaims--;
continue;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7acd0afdfc2a..3831848fca5a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3212,7 +3212,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap)
+ bool may_swap,
+ bool force_reclaim)
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
@@ -3243,13 +3244,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,

trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);

- psi_memstall_enter(&pflags);
+ if (!force_reclaim)
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();

nr_reclaimed = do_try_to_free_pages(zonelist, &sc);

memalloc_noreclaim_restore(noreclaim_flag);
- psi_memstall_leave(&pflags);
+ if (!force_reclaim)
+ psi_memstall_leave(&pflags);

trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);

--
2.14.4.44.g2045bb6