[PATCH] vmscan: fix slab vs page cache reclaim balance

From: Vladimir Davydov
Date: Fri Nov 28 2014 - 09:58:43 EST


Though page cache reclaim is done per zone, the finest granularity for
slab cache reclaim is per node. To put proportional pressure on both, we
therefore shrink slab caches only when scanning the class zone, i.e. the
highest zone suitable for the allocation. However, the class zone may be
empty: e.g. on x86_64 with < 4G of RAM, ZONE_NORMAL, the class zone for
most allocations, has no pages at all (and ZONE_HIGHMEM does not exist
there in the first place). As a result, slab caches end up being scanned
only by kswapd, which in turn may lead to a premature OOM kill.
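
For reference, the pre-patch behaviour being removed below looks roughly
like this (a simplified sketch of the relevant paths, not the exact
mainline code):

	/* shrink_zones(): only the class zone is flagged */
	shrink_zone(zone, sc, zone_idx(zone) == requested_highidx);

	/* shrink_zone(): slab caches are shrunk only for the class zone */
	if (global_reclaim(sc) && is_classzone)
		shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
				  sc->nr_scanned - nr_scanned,
				  zone_lru_pages);

So when no populated zone on the node matches the requested class zone
index, the is_classzone branch is never taken on this path and slab
caches are left to kswapd alone.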

This patch attempts to fix this by calling shrink_node_slabs() for each
eligible zone, distributing the slab pressure among the zones in
proportion to their share of the node's present pages.
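
In simplified form (the actual hunk below uses a 64-bit temporary and
do_div()), the per-lruvec contribution becomes:

	/*
	 * Inflate the eligible-LRU count by the node/zone size ratio so
	 * that the node-wide slab caches are not over-shrunk on behalf
	 * of a small zone.
	 */
	nr_eligible += lru_pages * node_present_pages / zone_present_pages;

With shrink_node_slabs() applying pressure roughly in the ratio of pages
scanned to eligible pages, fully scanning a zone that holds, say, a
quarter of the node's present pages then translates into about a quarter
of the full pressure on the node's slab caches. The quarter figure is
just an illustrative example.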

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9130cf67bac1..dd80625a1be5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2298,8 +2298,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}

-static bool shrink_zone(struct zone *zone, struct scan_control *sc,
- bool is_classzone)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
bool reclaimable = false;
@@ -2310,7 +2309,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
.zone = zone,
.priority = sc->priority,
};
- unsigned long zone_lru_pages = 0;
+ unsigned long nr_eligible = 0;
struct mem_cgroup *memcg;

nr_reclaimed = sc->nr_reclaimed;
@@ -2319,6 +2318,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
unsigned long lru_pages;
+ unsigned long long tmp;
struct lruvec *lruvec;
int swappiness;

@@ -2326,7 +2326,17 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
swappiness = mem_cgroup_swappiness(memcg);

shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
- zone_lru_pages += lru_pages;
+
+ /*
+ * Scale lru_pages inversely proportionally to the zone
+ * size in order to not over-reclaim slab caches, which
+ * are zone unaware.
+ */
+ tmp = lru_pages;
+ tmp *= zone->zone_pgdat->node_present_pages;
+ do_div(tmp, zone->present_pages);
+
+ nr_eligible += tmp;

/*
* Direct reclaim and kswapd have to scan all memory
@@ -2350,12 +2360,12 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
* Shrink the slab caches in the same proportion that
* the eligible LRU pages were scanned.
*/
- if (global_reclaim(sc) && is_classzone) {
+ if (global_reclaim(sc)) {
struct reclaim_state *reclaim_state;

shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
sc->nr_scanned - nr_scanned,
- zone_lru_pages);
+ nr_eligible);

reclaim_state = current->reclaim_state;
if (reclaim_state) {
@@ -2503,7 +2513,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
/* need some check for avoid more shrink_zone() */
}

- if (shrink_zone(zone, sc, zone_idx(zone) == requested_highidx))
+ if (shrink_zone(zone, sc))
reclaimable = true;

if (global_reclaim(sc) &&
@@ -3010,7 +3020,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
balance_gap, classzone_idx))
return true;

- shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
+ shrink_zone(zone, sc);

/* Account for the number of pages attempted to reclaim */
*nr_attempted += sc->nr_to_reclaim;
@@ -3656,7 +3666,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* priorities until we have enough memory freed.
*/
do {
- shrink_zone(zone, &sc, true);
+ shrink_zone(zone, &sc);
} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/