[PATCH] vmscan: count reclaimed slab pages properly

From: KOSAKI Motohiro
Date: Tue Jul 13 2010 - 01:43:21 EST


Andrew Morton pointed out that __zone_reclaim() shouldn't compare the old and
new zone_page_state(NR_SLAB_RECLAIMABLE) results. Instead, it has to account
for the number of freed slab pages by enhancing reclaim_state.

This patch does it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
---
include/linux/swap.h | 3 ++-
mm/slab.c | 4 +++-
mm/slob.c | 4 +++-
mm/slub.c | 7 +++++--
mm/vmscan.c | 44 ++++++++++++++++----------------------------
5 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ff4acea..b8d3f33 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -107,7 +107,8 @@ typedef struct {
* memory reclaim
*/
struct reclaim_state {
- unsigned long reclaimed_slab;
+ unsigned long reclaimed_slab;
+ struct zone *zone;
};

#ifdef __KERNEL__
diff --git a/mm/slab.c b/mm/slab.c
index 4e9c46f..aac9306 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1741,7 +1741,9 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
page++;
}
if (current->reclaim_state)
- current->reclaim_state->reclaimed_slab += nr_freed;
+ if (!current->reclaim_state->zone ||
+ current->reclaim_state->zone == page_zone(page))
+ current->reclaim_state->reclaimed_slab += nr_freed;
free_pages((unsigned long)addr, cachep->gfporder);
}

diff --git a/mm/slob.c b/mm/slob.c
index 3f19a34..192d05c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -260,7 +260,9 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
static void slob_free_pages(void *b, int order)
{
if (current->reclaim_state)
- current->reclaim_state->reclaimed_slab += 1 << order;
+ if (!current->reclaim_state->zone ||
+ current->reclaim_state->zone == page_zone(virt_to_page(b)))
+ current->reclaim_state->reclaimed_slab += 1 << order;
free_pages((unsigned long)b, order);
}

diff --git a/mm/slub.c b/mm/slub.c
index 7bb7940..f510b14 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1204,8 +1204,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page)

__ClearPageSlab(page);
reset_page_mapcount(page);
- if (current->reclaim_state)
- current->reclaim_state->reclaimed_slab += pages;
+ if (current->reclaim_state) {
+ if (!current->reclaim_state->zone ||
+ current->reclaim_state->zone == page_zone(page))
+ current->reclaim_state->reclaimed_slab += pages;
+ }
__free_pages(page, order);
}

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1bf9f72..8faef0c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2571,7 +2571,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
/* Minimum pages needed in order to stay on node */
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
- struct reclaim_state reclaim_state;
int priority;
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
@@ -2583,8 +2582,10 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
.swappiness = vm_swappiness,
.order = order,
};
- unsigned long nr_slab_pages0, nr_slab_pages1;
-
+ struct reclaim_state reclaim_state = {
+ .reclaimed_slab = 0,
+ .zone = zone,
+ };

cond_resched();
/*
@@ -2594,7 +2595,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
*/
p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
lockdep_set_current_reclaim_state(gfp_mask);
- reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;

if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
@@ -2610,34 +2610,22 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
}

- nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
- if (nr_slab_pages0 > zone->min_slab_pages) {
+ if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) {
unsigned long lru_pages = zone_reclaimable_pages(zone);

- /*
- * shrink_slab() does not currently allow us to determine how
- * many pages were freed in this zone. So we take the current
- * number of slab pages and shake the slab until it is reduced
- * by the same nr_pages that we used for reclaiming unmapped
- * pages.
- *
- * Note that shrink_slab will free memory on all zones and may
- * take a long time.
- */
- while (shrink_slab(sc.nr_scanned, gfp_mask, lru_pages) &&
- (zone_page_state(zone, NR_SLAB_RECLAIMABLE) + nr_pages >
- nr_slab_pages0))
- ;
-
- /*
- * Update nr_reclaimed by the number of slab pages we
- * reclaimed from this zone.
- */
- nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
- if (nr_slab_pages1 < nr_slab_pages0)
- sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
+ for(;;) {
+ /*
+ * Note that shrink_slab will free memory on all zones
+ * and may take a long time.
+ */
+ if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+ break;
+ if (reclaim_state.reclaimed_slab >= nr_pages)
+ break;
+ }
}

+ sc.nr_reclaimed += reclaim_state.reclaimed_slab;
p->reclaim_state = NULL;
current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
lockdep_clear_current_reclaim_state();
--
1.6.5.2






--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/