Re: Large slab cache in 2.6.1

From: Nick Piggin
Date: Sun Feb 22 2004 - 01:37:30 EST

Andrew Morton wrote:

> Nick Piggin <piggin@xxxxxxxxxxxxxxx> wrote:
>
>> Fair enough. Maybe if we can get enough testing, some of the mm
>> changes can get into 2.6.4? I'm sure Linus is turning pale, maybe
>> we'd better wait until 2.6.10 ;)
>
> I need to get off my lazy tail and test them a bit^Wlot first. More
> like 2.6.5.

Can you maybe use this patch then, please?
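To make the arithmetic easy to eyeball, here is a quick userspace sketch of
the scan-target calculation this patch puts into shrink_slab(). The names
and the figures are made up for illustration; the real thing is in the
vmscan.c hunk below:

#include <stdio.h>

#define SHRINK_BATCH 128

/*
 * Userspace model of the shrink_slab() arithmetic: scan each slab in
 * proportion to the lowmem LRU scanning pressure, weighted by seeks.
 * delta = (4 * lowmem_scanned / seeks) * items_in_slab / lowmem_pages
 */
static unsigned long scan_target(unsigned long lowmem_scanned,
				 unsigned long lowmem_pages,
				 unsigned long items, int seeks)
{
	unsigned long long delta;

	delta = 4ULL * lowmem_scanned / seeks;
	delta *= items;
	delta /= lowmem_pages + 1;	/* +1 guards against divide by zero */

	return (unsigned long)delta + 1;	/* +1 so some scanning always gets done */
}

int main(void)
{
	/*
	 * Illustrative figures only: 1024 lowmem LRU pages scanned out of
	 * 256k, against a cache reporting 100000 freeable entries, seeks=2.
	 */
	unsigned long nr = scan_target(1024, 262144, 100000, 2);

	printf("scan target: %lu items", nr);
	if (nr > SHRINK_BATCH)
		printf(" (worked off in batches of %d)", SHRINK_BATCH);
	printf("\n");
	return 0;
}

With those inputs the target comes out at 782 items, which the real code
then works off SHRINK_BATCH entries at a time.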

Thanks

linux-2.6-npiggin/include/linux/mm.h |  2 -
linux-2.6-npiggin/mm/page_alloc.c    | 11 ------
linux-2.6-npiggin/mm/vmscan.c        | 64 ++++++++++++++++++++++++++++-------
3 files changed, 52 insertions(+), 25 deletions(-)

diff -puN mm/vmscan.c~vm-shrink-slab-lowmem mm/vmscan.c
--- linux-2.6/mm/vmscan.c~vm-shrink-slab-lowmem 2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/mm/vmscan.c 2004-02-22 17:30:53.000000000 +1100
@@ -122,7 +122,25 @@ void remove_shrinker(struct shrinker *sh
}

EXPORT_SYMBOL(remove_shrinker);
-
+
+/*
+ * Returns the number of lowmem pages which are on the lru lists
+ */
+static unsigned int nr_lowmem_lru_pages(void)
+{
+ unsigned int pages = 0;
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ if (unlikely(is_highmem(zone)))
+ continue;
+ pages += zone->nr_active + zone->nr_inactive;
+ }
+
+ return pages;
+}
+
+
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
@@ -136,6 +154,24 @@ EXPORT_SYMBOL(remove_shrinker);
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ *
+ * The formula to work out how much to scan each slab is as follows:
+ * Let S be the number of lowmem LRU pages that were scanned (scanned)
+ * Let M be the total number of lowmem LRU pages (pages)
+ * Let T be the total number of slab items across all slabs.
+ * For each slab:
+ * Let I be the number of items in this slab ((*shrinker->shrinker)(0, gfp_mask))
+ *
+ * "S * T / M" then gives the total number of slab items to scan, N.
+ * Then for each slab, "N * I / T" is the number of items to scan for this slab.
+ *
+ * This simplifies to "S * I / M", or
+ * lowmem lru scanned * items in this slab / total lowmem lru pages
+ *
+ * TODO:
+ * The value of M should be calculated *before* LRU scanning.
+ * Total number of items in each slab should be used, not just freeable ones.
+ * Unfreeable slab items should not count toward the scanning total.
*/
static int shrink_slab(unsigned long scanned, unsigned int gfp_mask)
{
@@ -145,14 +181,16 @@ static int shrink_slab(unsigned long sca
if (down_trylock(&shrinker_sem))
return 0;

- pages = nr_used_zone_pages();
+ pages = nr_lowmem_lru_pages();
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;

delta = 4 * scanned / shrinker->seeks;
delta *= (*shrinker->shrinker)(0, gfp_mask);
do_div(delta, pages + 1);
- shrinker->nr += delta;
+
+ /* +1 to ensure some scanning gets done */
+ shrinker->nr += delta + 1;
if (shrinker->nr > SHRINK_BATCH) {
long nr_to_scan = shrinker->nr;

@@ -857,7 +895,8 @@ shrink_zone(struct zone *zone, unsigned
*/
static int
shrink_caches(struct zone **zones, int priority, int *total_scanned,
- int gfp_mask, int nr_pages, struct page_state *ps)
+ int *lowmem_scanned, int gfp_mask, int nr_pages,
+ struct page_state *ps)
{
int ret = 0;
int i;
@@ -875,7 +914,10 @@ shrink_caches(struct zone **zones, int p

ret += shrink_zone(zone, gfp_mask,
to_reclaim, &nr_scanned, ps, priority);
+
*total_scanned += nr_scanned;
+ if (i < ZONE_HIGHMEM)
+ *lowmem_scanned += nr_scanned;
if (ret >= nr_pages)
break;
}
@@ -915,19 +957,17 @@ int try_to_free_pages(struct zone **zone
zones[i]->temp_priority = DEF_PRIORITY;

for (priority = DEF_PRIORITY; priority >= 0; priority--) {
- int total_scanned = 0;
+ int total_scanned = 0, lowmem_scanned = 0;
struct page_state ps;

get_page_state(&ps);
nr_reclaimed += shrink_caches(zones, priority, &total_scanned,
- gfp_mask, nr_pages, &ps);
+ &lowmem_scanned, gfp_mask, nr_pages, &ps);

- if (zones[0] - zones[0]->zone_pgdat->node_zones < ZONE_HIGHMEM) {
- shrink_slab(total_scanned, gfp_mask);
- if (reclaim_state) {
- nr_reclaimed += reclaim_state->reclaimed_slab;
- reclaim_state->reclaimed_slab = 0;
- }
+ shrink_slab(lowmem_scanned, gfp_mask);
+ if (reclaim_state) {
+ nr_reclaimed += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
}

if (nr_reclaimed >= nr_pages) {
diff -puN mm/page_alloc.c~vm-shrink-slab-lowmem mm/page_alloc.c
--- linux-2.6/mm/page_alloc.c~vm-shrink-slab-lowmem 2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/mm/page_alloc.c 2004-02-22 17:04:43.000000000 +1100
@@ -772,17 +772,6 @@ unsigned int nr_free_pages(void)

EXPORT_SYMBOL(nr_free_pages);

-unsigned int nr_used_zone_pages(void)
-{
- unsigned int pages = 0;
- struct zone *zone;
-
- for_each_zone(zone)
- pages += zone->nr_active + zone->nr_inactive;
-
- return pages;
-}
-
#ifdef CONFIG_NUMA
unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
{
diff -puN include/linux/mm.h~vm-shrink-slab-lowmem include/linux/mm.h
--- linux-2.6/include/linux/mm.h~vm-shrink-slab-lowmem 2004-02-22 16:35:06.000000000 +1100
+++ linux-2.6-npiggin/include/linux/mm.h 2004-02-22 17:04:26.000000000 +1100
@@ -625,8 +625,6 @@ static inline struct vm_area_struct * fi

extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);

-extern unsigned int nr_used_zone_pages(void);
-
extern struct page * vmalloc_to_page(void *addr);
extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
int write);

_