--- .orig/include/linux/swap.h 2004-07-12 01:05:20.941322402 -0400 +++ 2.6.7-bk20-vm/include/linux/swap.h 2004-07-11 12:03:41.000000000 -0400 @@ -175,6 +175,8 @@ extern int try_to_free_pages(struct zone **, unsigned int, unsigned int); extern int shrink_all_memory(int); extern int vm_swappiness; +extern int vm_slow_scan; +extern int vm_limit_reclaim; #ifdef CONFIG_MMU /* linux/mm/shmem.c */ --- .orig/include/linux/sysctl.h 2004-07-12 01:05:21.099307424 -0400 +++ 2.6.7-bk20-vm/include/linux/sysctl.h 2004-07-11 12:02:56.000000000 -0400 @@ -165,6 +165,8 @@ VM_BLOCK_DUMP=24, /* block dump mode */ VM_HUGETLB_GROUP=25, /* permitted hugetlb group */ VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ + VM_SLOW_SCAN=27, /* scan active list very slowly */ + VM_LIMIT_RECLAIM, /* limit reclaim to SWAP_CLUSTER_MAX */ }; --- .orig/kernel/sysctl.c 2004-07-12 01:05:21.193298512 -0400 +++ 2.6.7-bk20-vm/kernel/sysctl.c 2004-07-11 12:03:27.000000000 -0400 @@ -789,6 +789,26 @@ .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_SLOW_SCAN, + .procname = "slow_scan", + .data = &vm_slow_scan, + .maxlen = sizeof(vm_slow_scan), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = VM_LIMIT_RECLAIM, + .procname = "limit_reclaim", + .data = &vm_limit_reclaim, + .maxlen = sizeof(vm_limit_reclaim), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, { .ctl_name = 0 } }; --- .orig/mm/vmscan.c 2004-07-12 01:05:21.359282775 -0400 +++ 2.6.7-bk20-vm/mm/vmscan.c 2004-07-11 14:33:16.586046183 -0400 @@ -119,6 +119,8 @@ * From 0 .. 100. Higher means more swappy. */ int vm_swappiness = 60; +int vm_slow_scan = 0; +int vm_limit_reclaim = 0; static long total_memory; static LIST_HEAD(shrinker_list); @@ -801,25 +803,33 @@ unsigned long nr_active; unsigned long nr_inactive; - /* - * Add one to `nr_to_scan' just to make sure that the kernel will - * slowly sift through the active list. - */ - zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1; - nr_active = zone->nr_scan_active; - if (nr_active >= SWAP_CLUSTER_MAX) + if (vm_slow_scan) { + unsigned long nr = (zone->nr_active + zone->nr_inactive) >> sc->priority; + nr_inactive = min(nr, zone->nr_inactive); + nr_active = nr - nr_inactive; zone->nr_scan_active = 0; - else - nr_active = 0; - - zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1; - nr_inactive = zone->nr_scan_inactive; - if (nr_inactive >= SWAP_CLUSTER_MAX) zone->nr_scan_inactive = 0; - else - nr_inactive = 0; + } else { + /* + * Add one to `nr_to_scan' just to make sure that the kernel will + * slowly sift through the active list. + */ + zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1; + nr_active = zone->nr_scan_active; + if (nr_active >= SWAP_CLUSTER_MAX) + zone->nr_scan_active = 0; + else + nr_active = 0; - sc->nr_to_reclaim = SWAP_CLUSTER_MAX; + zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1; + nr_inactive = zone->nr_scan_inactive; + if (nr_inactive >= SWAP_CLUSTER_MAX) + zone->nr_scan_inactive = 0; + else + nr_inactive = 0; + } + if (vm_limit_reclaim) + sc->nr_to_reclaim = SWAP_CLUSTER_MAX; while (nr_active || nr_inactive) { if (nr_active) { @@ -900,6 +910,7 @@ sc.gfp_mask = gfp_mask; sc.may_writepage = 0; + sc.nr_to_reclaim = SWAP_CLUSTER_MAX; inc_page_state(allocstall); @@ -917,12 +928,12 @@ sc.nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; } - if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) { + total_scanned += sc.nr_scanned; + total_reclaimed += sc.nr_reclaimed; + if (total_reclaimed >= SWAP_CLUSTER_MAX) { ret = 1; goto out; } - total_scanned += sc.nr_scanned; - total_reclaimed += sc.nr_reclaimed; /* * Try to write back as many pages as we just scanned. This @@ -1039,7 +1050,11 @@ if (nr_pages == 0) { /* Not software suspend */ if (zone->free_pages <= zone->pages_high) all_zones_ok = 0; - } + sc.nr_to_reclaim = zone->pages_high - zone->free_pages; + if (sc.nr_to_reclaim < SWAP_CLUSTER_MAX) + sc.nr_to_reclaim = SWAP_CLUSTER_MAX; + } else + sc.nr_to_reclaim = nr_pages; zone->temp_priority = priority; if (zone->prev_priority > priority) zone->prev_priority = priority;