mm: throttle concurrent direct reclaimers and record reclaim statistics

try_to_free_pages() now takes a slot in a global reclaimer count before
reclaiming.  The count is capped at reclaim_limit * num_highmem_nodes();
a task that cannot take a slot sleeps on a wait queue until it can.  A
task that waited longer than HZ jiffies returns 1 without reclaiming as
soon as it finds a populated zone above 4 * pages_high.

The limit and the recorded maxima (reclaimer count, wait time, reclaim
time, total time, free pages seen after reclaim) are exposed as
unnumbered sysctls in vm_table.

Timestamps are taken with get_jiffies_64(), and the effective limit is
clamped to at least 1 so that writing 0 to reclaim_limit cannot block
every reclaimer.

---
 include/linux/mmzone.h   |    1 
 include/linux/nodemask.h |    2 
 kernel/sysctl.c          |   78 ++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c              |  109 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 187 insertions(+), 3 deletions(-)

Index: b/kernel/sysctl.c
===================================================================
--- a/kernel/sysctl.c	2008-02-15 20:14:40.000000000 +0900
+++ b/kernel/sysctl.c	2008-02-16 16:45:58.000000000 +0900
@@ -187,6 +187,18 @@ int sysctl_legacy_va_layout;
 extern int prove_locking;
 extern int lock_stat;
 
+extern int max_reclaimer;
+extern unsigned long max_reclaim_time;
+extern unsigned long max_reclaim_prepare_time;
+extern int reclaim_limit;
+extern unsigned long max_overkill_reclaim;
+
+extern unsigned long max_reclaim_time_aux;
+extern unsigned long max_reclaim_prepare_time_aux;
+extern unsigned long max_total_time;
+
+
+
 /* The default sysctl tables: */
 
 static struct ctl_table root_table[] = {
@@ -1155,6 +1167,72 @@ static struct ctl_table vm_table[] = {
 		.extra2 = &one,
 	},
 #endif
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_reclaimer",
+		.data = &max_reclaimer,
+		.maxlen = sizeof(max_reclaimer),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_reclaim_time",
+		.data = &max_reclaim_time,
+		.maxlen = sizeof(max_reclaim_time),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_reclaim_prepare_time",
+		.data = &max_reclaim_prepare_time,
+		.maxlen = sizeof(max_reclaim_prepare_time),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "reclaim_limit",
+		.data = &reclaim_limit,
+		.maxlen = sizeof(reclaim_limit),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_overkill_reclaim",
+		.data = &max_overkill_reclaim,
+		.maxlen = sizeof(max_overkill_reclaim),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_reclaim_time_aux",
+		.data = &max_reclaim_time_aux,
+		.maxlen = sizeof(max_reclaim_time_aux),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_reclaim_prepare_time_aux",
+		.data = &max_reclaim_prepare_time_aux,
+		.maxlen = sizeof(max_reclaim_prepare_time_aux),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "max_total_time",
+		.data = &max_total_time,
+		.maxlen = sizeof(max_total_time),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+	},
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c	2008-02-15 20:14:40.000000000 +0900
+++ b/mm/vmscan.c	2008-02-17 11:50:50.000000000 +0900
@@ -1421,6 +1421,31 @@ out:
 	return ret;
 }
 
+/*
+ * Direct reclaim throttling with statistics.
+ *
+ * try_to_free_pages() takes a slot in nr_reclaimers unless the count equals
+ * RECLAIM_LIMIT, in which case it sleeps on reclaim_throttle_waitq until a
+ * slot can be taken.  research_reclaim_max_lock guards the max_* records.
+ */
+static DEFINE_SPINLOCK(research_reclaim_max_lock);
+static atomic_t nr_reclaimers = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(reclaim_throttle_waitq);
+
+/* Reclaimers allowed per N_HIGH_MEMORY node; sysctl vm.reclaim_limit. */
+int reclaim_limit = 2;
+/* Clamped to >= 1: a limit of 0 would block every reclaimer forever. */
+#define RECLAIM_LIMIT (max(reclaim_limit, 1) * num_highmem_nodes())
+
+/* High-water marks exported via sysctl; the time values are in jiffies. */
+int max_reclaimer = 0;
+unsigned long max_reclaim_time = 0;
+unsigned long max_reclaim_time_aux = 0;
+unsigned long max_reclaim_prepare_time = 0;
+unsigned long max_reclaim_prepare_time_aux = 0;
+unsigned long max_overkill_reclaim = 0;
+unsigned long max_total_time = 0;
+
 unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
 	struct scan_control sc = {
@@ -1433,8 +1458,88 @@ unsigned long try_to_free_pages(struct z
 		.mem_cgroup = NULL,
 		.isolate_pages = isolate_pages_global,
 	};
-
-	return do_try_to_free_pages(zones, gfp_mask, &sc);
+	unsigned long nr_reclaimed;
+	u64 start_time;
+	u64 prepared_time;
+	u64 end_time;
+	u64 preparing_time;
+	u64 reclaiming_time;
+	unsigned long free_mem;
+	int record_max_prepare_time = 0;
+	unsigned long total_time;
+
+	/* A raw jiffies_64 read can tear on 32-bit; use the accessor. */
+	start_time = get_jiffies_64();
+
+	/* Take a reclaimer slot; sleep until one frees up if at the limit. */
+	if (unlikely(!atomic_add_unless(&nr_reclaimers, 1, RECLAIM_LIMIT)))
+		wait_event(reclaim_throttle_waitq,
+			   atomic_add_unless(&nr_reclaimers, 1, RECLAIM_LIMIT));
+
+	spin_lock(&research_reclaim_max_lock);
+	if (atomic_read(&nr_reclaimers) > max_reclaimer)
+		max_reclaimer = atomic_read(&nr_reclaimers);
+
+	prepared_time = get_jiffies_64();
+	preparing_time = prepared_time - start_time;
+	/* New longest wait: flag it to pair with this pass's reclaim time. */
+	if (preparing_time > max_reclaim_prepare_time) {
+		record_max_prepare_time = 1;
+		max_reclaim_prepare_time = preparing_time;
+	}
+	spin_unlock(&research_reclaim_max_lock);
+
+	/*
+	 * After a wait longer than HZ jiffies, skip reclaim and return 1 as
+	 * soon as one populated zone is above 4 * pages_high (presumably
+	 * because other reclaimers already freed enough -- confirm).
+	 */
+	if (preparing_time > HZ) {
+		int i;
+
+		for (i = 0; zones[i] != NULL; i++) {
+			struct zone *zone = zones[i];
+			int classzone_idx = zone_idx(zones[0]);
+
+			if (!populated_zone(zone))
+				continue;
+
+			if (zone_watermark_ok(zone, order, 4*zone->pages_high,
+					      classzone_idx, 0)) {
+				nr_reclaimed = 1;
+				goto out;
+			}
+		}
+	}
+
+	nr_reclaimed = do_try_to_free_pages(zones, gfp_mask, &sc);
+
+	spin_lock(&research_reclaim_max_lock);
+	end_time = get_jiffies_64();
+	reclaiming_time = end_time - prepared_time;
+
+	if (record_max_prepare_time)
+		max_reclaim_prepare_time_aux = reclaiming_time;
+
+	if (reclaiming_time > max_reclaim_time) {
+		max_reclaim_time_aux = preparing_time;
+		max_reclaim_time = reclaiming_time;
+	}
+
+	total_time = preparing_time + reclaiming_time;
+	if (total_time > max_total_time)
+		max_total_time = total_time;
+
+	free_mem = global_page_state(NR_FREE_PAGES);
+	if (free_mem > max_overkill_reclaim)
+		max_overkill_reclaim = free_mem;
+	spin_unlock(&research_reclaim_max_lock);
+
+out:
+	atomic_dec(&nr_reclaimers);
+	wake_up_all(&reclaim_throttle_waitq);
+
+	return nr_reclaimed;
 }
 
 #ifdef CONFIG_CGROUP_MEM_CONT
Index: b/include/linux/mmzone.h
===================================================================
--- a/include/linux/mmzone.h	2008-02-15 20:14:40.000000000 +0900
+++ b/include/linux/mmzone.h	2008-02-15 20:14:49.000000000 +0900
@@ -334,7 +334,6 @@ struct zone {
 	 */
 	unsigned long spanned_pages; /* total size, including holes */
 	unsigned long present_pages; /* amount of memory (excluding holes) */
-
 	/*
 	 * rarely used fields:
 	 */
Index: b/include/linux/nodemask.h
===================================================================
--- a/include/linux/nodemask.h	2008-02-15 20:14:40.000000000 +0900
+++ b/include/linux/nodemask.h	2008-02-15 20:14:49.000000000 +0900
@@ -431,6 +431,8 @@ static inline int num_node_state(enum no
 
 #define num_online_nodes() num_node_state(N_ONLINE)
 #define num_possible_nodes() num_node_state(N_POSSIBLE)
+#define num_highmem_nodes() num_node_state(N_HIGH_MEMORY)
+
 #define node_online(node) node_state((node), N_ONLINE)
 #define node_possible(node) node_state((node), N_POSSIBLE)
 