Re: [PATCH]: Free pages from local pcp lists under tight memory conditions

From: Andrew Morton
Date: Wed Nov 23 2005 - 00:36:57 EST


Rohit Seth <rohit.seth@xxxxxxxxx> wrote:
>
> Andrew, Linus,
>
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> the local pcp lists when a higher-order allocation request cannot be
> serviced from the global free_list.
> 
> This should help fix some of the earlier failures seen with order-1
> allocations.
>
> I will send separate patches for:
>
> 1- Reducing the remote CPUs' pcp lists
> 2- Cleaning up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
>
> +static int
> +reduce_cpu_pcp(void)
> +{
> +	struct zone *zone;
> +	unsigned long flags;
> +	unsigned int cpu = get_cpu();
> +	int i, ret = 0;
> +
> +	local_irq_save(flags);
> +	for_each_zone(zone) {
> +		struct per_cpu_pageset *pset;
> +
> +		pset = zone_pcp(zone, cpu);
> +		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> +			struct per_cpu_pages *pcp;
> +
> +			pcp = &pset->pcp[i];
> +			if (pcp->count == 0)
> +				continue;
> +			pcp->count -= free_pages_bulk(zone, pcp->batch,
> +						&pcp->list, 0);
> +			ret++;
> +		}
> +	}
> +	local_irq_restore(flags);
> +	put_cpu();
> +	return ret;
> +}

This significantly duplicates the existing drain_local_pages().
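
For comparison, here is the existing helper it re-implements, as it stands
before this patch (reconstructed from the removal lines in the diff below):

static void __drain_pages(unsigned int cpu)
{
	unsigned long flags;
	struct zone *zone;
	int i;

	for_each_zone(zone) {
		struct per_cpu_pageset *pset;

		pset = zone_pcp(zone, cpu);
		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
			struct per_cpu_pages *pcp;

			pcp = &pset->pcp[i];
			/* frees the whole pcp->count, not just pcp->batch */
			local_irq_save(flags);
			pcp->count -= free_pages_bulk(zone, pcp->count,
					&pcp->list, 0);
			local_irq_restore(flags);
		}
	}
}

The only real differences are the get_cpu()/put_cpu() pairing, where the
irq-disable is done, freeing pcp->batch pages rather than the whole
pcp->count, and the return value.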

>
> +	if (order > 0)
> +		while (reduce_cpu_pcp()) {
> +			if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))

This forgot to assign to local variable `page'! It'll return NULL and will
leak memory.
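
A minimal sketch of what the call site needs to look like, reusing the
patch's own names:

	if (order > 0)
		while (reduce_cpu_pcp()) {
			page = get_page_from_freelist(gfp_mask, order,
						zonelist, alloc_flags);
			if (page)
				goto got_pg;
		}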

The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.

> +				goto got_pg;
> +		}
> +	/* FIXME: Add the support for reducing/draining the remote pcps.

This is easy enough to do.

I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
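
For reference, that NUMA helper looks something like this (a from-memory
sketch; the details may not exactly match the tree this patch is against):

#ifdef CONFIG_NUMA
/* Called from the slab reaper to drain pagesets on remote nodes */
void drain_remote_pages(void)
{
	struct zone *zone;
	unsigned long flags;
	int i;

	local_irq_save(flags);
	for_each_zone(zone) {
		struct per_cpu_pageset *pset;

		/* Do not drain this node's own pagesets */
		if (zone->zone_pgdat->node_id == numa_node_id())
			continue;

		pset = zone_pcp(zone, smp_processor_id());
		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
			struct per_cpu_pages *pcp = &pset->pcp[i];

			if (pcp->count)
				pcp->count -= free_pages_bulk(zone,
					pcp->count, &pcp->list, 0);
		}
	}
	local_irq_restore(flags);
}
#endif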

This patch is against a random selection of the enormous number of mm/
patches in -mm. I haven't runtime-tested it yet.

We need to verify that this patch actually does something useful.
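
One way to check would be a throwaway test module (entirely hypothetical,
not part of this patch) that hammers order-1 GFP_ATOMIC allocations and
reports the failure rate, run with and without the drain under some memory
load:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/mm.h>

static int __init drain_test_init(void)
{
	int i, failed = 0;

	for (i = 0; i < 1000; i++) {
		/* atomic: the slowpath cannot sleep or reclaim, so only
		 * the pcp drain can rescue a failing attempt */
		struct page *page = alloc_pages(GFP_ATOMIC, 1);

		if (!page) {
			failed++;
			continue;
		}
		__free_pages(page, 1);
	}
	printk(KERN_INFO "drain_test: %d/1000 order-1 atomic "
			"allocations failed\n", failed);
	return 0;
}

static void __exit drain_test_exit(void)
{
}

module_init(drain_test_init);
module_exit(drain_test_exit);
MODULE_LICENSE("GPL");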



 include/linux/gfp.h     |    2 +
 include/linux/suspend.h |    1 -
 mm/page_alloc.c         |   85 ++++++++++++++++++++++++++++++++++++------------
 3 files changed, 66 insertions(+), 22 deletions(-)

diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
 		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
+extern int drain_local_pages(void);
+
 #ifdef CONFIG_NUMA
 extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);

diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
 extern int shrink_mem(void);
 
 /* mm/page_alloc.c */
-extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
 
 #ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
 }
 #endif
 
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
 {
-	unsigned long flags;
 	struct zone *zone;
-	int i;
+	int ret = 0;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
+		int i;
 
 		pset = zone_pcp(zone, cpu);
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			local_irq_save(flags);
+			if (!pcp->count)
+				continue;
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 					&pcp->list, 0);
-			local_irq_restore(flags);
+			ret++;
 		}
 	}
+	return ret;
 }
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __drain_pages(smp_processor_id());
+	local_irq_restore(flags);
+	return ret;
+}
+
+static void drainer(void *p)
+{
+	atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+	if (in_interrupt()) {
+		return drain_local_pages();
+	} else {
+		atomic_t ret = ATOMIC_INIT(0);
+
+		on_each_cpu(drainer, &ret, 0, 1);
+		return atomic_read(&ret);
+	}
+}
 
+#ifdef CONFIG_PM
 void mark_free_pages(struct zone *zone)
 {
 	unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__drain_pages(smp_processor_id());
-	local_irq_restore(flags);
-}
 #endif /* CONFIG_PM */
 
 static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
 	}
 
 	/* Atomic allocations - we can't balance anything */
-	if (!wait)
-		goto nopage;
+	if (!wait) {
+		/*
+		 * Check if there are pages on the pcp lists that can be moved
+		 * to the global page list to satisfy higher-order allocations.
+		 */
+		if (order > 0 && drain_all_local_pages())
+			goto restart;
+		else
+			goto nopage;
+	}
 
 rebalance:
 	cond_resched();
@@ -952,6 +988,13 @@ rebalance:
 		goto restart;
 	}
 
+	if (order > 0 && drain_all_local_pages()) {
+		page = get_page_from_freelist(gfp_mask, order, zonelist,
+						alloc_flags);
+		if (page)
+			goto got_pg;
+	}
+
 	/*
 	 * Don't let big-order allocations loop unless the caller explicitly
 	 * requests that. Wait for some write requests to complete then retry.
_
