Re: [PATCH 10/11] Direct compact when a high-order allocation fails

From: Minchan Kim
Date: Wed Mar 24 2010 - 08:06:56 EST


On Wed, Mar 24, 2010 at 8:59 PM, Minchan Kim <minchan.kim@xxxxxxxxx> wrote:
> On Wed, Mar 24, 2010 at 8:11 PM, Mel Gorman <mel@xxxxxxxxx> wrote:
>> On Wed, Mar 24, 2010 at 08:10:40AM +0900, Minchan Kim wrote:
>>> Hi, Mel.
>>>
>>> On Tue, Mar 23, 2010 at 9:25 PM, Mel Gorman <mel@xxxxxxxxx> wrote:
>>> > Ordinarily when a high-order allocation fails, direct reclaim is entered to
>>> > free pages to satisfy the allocation. With this patch, it is determined if
>>> > an allocation failed due to external fragmentation instead of low memory
>>> > and if so, the calling process will compact until a suitable page is
>>> > freed. Compaction by moving pages in memory is considerably cheaper than
>>> > paging out to disk and works where there are locked pages or no swap. If
>>> > compaction fails to free a page of a suitable size, then reclaim will
>>> > still occur.
>>> >
>>> > Direct compaction returns as soon as possible. As each block is compacted,
>>> > it is checked if a suitable page has been freed and if so, it returns.
>>> >
>>> > Signed-off-by: Mel Gorman <mel@xxxxxxxxx>
>>> > Acked-by: Rik van Riel <riel@xxxxxxxxxx>
>>> > ---
>>> >  include/linux/compaction.h |   16 +++++-
>>> >  include/linux/vmstat.h     |    1 +
>>> >  mm/compaction.c            |  118 ++++++++++++++++++++++++++++++++++++++++++++
>>> >  mm/page_alloc.c            |   26 ++++++++++
>>> >  mm/vmstat.c                |   15 +++++-
>>> >  5 files changed, 172 insertions(+), 4 deletions(-)
>>> >
>>> > diff --git a/include/linux/compaction.h b/include/linux/compaction.h
>>> > index c94890b..b851428 100644
>>> > --- a/include/linux/compaction.h
>>> > +++ b/include/linux/compaction.h
>>> > @@ -1,14 +1,26 @@
>>> > Â#ifndef _LINUX_COMPACTION_H
>>> > Â#define _LINUX_COMPACTION_H
>>> >
>>> > -/* Return values for compact_zone() */
>>> > +/* Return values for compact_zone() and try_to_compact_pages() */
>>> > Â#define COMPACT_INCOMPLETE Â Â 0
>>> > -#define COMPACT_COMPLETE Â Â Â 1
>>> > +#define COMPACT_PARTIAL Â Â Â Â Â Â Â Â1
>>> > +#define COMPACT_COMPLETE Â Â Â 2
>>> >
>>> > Â#ifdef CONFIG_COMPACTION
>>> > Âextern int sysctl_compact_memory;
>>> > Âextern int sysctl_compaction_handler(struct ctl_table *table, int write,
>>> > Â Â Â Â Â Â Â Â Â Â Â Âvoid __user *buffer, size_t *length, loff_t *ppos);
>>> > +
>>> > +extern int fragmentation_index(struct zone *zone, unsigned int order);
>>> > +extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
>>> > + Â Â Â Â Â Â Â Â Â Â Â int order, gfp_t gfp_mask, nodemask_t *mask);
>>> > +#else
>>> > +static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
>>> > + Â Â Â Â Â Â Â Â Â Â Â int order, gfp_t gfp_mask, nodemask_t *nodemask)
>>> > +{
>>> > + Â Â Â return COMPACT_INCOMPLETE;
>>> > +}
>>> > +
>>> > Â#endif /* CONFIG_COMPACTION */
>>> >
>>> > Â#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
>>> > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
>>> > index 56e4b44..b4b4d34 100644
>>> > --- a/include/linux/vmstat.h
>>> > +++ b/include/linux/vmstat.h
>>> > @@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>>> > Â Â Â Â Â Â Â ÂKSWAPD_SKIP_CONGESTION_WAIT,
>>> > Â Â Â Â Â Â Â ÂPAGEOUTRUN, ALLOCSTALL, PGROTATED,
>>> > Â Â Â Â Â Â Â ÂCOMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
>>> > + Â Â Â Â Â Â Â COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
>>> > Â#ifdef CONFIG_HUGETLB_PAGE
>>> > Â Â Â Â Â Â Â ÂHTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
>>> > Â#endif
>>> > diff --git a/mm/compaction.c b/mm/compaction.c
>>> > index 8df6e3d..6688700 100644
>>> > --- a/mm/compaction.c
>>> > +++ b/mm/compaction.c
>>> > @@ -34,6 +34,8 @@ struct compact_control {
>>> > Â Â Â Âunsigned long nr_anon;
>>> > Â Â Â Âunsigned long nr_file;
>>> >
>>> > + Â Â Â unsigned int order; Â Â Â Â Â Â /* order a direct compactor needs */
>>> > + Â Â Â int migratetype; Â Â Â Â Â Â Â Â/* MOVABLE, RECLAIMABLE etc */
>>> > Â Â Â Âstruct zone *zone;
>>> > Â};
>>> >
>>> > @@ -301,10 +303,31 @@ static void update_nr_listpages(struct compact_control *cc)
>>> > Âstatic inline int compact_finished(struct zone *zone,
>>> > Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âstruct compact_control *cc)
>>> > Â{
>>> > + Â Â Â unsigned int order;
>>> > + Â Â Â unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
>>> > +
>>> > Â Â Â Â/* Compaction run completes if the migrate and free scanner meet */
>>> > Â Â Â Âif (cc->free_pfn <= cc->migrate_pfn)
>>> > Â Â Â Â Â Â Â Âreturn COMPACT_COMPLETE;
>>> >
>>> > + Â Â Â /* Compaction run is not finished if the watermark is not met */
>>> > + Â Â Â if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
>>> > + Â Â Â Â Â Â Â return COMPACT_INCOMPLETE;
>>> > +
>>> > + Â Â Â if (cc->order == -1)
>>> > + Â Â Â Â Â Â Â return COMPACT_INCOMPLETE;
>>> > +
>>> > + Â Â Â /* Direct compactor: Is a suitable page free? */
>>> > + Â Â Â for (order = cc->order; order < MAX_ORDER; order++) {
>>> > + Â Â Â Â Â Â Â /* Job done if page is free of the right migratetype */
>>> > + Â Â Â Â Â Â Â if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
>>> > + Â Â Â Â Â Â Â Â Â Â Â return COMPACT_PARTIAL;
>>> > +
>>> > + Â Â Â Â Â Â Â /* Job done if allocation would set block type */
>>> > + Â Â Â Â Â Â Â if (order >= pageblock_order && zone->free_area[order].nr_free)
>>> > + Â Â Â Â Â Â Â Â Â Â Â return COMPACT_PARTIAL;
>>> > + Â Â Â }
>>> > +
>>> > Â Â Â Âreturn COMPACT_INCOMPLETE;
>>> > Â}
>>> >
>>> > @@ -348,6 +371,101 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
>>> > Â Â Â Âreturn ret;
>>> > Â}
>>> >
>>> > +static inline unsigned long compact_zone_order(struct zone *zone,
>>> > + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â int order, gfp_t gfp_mask)
>>> > +{
>>> > + Â Â Â struct compact_control cc = {
>>> > + Â Â Â Â Â Â Â .nr_freepages = 0,
>>> > + Â Â Â Â Â Â Â .nr_migratepages = 0,
>>> > + Â Â Â Â Â Â Â .order = order,
>>> > + Â Â Â Â Â Â Â .migratetype = allocflags_to_migratetype(gfp_mask),
>>> > + Â Â Â Â Â Â Â .zone = zone,
>>> > + Â Â Â };
>>> > + Â Â Â INIT_LIST_HEAD(&cc.freepages);
>>> > + Â Â Â INIT_LIST_HEAD(&cc.migratepages);
>>> > +
>>> > + Â Â Â return compact_zone(zone, &cc);
>>> > +}
>>> > +
>>> > +/**
>>> > + * try_to_compact_pages - Direct compact to satisfy a high-order allocation
>>> > + * @zonelist: The zonelist used for the current allocation
>>> > + * @order: The order of the current allocation
>>> > + * @gfp_mask: The GFP mask of the current allocation
>>> > + * @nodemask: The allowed nodes to allocate from
>>> > + *
>>> > + * This is the main entry point for direct page compaction.
>>> > + */
>>> > +unsigned long try_to_compact_pages(struct zonelist *zonelist,
>>> > + Â Â Â Â Â Â Â Â Â Â Â int order, gfp_t gfp_mask, nodemask_t *nodemask)
>>> > +{
>>> > + Â Â Â enum zone_type high_zoneidx = gfp_zone(gfp_mask);
>>> > + Â Â Â int may_enter_fs = gfp_mask & __GFP_FS;
>>> > + Â Â Â int may_perform_io = gfp_mask & __GFP_IO;
>>> > + Â Â Â unsigned long watermark;
>>> > + Â Â Â struct zoneref *z;
>>> > + Â Â Â struct zone *zone;
>>> > + Â Â Â int rc = COMPACT_INCOMPLETE;
>>> > +
>>> > + Â Â Â /* Check whether it is worth even starting compaction */
>>> > + Â Â Â if (order == 0 || !may_enter_fs || !may_perform_io)
>>> > + Â Â Â Â Â Â Â return rc;
>>> > +
>>> > +       /*
>>> > +        * We will not stall if the necessary conditions are not met for
>>> > +        * migration but direct reclaim seems to account stalls similarly
>>> > +        */

Then let's remove the comment quoted just above.


--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/