[PATCH] 1

From: Minchan Kim
Date: Thu Jun 07 2012 - 00:12:14 EST


Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
include/linux/compaction.h | 19 +++++
mm/compaction.c | 170 ++++++++++++++++++++++++++++++++++++++------
mm/internal.h | 9 ++-
mm/page_alloc.c | 8 +--
4 files changed, 178 insertions(+), 28 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7..e988037 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_COMPACTION_H
#define _LINUX_COMPACTION_H

+#include <linux/node.h>
+
/* Return values for compact_zone() and try_to_compact_pages() */
/* compaction didn't start as it was not possible or direct reclaim was more suitable */
#define COMPACT_SKIPPED 0
@@ -11,6 +13,23 @@
/* The full zone was compacted */
#define COMPACT_COMPLETE 3

+/*
+ * compaction supports three modes
+ *
+ * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
+ * MIGRATE_MOVABLE pageblocks as migration sources and targets.
+ * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
+ * MIGRATE_MOVABLE pageblocks as migration sources.
+ * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
+ * targets and convers them to MIGRATE_MOVABLE if possible
+ * COMPACT_SYNC uses synchronous migration and scans all pageblocks
+ */
+enum compact_mode {
+ COMPACT_ASYNC_MOVABLE,
+ COMPACT_ASYNC_UNMOVABLE,
+ COMPACT_SYNC,
+};
+
#ifdef CONFIG_COMPACTION
extern int sysctl_compact_memory;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/mm/compaction.c b/mm/compaction.c
index 7ea259d..a5c9141 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
*/
while (unlikely(too_many_isolated(zone))) {
/* async migration should just abort */
- if (!cc->sync)
+ if (cc->mode != COMPACT_SYNC)
return 0;

congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -304,7 +304,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
* satisfies the allocation
*/
pageblock_nr = low_pfn >> pageblock_order;
- if (!cc->sync && last_pageblock_nr != pageblock_nr &&
+ if (cc->mode != COMPACT_SYNC &&
+ last_pageblock_nr != pageblock_nr &&
!migrate_async_suitable(get_pageblock_migratetype(page))) {
low_pfn += pageblock_nr_pages;
low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -325,7 +326,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
continue;
}

- if (!cc->sync)
+ if (cc->mode != COMPACT_SYNC)
mode |= ISOLATE_ASYNC_MIGRATE;

lruvec = mem_cgroup_page_lruvec(page, zone);
@@ -360,27 +361,116 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,

#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
+/*
+ * Returns true if MIGRATE_UNMOVABLE pageblock can be successfully
+ * converted to MIGRATE_MOVABLE type, false otherwise.
+ */
+static bool can_rescue_unmovable_pageblock(struct page *page)
+{
+ struct zone *zone;
+ unsigned long pfn, start_pfn, end_pfn;
+ struct page *start_page, *end_page, *cursor_page;
+
+ zone = page_zone(page);
+ pfn = page_to_pfn(page);
+ start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ end_pfn = start_pfn + pageblock_nr_pages - 1;
+
+ start_page = pfn_to_page(start_pfn);
+ end_page = pfn_to_page(end_pfn);
+
+ for (cursor_page = start_page, pfn = start_pfn; cursor_page <= end_page;
+ pfn++, cursor_page++) {
+
+ if (!pfn_valid_within(pfn))
+ continue;
+
+ /* Do not deal with pageblocks that overlap zones */
+ if (page_zone(cursor_page) != zone)
+ return false;
+
+ /*
+ * Race with page allocator/reclaimer can happen so that
+ * it can deceive unmovable block to migratable type
+ * on this pageblock. It could regress on anti-fragmentation
+ * but it's rare and not critical.
+ */
+ if (PageBuddy(cursor_page)) {
+ unsigned long order = page_order(cursor_page);
+
+ pfn += (1 << order) - 1;
+ cursor_page += (1 << order) - 1;
+ continue;
+ } else if (PageLRU(cursor_page)) {
+ continue;
+ /*
+ * We can't use page_count which does compound_head
+ * as we don't have a pin a page.
+ */
+ } else if (!atomic_read(&cursor_page->_count)) {
+ continue;
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+static void rescue_unmovable_pageblock(struct page *page)
+{
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
+}
+
+/*
+ * MIGRATE_TARGET : good for migration target
+ * RESCUE_UNMOVABLE_TARTET : good only if we can rescue the unmovable pageblock.
+ * UNMOVABLE_TARGET : can't migrate because it's a page in unmovable pageblock.
+ * SKIP_TARGET : can't migrate by another reasons.
+ */
+enum smt_result {
+ MIGRATE_TARGET,
+ RESCUE_UNMOVABLE_TARGET,
+ UNMOVABLE_TARGET,
+ SKIP_TARGET,
+};

-/* Returns true if the page is within a block suitable for migration to */
-static bool suitable_migration_target(struct page *page)
+/*
+ * Returns MIGRATE_TARGET if the page is within a block
+ * suitable for migration to, UNMOVABLE_TARGET if the page
+ * is within a MIGRATE_UNMOVABLE block, SKIP_TARGET otherwise.
+ */
+static enum smt_result suitable_migration_target(struct page *page,
+ struct compact_control *cc)
{

int migratetype = get_pageblock_migratetype(page);

/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
- return false;
+ return SKIP_TARGET;

/* If the page is a large free page, then allow migration */
if (PageBuddy(page) && page_order(page) >= pageblock_order)
- return true;
+ return MIGRATE_TARGET;

/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
- if (migrate_async_suitable(migratetype))
- return true;
+ if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
+ migrate_async_suitable(migratetype))
+ return MIGRATE_TARGET;
+
+ if (cc->mode == COMPACT_ASYNC_MOVABLE &&
+ migratetype == MIGRATE_UNMOVABLE)
+ return UNMOVABLE_TARGET;
+
+ if (cc->mode != COMPACT_ASYNC_MOVABLE &&
+ migratetype == MIGRATE_UNMOVABLE &&
+ can_rescue_unmovable_pageblock(page))
+ return RESCUE_UNMOVABLE_TARGET;

/* Otherwise skip the block */
- return false;
+ return SKIP_TARGET;
}

/*
@@ -414,6 +504,13 @@ static void isolate_freepages(struct zone *zone,
zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;

/*
+ * isolate_freepages() may be called more than once during
+ * compact_zone_order() run and we want only the most recent
+ * count.
+ */
+ cc->nr_unmovable_pageblock = 0;
+
+ /*
* Isolate free pages until enough are available to migrate the
* pages on cc->migratepages. We stop searching if the migrate
* and free page scanners meet or enough free pages are isolated.
@@ -421,6 +518,7 @@ static void isolate_freepages(struct zone *zone,
for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
pfn -= pageblock_nr_pages) {
unsigned long isolated;
+ enum smt_result ret;

if (!pfn_valid(pfn))
continue;
@@ -437,9 +535,12 @@ static void isolate_freepages(struct zone *zone,
continue;

/* Check the block is suitable for migration */
- if (!suitable_migration_target(page))
+ ret = suitable_migration_target(page, cc);
+ if (ret != MIGRATE_TARGET && ret != RESCUE_UNMOVABLE_TARGET) {
+ if (ret == UNMOVABLE_TARGET)
+ cc->nr_unmovable_pageblock++;
continue;
-
+ }
/*
* Found a block suitable for isolating free pages from. Now
* we disabled interrupts, double check things are ok and
@@ -448,12 +549,16 @@ static void isolate_freepages(struct zone *zone,
*/
isolated = 0;
spin_lock_irqsave(&zone->lock, flags);
- if (suitable_migration_target(page)) {
+ ret = suitable_migration_target(page, cc);
+ if (ret == MIGRATE_TARGET || ret == RESCUE_UNMOVABLE_TARGET) {
+ if (ret == RESCUE_UNMOVABLE_TARGET)
+ rescue_unmovable_pageblock(page);
end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
isolated = isolate_freepages_block(pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
- }
+ } else if (ret == UNMOVABLE_TARGET)
+ cc->nr_unmovable_pageblock++;
spin_unlock_irqrestore(&zone->lock, flags);

/*
@@ -685,8 +790,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)

nr_migrate = cc->nr_migratepages;
err = migrate_pages(&cc->migratepages, compaction_alloc,
- (unsigned long)cc, false,
- cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+ (unsigned long)&cc->freepages, false,
+ (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
+ : MIGRATE_ASYNC);
update_nr_listpages(cc);
nr_remaining = cc->nr_migratepages;

@@ -715,7 +821,8 @@ out:

static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync)
+ enum compact_mode mode,
+ unsigned long *nr_pageblocks_skipped)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -723,12 +830,17 @@ static unsigned long compact_zone_order(struct zone *zone,
.order = order,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
- .sync = sync,
+ .mode = mode,
};
+ unsigned long rc;
+
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);

- return compact_zone(zone, &cc);
+ rc = compact_zone(zone, &cc);
+ *nr_pageblocks_skipped = cc.nr_unmovable_pageblock;
+
+ return rc;
}

int sysctl_extfrag_threshold = 500;
@@ -753,6 +865,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
struct zoneref *z;
struct zone *zone;
int rc = COMPACT_SKIPPED;
+ unsigned long nr_pageblocks_skipped;
+ enum compact_mode mode;

/*
* Check whether it is worth even starting compaction. The order check is
@@ -769,12 +883,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;

- status = compact_zone_order(zone, order, gfp_mask, sync);
+ mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
+retry:
+ status = compact_zone_order(zone, order, gfp_mask, mode,
+ &nr_pageblocks_skipped);
rc = max(status, rc);

/* If a normal allocation would succeed, stop compacting */
if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
break;
+
+ if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
+ if (nr_pageblocks_skipped) {
+ mode = COMPACT_ASYNC_UNMOVABLE;
+ goto retry;
+ }
+ }
}

return rc;
@@ -808,7 +932,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (ok && cc->order > zone->compact_order_failed)
zone->compact_order_failed = cc->order + 1;
/* Currently async compaction is never deferred. */
- else if (!ok && cc->sync)
+ else if (!ok && cc->mode == COMPACT_SYNC)
defer_compaction(zone, cc->order);
}

@@ -823,7 +947,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
{
struct compact_control cc = {
.order = order,
- .sync = false,
+ .mode = COMPACT_ASYNC_MOVABLE,
};

return __compact_pgdat(pgdat, &cc);
@@ -833,7 +957,7 @@ static int compact_node(int nid)
{
struct compact_control cc = {
.order = -1,
- .sync = true,
+ .mode = COMPACT_SYNC,
};

return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index 2ba87fb..061fde7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page);
/*
* in mm/page_alloc.c
*/
+extern void set_pageblock_migratetype(struct page *page, int migratetype);
+extern int move_freepages_block(struct zone *zone, struct page *page,
+ int migratetype);
extern void __free_pages_bootmem(struct page *page, unsigned int order);
extern void prep_compound_page(struct page *page, unsigned long order);
#ifdef CONFIG_MEMORY_FAILURE
@@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page);
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+#include <linux/compaction.h>

/*
* in mm/compaction.c
@@ -119,11 +123,14 @@ struct compact_control {
unsigned long nr_migratepages; /* Number of pages to migrate */
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
- bool sync; /* Synchronous migration */
+ enum compact_mode mode; /* Compaction mode */

int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
+
+ /* Number of UNMOVABLE destination pageblocks skipped during scan */
+ unsigned long nr_unmovable_pageblock;
};

unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 476ae3e..d40e4c7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);

int page_group_by_mobility_disabled __read_mostly;

-static void set_pageblock_migratetype(struct page *page, int migratetype)
+void set_pageblock_migratetype(struct page *page, int migratetype)
{

if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
return pages_moved;
}

-static int move_freepages_block(struct zone *zone, struct page *page,
- int migratetype)
+int move_freepages_block(struct zone *zone, struct page *page,
+ int migratetype)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
@@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
.nr_migratepages = 0,
.order = -1,
.zone = page_zone(pfn_to_page(start)),
- .sync = true,
+ .mode = COMPACT_SYNC,
};
INIT_LIST_HEAD(&cc.migratepages);

--
1.7.9.5


--------------040400010104090007070307--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/