[PATCH 2/6] mm: add get_pageblock_migratetype_nolock() for cases where locking is undesirable

From: Vlastimil Babka
Date: Fri Feb 28 2014 - 09:16:07 EST


In order to prevent races with set_pageblock_migratetype(), most calls to
get_pageblock_migratetype() have been moved under zone->lock. For the remaining
call sites the extra locking is undesirable, notably in free_hot_cold_page().

This patch introduces a _nolock version to be used at these call sites, where
a wrong value does not affect correctness. The function makes sure that the
returned value is a valid migratetype number: values >= MIGRATE_TYPES are
assumed to be the result of a race, and the caller-supplied fallback value is
returned instead.

Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
---
include/linux/mmzone.h | 24 ++++++++++++++++++++++++
mm/compaction.c | 14 +++++++++++---
mm/memory-failure.c | 3 ++-
mm/page_alloc.c | 22 +++++++++++++++++-----
mm/vmstat.c | 2 +-
5 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fac5509..7c3f678 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -75,6 +75,30 @@ enum {

extern int page_group_by_mobility_disabled;

+/*
+ * Read the pageblock migratetype without requiring zone->lock. A racing
+ * set_pageblock_migratetype() may make the value bogus, so use this variant
+ * only where a wrong value does not affect correctness and taking zone->lock
+ * would be costly. A value >= MIGRATE_TYPES is treated as the result of such
+ * a race, and the caller-supplied race_fallback is returned instead.
+ */
+static inline int get_pageblock_migratetype_nolock(struct page *page,
+ int race_fallback)
+{
+ int ret = get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+
+ if (unlikely(ret >= MIGRATE_TYPES))
+ ret = race_fallback;
+
+ return ret;
+}
+
+/*
+ * Should be called only with zone->lock held. In cases where locking overhead
+ * is undesirable, consider the _nolock version.
+ * Note that VM_BUG_ON(locked) here would require e.g. moving the function to a
+ * .c file to be able to include page_zone() definition.
+ */
static inline int get_pageblock_migratetype(struct page *page)
{
return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
diff --git a/mm/compaction.c b/mm/compaction.c
index 5142920..f0db73b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -217,12 +217,17 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
+ int migratetype;
+
/* If the page is a large free page, then disallow migration */
if (PageBuddy(page) && page_order(page) >= pageblock_order)
return false;

+ /* If someone races on the pageblock, just assume it's not suitable */
+ migratetype = get_pageblock_migratetype_nolock(page, MIGRATE_RESERVE);
+
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
- if (migrate_async_suitable(get_pageblock_migratetype(page)))
+ if (migrate_async_suitable(migratetype))
return true;

/* Otherwise skip the block */
@@ -530,9 +535,12 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
/*
* For async migration, also only scan in MOVABLE
* blocks. Async migration is optimistic to see if
- * the minimum amount of work satisfies the allocation
+ * the minimum amount of work satisfies the allocation.
+ * If we race on the migratetype, just assume it's an
+ * unsuitable one.
*/
- mt = get_pageblock_migratetype(page);
+ mt = get_pageblock_migratetype_nolock(page,
+ MIGRATE_RESERVE);
if (!cc->sync && !migrate_async_suitable(mt)) {
cc->finished_update_migrate = true;
skipped_async_unsuitable = true;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 35ef28a..d0625f6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1672,7 +1672,8 @@ int soft_offline_page(struct page *page, int flags)
* was free. This flag should be kept set until the source page
* is freed and PG_hwpoison on it is set.
*/
- if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
+ if (get_pageblock_migratetype_nolock(page, MIGRATE_RESERVE)
+ != MIGRATE_ISOLATE)
set_migratetype_isolate(page, true);

ret = get_any_page(page, pfn, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0cb41ec..de5b419 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1374,7 +1374,16 @@ void free_hot_cold_page(struct page *page, int cold)
if (!free_pages_prepare(page, 0))
return;

- migratetype = get_pageblock_migratetype(page);
+ /*
+ * We don't want to take zone->lock here just to determine pageblock
+ * migratetype safely. So we allow a race, which will be detected if
+ * the migratetype appears to be >= MIGRATE_TYPES.
+ * In case of a detected race, defer to free_one_page() below, which
+ * will re-read the pageblock migratetype under zone->lock and re-set
+ * freepage migratetype accordingly.
+ * We use MIGRATE_TYPES as MIGRATE_ISOLATE may not be enabled.
+ */
+ migratetype = get_pageblock_migratetype_nolock(page, MIGRATE_TYPES);
set_freepage_migratetype(page, migratetype);
local_irq_save(flags);
__count_vm_event(PGFREE);
@@ -1387,7 +1396,8 @@ void free_hot_cold_page(struct page *page, int cold)
* excessively into the page allocator
*/
if (migratetype >= MIGRATE_PCPTYPES) {
- if (unlikely(is_migrate_isolate(migratetype))) {
+ if (unlikely(is_migrate_isolate(migratetype)
+ || migratetype == MIGRATE_TYPES)) {
free_one_page(zone, page, 0);
goto out;
}
@@ -6080,8 +6090,9 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
* If @count is not zero, it is okay to include less @count unmovable pages
*
* PageLRU check without isolation or lru_lock could race so that
- * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
- * expect this function should be exact.
+ * MIGRATE_MOVABLE block might include unmovable pages. The detection of
+ * pageblock migratetype can race as well. It means you can't expect this
+ * function to be exact.
*/
bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
bool skip_hwpoisoned_pages)
@@ -6095,7 +6106,8 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
*/
if (zone_idx(zone) == ZONE_MOVABLE)
return false;
- mt = get_pageblock_migratetype(page);
+ /* In case of a detected race, try to reduce false positives */
+ mt = get_pageblock_migratetype_nolock(page, MIGRATE_UNMOVABLE);
if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt))
return false;

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 2592010..1f08bf6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -956,7 +956,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
if (!memmap_valid_within(pfn, page, zone))
continue;

- mtype = get_pageblock_migratetype(page);
+ mtype = get_pageblock_migratetype_nolock(page, MIGRATE_TYPES);

if (mtype < MIGRATE_TYPES)
count[mtype]++;
--
1.8.4.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/