[RESEND PATCH 2/3] mm: Add page colored allocation path

From: Łukasz Daniluk
Date: Wed Aug 23 2017 - 06:02:33 EST


Create an alternative path for requesting a free_list from the buddy
system that takes recent allocations or the physical page offset into
account.

This solution carries the cost of additional early memory allocations
done by the kernel; their size depends on the cache size and on the
minimum order to be colored.

Signed-off-by: Łukasz Daniluk <lukasz.daniluk@xxxxxxxxx>
---
Documentation/admin-guide/kernel-parameters.txt | 8 +
include/linux/mmzone.h | 5 +-
mm/page_alloc.c | 253 +++++++++++++++++++++---
3 files changed, 243 insertions(+), 23 deletions(-)
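
Example usage of the two new parameters (illustrative values, not
recommendations): on a machine with a 32 MiB last-level cache, coloring
of order-9 (2 MiB) and larger blocks could be requested with:

	cache_color_size=32M cache_color_min_order=9

cache_color_size is parsed with memparse(), so K/M/G suffixes work; an
out-of-range cache_color_min_order (or an unparsable cache_color_size)
disables the mechanism, as handled by the early_param() callbacks below.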
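
Note for reviewers: a minimal user-space sketch of the coloring
arithmetic introduced below (it mirrors cache_colors() and
page_cache_color(); the 32 MiB cache size and order-9 block size are
assumed example values, not defaults of this patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Number of colors available for blocks of the given order. */
static unsigned long cache_colors(unsigned long long cache_size, int order)
{
	unsigned long colors = cache_size / (PAGE_SIZE * (1UL << order));

	return colors ? colors : 1;
}

/* Color of a block that starts at physical address phys. */
static unsigned long page_cache_color(unsigned long long phys,
		unsigned long long cache_size, int order)
{
	return (phys % cache_size) / (PAGE_SIZE * (1UL << order));
}

int main(void)
{
	const unsigned long long cache_size = 32ULL << 20; /* assumed 32 MiB LLC */
	const int order = 9;                               /* 2 MiB blocks */
	unsigned long long phys;

	printf("colors at order %d: %lu\n", order,
			cache_colors(cache_size, order));

	/* Blocks whose addresses differ by the cache size share a color. */
	for (phys = 0; phys < (64ULL << 20); phys += 2ULL << 20)
		printf("phys %#llx -> color %lu\n", phys,
				page_cache_color(phys, cache_size, order));
	return 0;
}

Two blocks whose physical addresses differ by a multiple of the cache
size get the same color; the allocator below keeps one free_list per
color and hands out colors round-robin (next_color), so successive
large allocations tend to land in different parts of the cache.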

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 372cc66bba23..4648f1cc6665 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -455,6 +455,14 @@
possible to determine what the correct size should be.
This option provides an override for these situations.

+ cache_color_size=
+ [KNL] Set the cache size used by the cache coloring
+ mechanism in the buddy allocator.
+
+ cache_color_min_order=
+ [KNL] Set the minimal order for which the page coloring
+ mechanism is enabled in the buddy allocator.
+
ca_keys= [KEYS] This parameter identifies a specific key(s) on
the system trusted keyring to be used for certificate
trust validation.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 04128890a684..d10a5421b18b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -25,7 +25,8 @@
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
-#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+#define ORDER_NR_PAGES(order) (1 << (order))
+#define MAX_ORDER_NR_PAGES ORDER_NR_PAGES(MAX_ORDER - 1)

/*
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
@@ -94,6 +95,8 @@ extern int page_group_by_mobility_disabled;

struct free_area {
struct list_head free_list[MIGRATE_TYPES];
+ struct list_head *colored_free_list[MIGRATE_TYPES];
+ unsigned long next_color[MIGRATE_TYPES];
unsigned long nr_free;
};

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f7b074fbfdb..3718b49032c2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -288,14 +288,129 @@ EXPORT_SYMBOL(nr_online_nodes);

int page_group_by_mobility_disabled __read_mostly;

-struct list_head *area_get_free_list(struct free_area *area, int order,
- int migratetype)
+int cache_color_min_order __read_mostly = MAX_ORDER - 1;
+
+
+/*
+ * cache_size - size of cache (in bytes)
+ */
+unsigned long long cache_size __read_mostly;
+
+/*
+ * Returns true if cache color allocations are enabled for the given order
+ */
+static inline bool cache_color_used(int order)
+{
+ return cache_size && order >= cache_color_min_order;
+}
+
+/*
+ * Returns the number of cache colors for the specified order
+ */
+static inline unsigned long cache_colors(int order)
+{
+ unsigned long colors = cache_size / (PAGE_SIZE * ORDER_NR_PAGES(order));
+
+ return colors ? colors : 1;
+}
+
+/*
+ * Returns the cache color of a page of the specified order
+ */
+static inline unsigned long page_cache_color(struct page *page, int order)
+{
+ return (page_to_phys(page) % cache_size)
+ / (PAGE_SIZE * ORDER_NR_PAGES(order));
+}
+
+/*
+ * Returns the first color in the specified range (start until end-1) whose
+ * free_list is non-empty, or end if all of them are empty.
+ */
+static inline unsigned long cache_color_find_nonempty(struct free_area *area,
+ int migratetype, unsigned long start, unsigned long end)
{
+ unsigned long color;
+
+ for (color = start; color < end; ++color)
+ if (!list_empty(&area->colored_free_list[migratetype][color]))
+ break;
+
+ return color;
+}
+
+/*
+ * Returns the free_list to use for cache color allocations.
+ * If a page is passed, it is assumed that the page will be added to the
+ * returned free_list, so the list is chosen based on the color of that page.
+ * Otherwise, the next color that has free pages available is searched for.
+ */
+static inline struct list_head *cache_color_area_get_free_list(
+ struct free_area *area, struct page *page, int order,
+ int migratetype)
+{
+ unsigned long color;
+ unsigned long current_color = area->next_color[migratetype];
+
+ if (page) {
+ color = page_cache_color(page, order);
+ return &area->colored_free_list[migratetype][color];
+ }
+
+ color = cache_color_find_nonempty(area, migratetype, current_color,
+ cache_colors(order));
+
+
+ if (color == cache_colors(order))
+ color = cache_color_find_nonempty(area, migratetype, 0,
+ current_color);
+
+ current_color = color + 1;
+ if (current_color >= cache_colors(order))
+ current_color = 0;
+
+ area->next_color[migratetype] = current_color;
+
+ return &area->colored_free_list[migratetype][color];
+}
+
+static inline bool cache_color_area_empty(struct free_area *area, int order,
+ int migratetype)
+{
+ return cache_color_find_nonempty(area, migratetype, 0,
+ cache_colors(order)) == cache_colors(order);
+}
+
+
+static inline unsigned long cache_color_area_free_count(struct free_area *area,
+ int order, int migratetype)
+{
+ unsigned long count = 0;
+ unsigned long color;
+ struct list_head *lh;
+
+ for (color = 0; color < cache_colors(order); ++color)
+ list_for_each(lh, &area->colored_free_list[migratetype][color])
+ ++count;
+
+ return count;
+}
+
+struct list_head *area_get_free_list(struct free_area *area, struct page *page,
+ int order, int migratetype)
+{
+ if (cache_color_used(order))
+ return cache_color_area_get_free_list(area, page, order,
+ migratetype);
+
return &area->free_list[migratetype];
}

bool area_empty(struct free_area *area, int order, int migratetype)
{
+ if (cache_color_used(order))
+ return cache_color_area_empty(area, order, migratetype);
+
return list_empty(&area->free_list[migratetype]);
}

@@ -305,12 +420,67 @@ unsigned long area_free_count(struct free_area *area, int order,
unsigned long count = 0;
struct list_head *lh;

+ if (cache_color_used(order))
+ return cache_color_area_free_count(area, order, migratetype);
+
list_for_each(lh, &area->free_list[migratetype])
++count;

return count;
}

+/*
+ * Returns a pointer to the allocated space that will host the array of
+ * free_lists used for cache color allocations.
+ */
+static __ref void *cache_color_alloc_lists(struct zone *zone, int order)
+{
+ const size_t size = sizeof(struct list_head) * cache_colors(order);
+
+ return alloc_bootmem_pages_node(zone->zone_pgdat, size);
+}
+
+static __init int setup_cache_color_size(char *_cache_size)
+{
+ char *retptr;
+
+ cache_size = memparse(_cache_size, &retptr);
+
+ if (retptr == _cache_size) {
+ pr_warn("Invalid cache size requested");
+ cache_size = 0;
+ }
+
+ if (cache_size == 0)
+ pr_info("Cache color pages functionality disabled");
+ else
+ pr_info("Cache size set to %llu bytes", cache_size);
+
+ return 0;
+}
+early_param("cache_color_size", setup_cache_color_size);
+
+static __init int setup_cache_color_min_order(char *_cache_color_min_order)
+{
+ int order;
+
+ if (kstrtoint(_cache_color_min_order, 10, &order) < 0) {
+ pr_warn("Invalid cache color min order requested");
+ order = -1;
+ }
+
+ if (order < 0 || order >= MAX_ORDER) {
+ pr_info("Cache color pages functionality disabled");
+ cache_color_min_order = MAX_ORDER;
+ } else {
+ pr_info("Cache min order set to %d", order);
+ cache_color_min_order = order;
+ }
+
+ return 0;
+}
+early_param("cache_color_min_order", setup_cache_color_min_order);
+
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void reset_deferred_meminit(pg_data_t *pgdat)
{
@@ -911,13 +1081,13 @@ static inline void __free_one_page(struct page *page,
page_is_buddy(higher_page, higher_buddy, order + 1)) {
list_add_tail(&page->lru,
area_get_free_list(&zone->free_area[order],
- order, migratetype));
+ page, order, migratetype));
goto out;
}
}

- list_add(&page->lru, area_get_free_list(&zone->free_area[order], order,
- migratetype));
+ list_add(&page->lru, area_get_free_list(&zone->free_area[order], page,
+ order, migratetype));
out:
zone->free_area[order].nr_free++;
}
@@ -1685,8 +1855,8 @@ static inline void expand(struct zone *zone, struct page *page,
if (set_page_guard(zone, &page[size], high, migratetype))
continue;

- list_add(&page[size].lru, area_get_free_list(area, high,
- migratetype));
+ list_add(&page[size].lru, area_get_free_list(area, &page[size],
+ high, migratetype));
area->nr_free++;
set_page_order(&page[size], high);
}
@@ -1829,7 +1999,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
area = &(zone->free_area[current_order]);
page = list_first_entry_or_null(
- area_get_free_list(area, current_order, migratetype),
+ area_get_free_list(area, NULL, current_order,
+ migratetype),
struct page, lru);
if (!page)
continue;
@@ -1924,8 +2095,8 @@ static int move_freepages(struct zone *zone,

order = page_order(page);
list_move(&page->lru,
- area_get_free_list(&zone->free_area[order], order,
- migratetype));
+ area_get_free_list(&zone->free_area[order], page,
+ order, migratetype));
page += 1 << order;
pages_moved += 1 << order;
}
@@ -2074,7 +2245,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,

single_page:
area = &zone->free_area[current_order];
- list_move(&page->lru, area_get_free_list(area, current_order,
+ list_move(&page->lru, area_get_free_list(area, page, current_order,
start_type));
}

@@ -2187,7 +2358,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
struct free_area *area = &(zone->free_area[order]);

page = list_first_entry_or_null(
- area_get_free_list(area, order,
+ area_get_free_list(area, NULL, order,
MIGRATE_HIGHATOMIC),
struct page, lru);
if (!page)
@@ -2302,7 +2473,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
VM_BUG_ON(current_order == MAX_ORDER);

do_steal:
- page = list_first_entry(area_get_free_list(area, current_order,
+ page = list_first_entry(area_get_free_list(area, NULL, current_order,
fallback_mt), struct page, lru);

steal_suitable_fallback(zone, page, start_migratetype, can_steal);
@@ -2566,6 +2737,16 @@ void drain_all_pages(struct zone *zone)

#ifdef CONFIG_HIBERNATION

+static inline void mark_free_page(struct page *page, int order)
+{
+ unsigned long i, pfn;
+
+ pfn = page_to_pfn(page);
+
+ for (i = 0; i < (1UL << order); ++i)
+ swsusp_set_page_free(pfn_to_page(pfn + i));
+}
+
void mark_free_pages(struct zone *zone)
{
unsigned long pfn, max_zone_pfn;
@@ -2591,14 +2772,20 @@ void mark_free_pages(struct zone *zone)
}

for_each_migratetype_order(order, t) {
- list_for_each_entry(page,
- area_get_free_list(&zone->free_area[order],
- order, t), lru) {
- unsigned long i;
+ struct free_area *area = &zone->free_area[order];
+
+ if (cache_color_used(order)) {
+ unsigned long color;
+ struct list_head *colored_lists =
+ area->colored_free_list[t];

- pfn = page_to_pfn(page);
- for (i = 0; i < (1UL << order); i++)
- swsusp_set_page_free(pfn_to_page(pfn + i));
+ for (color = 0; color < cache_colors(order); ++color)
+ list_for_each_entry(page,
+ &colored_lists[color], lru)
+ mark_free_page(page, order);
+ } else {
+ list_for_each_entry(page, &area->free_list[t], lru)
+ mark_free_page(page, order);
}
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -5490,12 +5677,34 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
}
}

+static void __meminit zone_init_cache_color(struct zone *zone, int order,
+ int migratetype)
+{
+ unsigned long c;
+ struct free_area *area = &zone->free_area[order];
+
+ if (!cache_color_used(order))
+ return;
+
+ area->next_color[migratetype] = 0;
+ area->colored_free_list[migratetype] =
+ cache_color_alloc_lists(zone, order);
+
+ for (c = 0; c < cache_colors(order); ++c)
+ INIT_LIST_HEAD(&area->colored_free_list[migratetype][c]);
+}
+
static void __meminit zone_init_free_lists(struct zone *zone)
{
unsigned int order, t;
for_each_migratetype_order(order, t) {
- INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
- zone->free_area[order].nr_free = 0;
+ struct free_area *area = &zone->free_area[order];
+
+ INIT_LIST_HEAD(&area->free_list[t]);
+ area->nr_free = 0;
+
+ if (cache_color_used(order))
+ zone_init_cache_color(zone, order, t);
}
}

--
2.13.3