[PATCH v14 4/5] mm: support reporting free page blocks

From: Wei Wang
Date: Wed Aug 16 2017 - 23:39:02 EST


This patch adds walk_free_mem_block(), which walks through the free page
blocks in the system and reports each of them to the caller via a callback
function. zone->lock is held only while a single free list is being walked,
so some page blocks may leave the free list after zone->lock is released;
it is the caller's responsibility to either detect or prevent the use of
such pages.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
---
include/linux/mm.h | 6 ++++++
mm/page_alloc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5..cd29b9f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1835,6 +1835,12 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
extern void free_initmem(void);

+extern void walk_free_mem_block(void *opaque1,
+ unsigned int min_order,
+ void (*visit)(void *opaque2,
+ unsigned long pfn,
+ unsigned long nr_pages));
+
/*
* Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
* into the buddy system. The freed pages will be poisoned with pattern
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d00f74..a721a35 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4762,6 +4762,50 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
show_swap_cache_info();
}

+/**
+ * walk_free_mem_block - report every free page block of at least @min_order
+ * @opaque1: caller context, handed unchanged to @visit as its first argument
+ * @min_order: lowest free-list order to walk; lower orders are not reported
+ * @visit: callback invoked as visit(@opaque1, pfn, 1 << order) for each block
+ *
+ * For each populated zone, walks the free lists from order MAX_ORDER - 1 down
+ * to @min_order, across all migrate types, and reports every block found.
+ * Please note:
+ * 1) zone->lock is taken and released once per free list, so a reported block
+ * may leave the free list later; treat the reports as hints only.
+ * 2) @visit runs with zone->lock held via spin_lock_irqsave(), so it must not
+ * block and should finish as soon as possible.
+ */
+void walk_free_mem_block(void *opaque1,
+ unsigned int min_order,
+ void (*visit)(void *opaque2,
+ unsigned long pfn,
+ unsigned long nr_pages))
+{
+ struct zone *zone;
+ struct page *page;
+ struct list_head *list;
+ unsigned int order;
+ enum migratetype mt;
+ unsigned long pfn, flags;
+
+ for_each_populated_zone(zone) {
+ for (order = MAX_ORDER - 1;
+ order < MAX_ORDER && order >= min_order; order--) {
+ for (mt = 0; mt < MIGRATE_TYPES; mt++) {
+ spin_lock_irqsave(&zone->lock, flags);
+ list = &zone->free_area[order].free_list[mt];
+ list_for_each_entry(page, list, lru) {
+ pfn = page_to_pfn(page);
+ visit(opaque1, pfn, 1 << order);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(walk_free_mem_block);
+
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
zoneref->zone = zone;
--
2.7.4