[PATCH v3 2/2] mm: wrap __find_buddy_pfn() with a necessary buddy page validation.

From: Zi Yan
Date: Fri Apr 01 2022 - 19:08:16 EST


From: Zi Yan <ziy@xxxxxxxxxx>

Whenever the buddy of a page is found from __find_buddy_pfn(),
page_is_buddy() should be used to check its validity. Add a helper
function find_buddy_page_pfn() to find the buddy page and do the check
together.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/linux-mm/CAHk-=wji_AmYygZMTsPMdJ7XksMt7kOur8oDfDdniBRMjm4VkQ@xxxxxxxxxxxxxx/
Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
mm/internal.h | 117 ++++++++++++++++++++++++++++++++++----------
mm/page_alloc.c | 52 +++-----------------
mm/page_isolation.c | 9 ++--
3 files changed, 101 insertions(+), 77 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 876e66237c89..56bbae014e0e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -211,6 +211,67 @@ struct alloc_context {
bool spread_dirty_pages;
};

+/*
+ * This function returns the order of a free page in the buddy system. In
+ * general, page_zone(page)->lock must be held by the caller to prevent the
+ * page from being allocated in parallel and returning garbage as the order.
+ * If a caller does not hold page_zone(page)->lock, it must guarantee that the
+ * page cannot be allocated or merged in parallel. Alternatively, it must
+ * handle invalid values gracefully, and use buddy_order_unsafe() below.
+ */
+static inline unsigned int buddy_order(struct page *page)
+{
+ /* PageBuddy() must be checked by the caller */
+ return page_private(page);
+}
+
+/*
+ * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
+ * PageBuddy() should be checked first by the caller to minimize race window,
+ * and invalid values must be handled gracefully.
+ *
+ * READ_ONCE is used so that if the caller assigns the result into a local
+ * variable and e.g. tests it for valid range before using, the compiler cannot
+ * decide to remove the variable and inline the page_private(page) multiple
+ * times, potentially observing different values in the tests and the actual
+ * use of the result.
+ */
+#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
+
+/*
+ * This function checks whether a page is free && is the buddy
+ * we can coalesce a page and its buddy if
+ * (a) the buddy is not in a hole (check before calling!) &&
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order &&
+ * (d) a page and its buddy are in the same zone.
+ *
+ * For recording whether a page is in the buddy system, we set PageBuddy.
+ * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
+ */
+static inline bool page_is_buddy(struct page *page, struct page *buddy,
+ unsigned int order)
+{
+ if (!page_is_guard(buddy) && !PageBuddy(buddy))
+ return false;
+
+ if (buddy_order(buddy) != order)
+ return false;
+
+ /*
+ * zone check is done late to avoid uselessly calculating
+ * zone/node ids for pages that could never merge.
+ */
+ if (page_zone_id(page) != page_zone_id(buddy))
+ return false;
+
+ VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
+ return true;
+}
+
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
@@ -234,6 +295,35 @@ __find_buddy_pfn(unsigned long page_pfn, unsigned int order)
return page_pfn ^ (1 << order);
}

+/*
+ * Find the buddy of @page and validate it.
+ * @page: The input page
+ * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the
+ * function is used in the performance-critical __free_one_page().
+ * @order: The order of the page
+ * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to
+ * page_to_pfn().
+ *
+ * The found buddy can be a non PageBuddy, out of @page's zone, or its order is
+ * not the same as @page. The validation is necessary before use it.
+ *
+ * Return: the found buddy page or NULL if not found.
+ */
+static inline struct page *find_buddy_page_pfn(struct page *page,
+ unsigned long pfn, unsigned int order, unsigned long *buddy_pfn)
+{
+ struct page *buddy;
+ unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order);
+
+ buddy = page + (__buddy_pfn - pfn);
+ if (buddy_pfn)
+ *buddy_pfn = __buddy_pfn;
+
+ if (page_is_buddy(page, buddy, order))
+ return buddy;
+ return NULL;
+}
+
extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
unsigned long end_pfn, struct zone *zone);

@@ -336,33 +426,6 @@ isolate_migratepages_range(struct compact_control *cc,
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);

-/*
- * This function returns the order of a free page in the buddy system. In
- * general, page_zone(page)->lock must be held by the caller to prevent the
- * page from being allocated in parallel and returning garbage as the order.
- * If a caller does not hold page_zone(page)->lock, it must guarantee that the
- * page cannot be allocated or merged in parallel. Alternatively, it must
- * handle invalid values gracefully, and use buddy_order_unsafe() below.
- */
-static inline unsigned int buddy_order(struct page *page)
-{
- /* PageBuddy() must be checked by the caller */
- return page_private(page);
-}
-
-/*
- * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
- * PageBuddy() should be checked first by the caller to minimize race window,
- * and invalid values must be handled gracefully.
- *
- * READ_ONCE is used so that if the caller assigns the result into a local
- * variable and e.g. tests it for valid range before using, the compiler cannot
- * decide to remove the variable and inline the page_private(page) multiple
- * times, potentially observing different values in the tests and the actual
- * use of the result.
- */
-#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
-
/*
* These three helpers classifies VMAs for virtual memory accounting.
*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2ea106146686..b08eecd67aba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -868,40 +868,6 @@ static inline void set_buddy_order(struct page *page, unsigned int order)
__SetPageBuddy(page);
}

-/*
- * This function checks whether a page is free && is the buddy
- * we can coalesce a page and its buddy if
- * (a) the buddy is not in a hole (check before calling!) &&
- * (b) the buddy is in the buddy system &&
- * (c) a page and its buddy have the same order &&
- * (d) a page and its buddy are in the same zone.
- *
- * For recording whether a page is in the buddy system, we set PageBuddy.
- * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
- *
- * For recording page's order, we use page_private(page).
- */
-static inline bool page_is_buddy(struct page *page, struct page *buddy,
- unsigned int order)
-{
- if (!page_is_guard(buddy) && !PageBuddy(buddy))
- return false;
-
- if (buddy_order(buddy) != order)
- return false;
-
- /*
- * zone check is done late to avoid uselessly calculating
- * zone/node ids for pages that could never merge.
- */
- if (page_zone_id(page) != page_zone_id(buddy))
- return false;
-
- VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
-
- return true;
-}
-
#ifdef CONFIG_COMPACTION
static inline struct capture_control *task_capc(struct zone *zone)
{
@@ -1010,18 +976,17 @@ static inline bool
buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
struct page *page, unsigned int order)
{
- struct page *higher_page, *higher_buddy;
- unsigned long combined_pfn;
+ struct page *higher_page;
+ unsigned long higher_page_pfn;

if (order >= MAX_ORDER - 2)
return false;

- combined_pfn = buddy_pfn & pfn;
- higher_page = page + (combined_pfn - pfn);
- buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
- higher_buddy = higher_page + (buddy_pfn - combined_pfn);
+ higher_page_pfn = buddy_pfn & pfn;
+ higher_page = page + (higher_page_pfn - pfn);

- return page_is_buddy(higher_page, higher_buddy, order + 1);
+ return find_buddy_page_pfn(higher_page, higher_page_pfn, order + 1,
+ NULL) != NULL;
}

/*
@@ -1075,10 +1040,9 @@ static inline void __free_one_page(struct page *page,
migratetype);
return;
}
- buddy_pfn = __find_buddy_pfn(pfn, order);
- buddy = page + (buddy_pfn - pfn);

- if (!page_is_buddy(page, buddy, order))
+ buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
+ if (!buddy)
goto done_merging;

if (unlikely(order >= pageblock_order)) {
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index f67c4c70f17f..ff0ea6308299 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -70,7 +70,6 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
unsigned long flags, nr_pages;
bool isolated_page = false;
unsigned int order;
- unsigned long pfn, buddy_pfn;
struct page *buddy;

zone = page_zone(page);
@@ -89,11 +88,9 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (PageBuddy(page)) {
order = buddy_order(page);
if (order >= pageblock_order && order < MAX_ORDER - 1) {
- pfn = page_to_pfn(page);
- buddy_pfn = __find_buddy_pfn(pfn, order);
- buddy = page + (buddy_pfn - pfn);
-
- if (!is_migrate_isolate_page(buddy)) {
+ buddy = find_buddy_page_pfn(page, page_to_pfn(page),
+ order, NULL);
+ if (buddy && !is_migrate_isolate_page(buddy)) {
isolated_page = !!__isolate_free_page(page, order);
/*
* Isolating a free page in an isolated pageblock
--
2.35.1