[PATCH] mm: poison struct page for ptlock

From: Hugh Dickins
Date: Thu Nov 03 2005 - 14:27:46 EST


The split ptlock patch enlarged the default SMP PREEMPT struct page from
32 to 36 bytes on most 32-bit platforms, from 32 to 44 bytes on PA-RISC
7xxx (without PREEMPT). That was not my intention, and I don't believe
that split ptlock deserves any such slice of the user's memory.

While leaving most of the page_private() mods in place for the moment,
could we please try this patch, or something like it? Again to overlay
the spinlock_t from &page->private onwards, with corrected BUILD_BUG_ON
that we don't go beyond ->lru; with poisoning of the fields overlaid,
and unsplit config verifying that the split config is safe to use them.

Signed-off-by: Hugh Dickins <hugh@xxxxxxxxxxx>
---

include/linux/mm.h | 69 +++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 56 insertions(+), 13 deletions(-)

--- 2.6.14-git6/include/linux/mm.h 2005-11-03 18:38:01.000000000 +0000
+++ linux/include/linux/mm.h 2005-11-03 18:46:06.000000000 +0000
@@ -226,18 +226,19 @@ struct page {
* to show when page is mapped
* & limit reverse map searches.
*/
- union {
- unsigned long private; /* Mapping-private opaque data:
+ unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache
* When page is free, this indicates
* order in the buddy system.
*/
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
- spinlock_t ptl;
-#endif
- } u;
+ /*
+ * Along with private, the mapping, index and lru fields of a
+ * page table page's struct page may be overlaid by a spinlock
+ * for pte locking: see comment on "split ptlock" below. Please
+ * do not rearrange these fields without considering that usage.
+ */
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
@@ -265,8 +266,8 @@ struct page {
#endif /* WANT_PAGE_VIRTUAL */
};

-#define page_private(page) ((page)->u.private)
-#define set_page_private(page, v) ((page)->u.private = (v))
+#define page_private(page) ((page)->private)
+#define set_page_private(page, v) ((page)->private = (v))

/*
* FIXME: take this include out, include page-flags.h in
@@ -787,25 +788,67 @@ static inline pmd_t *pmd_alloc(struct mm
}
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

+/*
+ * In the split ptlock case, we shall be overlaying the struct page
+ * of a page table page with a spinlock starting at &page->private,
+ * ending dependent on architecture and config, but never beyond lru.
+ *
+ * So poison the struct page in all cases (in part to assert our
+ * territory: that pte locking owns these fields of a page table
+ * struct page), and verify it when freeing in the unsplit ptlock
+ * case, when none of these fields should have been touched.
+ * Poison lru back-to-front, to make sure list_del was not used.
+ *
+ * The time may come when important configs requiring split ptlock
+ * have a spinlock_t which cannot fit here: then kmalloc a spinlock_t
+ * (perhaps in its own cacheline) and keep the pointer in struct page.
+ */
+static inline void poison_struct_page(struct page *page)
+{
+ page->private = (unsigned long) page;
+ page->mapping = (struct address_space *) page;
+ page->index = (pgoff_t) page;
+ page->lru.next = LIST_POISON2;
+ page->lru.prev = LIST_POISON1;
+}
+
+static inline void verify_struct_page(struct page *page)
+{
+ BUG_ON(page->private != (unsigned long) page);
+ BUG_ON(page->mapping != (struct address_space *) page);
+ BUG_ON(page->index != (pgoff_t) page);
+ BUG_ON(page->lru.next != LIST_POISON2);
+ BUG_ON(page->lru.prev != LIST_POISON1);
+ /*
+ * Reset page->mapping so free_pages_check won't complain.
+ */
+ page->mapping = NULL;
+}
+
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
/*
* We tuck a spinlock to guard each pagetable page into its struct page,
* at page->private, with BUILD_BUG_ON to make sure that this will not
- * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
- * When freeing, reset page->mapping so free_pages_check won't complain.
+ * overflow beyond page->lru (as it might with PA-RISC DEBUG_SPINLOCK).
*/
-#define __pte_lockptr(page) &((page)->u.ptl)
+#define __pte_lockptr(page) ((spinlock_t *)&((page)->private))
#define pte_lock_init(_page) do { \
+ BUILD_BUG_ON(__pte_lockptr((struct page *)0) + 1 > \
+ (spinlock_t *)(&((struct page *)0)->lru + 1)); \
+ poison_struct_page(_page); \
spin_lock_init(__pte_lockptr(_page)); \
} while (0)
+/*
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
#define pte_lock_deinit(page) ((page)->mapping = NULL)
#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
#else
/*
* We use mm->page_table_lock to guard all pagetable pages of the mm.
*/
-#define pte_lock_init(page) do {} while (0)
-#define pte_lock_deinit(page) do {} while (0)
+#define pte_lock_init(page) poison_struct_page(page)
+#define pte_lock_deinit(page) verify_struct_page(page)
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/