[PATCH] nvdimm: Support sizeof(struct page) > MAX_STRUCT_PAGE_SIZE

From: Dan Williams
Date: Thu Jan 05 2023 - 16:27:34 EST


Commit 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE")

...updated MAX_STRUCT_PAGE_SIZE to account for sizeof(struct page)
potentially doubling in the case of CONFIG_KMSAN=y. Unfortunately this
doubles the amount of capacity stolen from user addressable capacity for
everyone, regardless of whether they are using the debug option. Revert
that change, mandate that MAX_STRUCT_PAGE_SIZE never exceed 64, but
allow for debug scenarios to proceed with creating debug sized page maps
with a new 'libnvdimm.page_struct_override' module parameter.

Note that this only applies to cases where the page map is permanent,
i.e. stored in a reservation of the pmem itself ("--map=dev" in "ndctl
create-namespace" terms). For the "--map=mem" case, since the allocation
is ephemeral for the lifespan of the namespace, there are no explicit
restriction. However, the implicit restriction, of having enough
available "System RAM" to store the page map for the typically large
pmem, still applies.

Fixes: 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE")
Cc: <stable@xxxxxxxxxxxxxxx>
Cc: Alexander Potapenko <glider@xxxxxxxxxx>
Cc: Marco Elver <elver@xxxxxxxxxx>
Reported-by: Jeff Moyer <jmoyer@xxxxxxxxxx>
---
drivers/nvdimm/nd.h | 2 +-
drivers/nvdimm/pfn_devs.c | 45 ++++++++++++++++++++++++++-------------
2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 85ca5b4da3cf..ec5219680092 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev,
struct nd_namespace_common *ndns);
#if IS_ENABLED(CONFIG_ND_CLAIM)
/* max struct page size independent of kernel config */
-#define MAX_STRUCT_PAGE_SIZE 128
+#define MAX_STRUCT_PAGE_SIZE 64
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
#else
static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 61af072ac98f..978d63559c0e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -13,6 +13,11 @@
#include "pfn.h"
#include "nd.h"

+static bool page_struct_override;
+module_param(page_struct_override, bool, 0644);
+MODULE_PARM_DESC(page_struct_override,
+ "Force namespace creation in the presence of mm-debug.");
+
static void nd_pfn_release(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
@@ -758,12 +763,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
return -ENXIO;
}

- /*
- * Note, we use 64 here for the standard size of struct page,
- * debugging options may cause it to be larger in which case the
- * implementation will limit the pfns advertised through
- * ->direct_access() to those that are included in the memmap.
- */
start = nsio->res.start;
size = resource_size(&nsio->res);
npfns = PHYS_PFN(size - SZ_8K);
@@ -782,20 +781,33 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
}
end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
+ unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns;
+
/*
* The altmap should be padded out to the block size used
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*
- * Also make sure size of struct page is less than 128. We
- * want to make sure we use large enough size here so that
- * we don't have a dynamic reserve space depending on
- * struct page size. But we also want to make sure we notice
- * when we end up adding new elements to struct page.
+ * Also make sure size of struct page is less than
+ * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the
+ * face of production kernel configurations that reduce the
+ * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug
+ * kernel configurations that increase the 'struct page' size
+ * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows
+ * for continuing with the capacity that will be wasted when
+ * reverting to a production kernel configuration. Otherwise,
+ * those configurations are blocked by default.
*/
- BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE);
- offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align)
- - start;
+ if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) {
+ if (page_struct_override)
+ page_map_size = sizeof(struct page) * npfns;
+ else {
+ dev_err(&nd_pfn->dev,
+ "Memory debug options prevent using pmem for the page map\n");
+ return -EINVAL;
+ }
+ }
+ offset = ALIGN(start + SZ_8K + page_map_size, align) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K, align) - start;
else
@@ -818,7 +830,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
pfn_sb->version_minor = cpu_to_le16(4);
pfn_sb->end_trunc = cpu_to_le32(end_trunc);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
- pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
+ if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override)
+ pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page));
+ else
+ pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
--
2.38.1