[PATCH v1 1/5] kernel/resource: make release_mem_region_adjustable() never fail

From: David Hildenbrand
Date: Fri Aug 21 2020 - 06:35:00 EST


Let's make sure splitting a resource on memory hotunplug will never fail.
This will become more relevant once we merge selected system ram
resources - then, we'll trigger that case more frequently on memory
hotunplug.

In general, this function is already unlikely to fail. When we remove
memory, we free up quite a lot of metadata (memmap, page tables, memory
block device, etc.). The only way it could really fail currently is when
injecting allocation errors.

All other error cases inside release_mem_region_adjustable() seem to be
sanity checks if the function would be abused in different context -
let's add WARN_ON_ONCE().

Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
Cc: Pankaj Gupta <pankaj.gupta.linux@xxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: Wei Yang <richardw.yang@xxxxxxxxxxxxxxx>
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
include/linux/ioport.h | 4 ++--
kernel/resource.c | 49 ++++++++++++++++++++++++------------------
mm/memory_hotplug.c | 22 +------------------
3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6c2b06fe8beb7..52a91f5fa1a36 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -248,8 +248,8 @@ extern struct resource * __request_region(struct resource *,
extern void __release_region(struct resource *, resource_size_t,
resource_size_t);
#ifdef CONFIG_MEMORY_HOTREMOVE
-extern int release_mem_region_adjustable(struct resource *, resource_size_t,
- resource_size_t);
+extern void release_mem_region_adjustable(struct resource *, resource_size_t,
+ resource_size_t);
#endif

/* Wrappers for managed devices */
diff --git a/kernel/resource.c b/kernel/resource.c
index 841737bbda9e5..1dcef5d53d76e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1255,21 +1255,28 @@ EXPORT_SYMBOL(__release_region);
* assumes that all children remain in the lower address entry for
* simplicity. Enhance this logic when necessary.
*/
-int release_mem_region_adjustable(struct resource *parent,
- resource_size_t start, resource_size_t size)
+void release_mem_region_adjustable(struct resource *parent,
+ resource_size_t start, resource_size_t size)
{
+ struct resource *new_res = NULL;
+ bool alloc_nofail = false;
struct resource **p;
struct resource *res;
- struct resource *new_res;
resource_size_t end;
- int ret = -EINVAL;

end = start + size - 1;
- if ((start < parent->start) || (end > parent->end))
- return ret;
+ if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
+ return;

- /* The alloc_resource() result gets checked later */
- new_res = alloc_resource(GFP_KERNEL);
+ /*
+ * We free up quite a lot of memory on memory hotunplug (esp., memap),
+ * just before releasing the region. This is highly unlikely to
+ * fail - let's play save and make it never fail as the caller cannot
+ * perform any error handling (e.g., trying to re-add memory will fail
+ * similarly).
+ */
+retry:
+ new_res = alloc_resource(GFP_KERNEL | alloc_nofail ? __GFP_NOFAIL : 0);

p = &parent->child;
write_lock(&resource_lock);
@@ -1295,7 +1302,6 @@ int release_mem_region_adjustable(struct resource *parent,
* so if we are dealing with them, let us just back off here.
*/
if (!(res->flags & IORESOURCE_SYSRAM)) {
- ret = 0;
break;
}

@@ -1312,20 +1318,23 @@ int release_mem_region_adjustable(struct resource *parent,
/* free the whole entry */
*p = res->sibling;
free_resource(res);
- ret = 0;
} else if (res->start == start && res->end != end) {
/* adjust the start */
- ret = __adjust_resource(res, end + 1,
- res->end - end);
+ WARN_ON_ONCE(__adjust_resource(res, end + 1,
+ res->end - end));
} else if (res->start != start && res->end == end) {
/* adjust the end */
- ret = __adjust_resource(res, res->start,
- start - res->start);
+ WARN_ON_ONCE(__adjust_resource(res, res->start,
+ start - res->start));
} else {
- /* split into two entries */
+ /* split into two entries - we need a new resource */
if (!new_res) {
- ret = -ENOMEM;
- break;
+ new_res = alloc_resource(GFP_ATOMIC);
+ if (!new_res) {
+ alloc_nofail = true;
+ write_unlock(&resource_lock);
+ goto retry;
+ }
}
new_res->name = res->name;
new_res->start = end + 1;
@@ -1336,9 +1345,8 @@ int release_mem_region_adjustable(struct resource *parent,
new_res->sibling = res->sibling;
new_res->child = NULL;

- ret = __adjust_resource(res, res->start,
- start - res->start);
- if (ret)
+ if (WARN_ON_ONCE(__adjust_resource(res, res->start,
+ start - res->start)))
break;
res->sibling = new_res;
new_res = NULL;
@@ -1349,7 +1357,6 @@ int release_mem_region_adjustable(struct resource *parent,

write_unlock(&resource_lock);
free_resource(new_res);
- return ret;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 35d56cbd3e45b..f77fd1810e9f5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1700,26 +1700,6 @@ void try_offline_node(int nid)
}
EXPORT_SYMBOL(try_offline_node);

-static void __release_memory_resource(resource_size_t start,
- resource_size_t size)
-{
- int ret;
-
- /*
- * When removing memory in the same granularity as it was added,
- * this function never fails. It might only fail if resources
- * have to be adjusted or split. We'll ignore the error, as
- * removing of memory cannot fail.
- */
- ret = release_mem_region_adjustable(&iomem_resource, start, size);
- if (ret) {
- resource_size_t endres = start + size - 1;
-
- pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
- &start, &endres, ret);
- }
-}
-
static int __ref try_remove_memory(int nid, u64 start, u64 size)
{
int rc = 0;
@@ -1753,7 +1733,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
memblock_remove(start, size);
}

- __release_memory_resource(start, size);
+ release_mem_region_adjustable(&iomem_resource, start, size);

try_offline_node(nid);

--
2.26.2