[2.6.20.16 review 08/28] x86_64: allocate sparsemem memmap above 4G

From: Willy Tarreau
Date: Sat Aug 11 2007 - 15:01:38 EST


On systems with huge amount of physical memory, VFS cache and memory memmap
may eat all available system memory under 4G, then the system may fail to
allocate swiotlb bounce buffer.

There was a fix for this issue in arch/x86_64/mm/numa.c, but that fix dose
not cover sparsemem model.

This patch add fix to sparsemem model by first try to allocate memmap above
4G.

Signed-off-by: Zou Nan hai <nanhai.zou@xxxxxxxxx>
Acked-by: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxx>
Cc: <stable@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
[chrisw: trivial backport]
Signed-off-by: Chris Wright <chrisw@xxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>
---
arch/x86_64/mm/init.c | 6 ++++++
include/linux/bootmem.h | 1 +
mm/sparse.c | 11 +++++++++++
3 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 2968b90..2489aa7 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -766,3 +766,9 @@ int in_gate_area_no_task(unsigned long addr)
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
+
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
+{
+ return __alloc_bootmem_core(pgdat->bdata, size,
+ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
+}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 2275f27..8f820e4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -59,6 +59,7 @@ extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long align,
unsigned long goal,
unsigned long limit);
+extern void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size);

#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
extern void reserve_bootmem(unsigned long addr, unsigned long size);
diff --git a/mm/sparse.c b/mm/sparse.c
index ac26eb0..faa08e2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -209,6 +209,12 @@ static int sparse_init_one_section(struct mem_section *ms,
return 1;
}

+__attribute__((weak))
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
+{
+ return NULL;
+}
+
static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
{
struct page *map;
@@ -219,6 +225,11 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
if (map)
return map;

+ map = alloc_bootmem_high_node(NODE_DATA(nid),
+ sizeof(struct page) * PAGES_PER_SECTION);
+ if (map)
+ return map;
+
map = alloc_bootmem_node(NODE_DATA(nid),
sizeof(struct page) * PAGES_PER_SECTION);
if (map)
--
1.5.2.4

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/