Re: [PATCH 1/2] x86 boot: add E820_RESVD_KERN

From: Yinghai Lu
Date: Mon Jun 30 2008 - 05:32:04 EST


On Mon, Jun 30, 2008 at 2:15 AM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
> On Mon, Jun 30, 2008 at 12:51 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>> On Mon, 2008-06-30 at 00:34 -0700, Yinghai Lu wrote:
>>> On Mon, Jun 30, 2008 at 12:03 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> > On Fri, 2008-06-27 at 15:05 -0700, Yinghai Lu wrote:
>>> >> On Thu, Jun 26, 2008 at 7:48 PM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> >> > On Thu, 2008-06-26 at 19:22 -0700, Yinghai Lu wrote:
>>> >> >> On Thu, Jun 26, 2008 at 2:47 AM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
>>> >> >> > On Thu, Jun 26, 2008 at 12:48 AM, Huang, Ying <ying.huang@xxxxxxxxx> wrote:
>>> >> >> >> On Thu, 2008-06-26 at 00:25 -0700, Yinghai Lu wrote:
>>> >> >> >> [...]
>>> >> >> >>> > if (pfn >= limit_pfn)
>>> >> >> >>> > @@ -977,7 +978,7 @@ u64 __init early_reserve_e820(u64 startt
>>> >> >> >>> > return 0;
>>> >> >> >>> >
>>> >> >> >>> > addr = round_down(start + size - sizet, align);
>>> >> >> >>> > - e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
>>> >> >> >>> > + e820_update_range(addr, sizet, E820_RAM, E820_RESVD_KERN);
>>> >> >> >>>
>>> >> >> >>> this line is not needed.
>>> >> >> >>
>>> >> >> >> Why? Memory reserved by early_rserved_e820 should not be saved during
>>> >> >> >> hibernation? shoudl not be saved by kdump?
>>> >> >> >>
>>> >> >
>>> >> > Can you tell me why this line is not needed?
>>> >> >
>>> >> > [...]
>>> >> >> some like the attach patch...
>>> >> >>
>>> >> >> you still can merge parse_setup_data parse_e820_ext
>>> >> >> also entries in parse_e820_ext is not initialized..., __copy_e820_map
>>> >> >> will do nothing.
>>> >> >
>>> >> > ïOK. Because some E820 entries are available after parse_setup_data(),
>>> >> > it is better to call reserve_setup_data() after calling
>>> >> > parse_setup_data() if update_e820_range() is used instead of
>>> >> > reserve_early().
>>> >>
>>> >> please modify it and test on your platforms then submit to Ingo..
>>> >
>>> > It seems that there is an issue:
>>> >
>>> > - If parse_setup_data() is called before reserve_setup_data(), and there
>>> > is a conflict between memory area used by setup_data and other memory
>>> > area, it is possible that the contents of setup_data is changed. So that
>>> > system may panic before reporting memory area conflict. And it seems
>>> > that memory area conflict is not checked by e820_update_range().
>>>
>>> what is "other memory area"? returned from find_e820_area? no one use that yet.
>>
>> I mean memory area reserved with reserved_early() or e820_update_range()
>> before reserve_setup_data() is called.
>
> before parse_setup_data, reserve_early is called for
> 1. kernel text/data/bss + initial pgt
> 2. ramdisk
> 3. ebda
> e820_update_range is not called.
> at this time early_res have RAM reserved by kernel.
>
> then setup_memory_map is called, so e820 have some ranges...directly
> from e820 table
>
> next need to call parse_setup_data
> it will add some entries in e820
>
> then reserve_setup_data is called, it will use e820_update_range to
> reserve setup_data itself directly in e820
>
> ...
>
>>
>> And because there is no conflict check in e820_update_range(), what to
>> deal with potential conflict between setup_data and other memory area
>> regardless which one is reserved earlier?
>
> find_e820_area will make sure it only find ram from e820 and it is not
> conflict with early_res
>
>
>>
>>
>> Another issue related:
>>
>> Because some memmap entries are available via extended E820 memmap
>> (SETUP_E820_EXT), it is not strictly safe to use e820_update_range()
>> between setup_memory_map() and parse_setup_data(). It may be better to
>> parse extended E820 memmap right after setup_memory_map().
>
> will look at the code again..

Ying,

please check attach the v2 patch...
remove one left over reserve_setup_data for 32 bit

YH
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ba5ac88..e03b89a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -120,6 +120,7 @@ void __init e820_print_map(char *who)
(e820.map[i].addr + e820.map[i].size));
switch (e820.map[i].type) {
case E820_RAM:
+ case E820_RESERVED_KERN:
printk(KERN_CONT "(usable)\n");
break;
case E820_RESERVED:
@@ -611,7 +612,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
register_nosave_region(pfn, PFN_UP(ei->addr));

pfn = PFN_DOWN(ei->addr + ei->size);
- if (ei->type != E820_RAM)
+ if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
register_nosave_region(PFN_UP(ei->addr), pfn);

if (pfn >= limit_pfn)
@@ -1207,6 +1208,7 @@ void __init e820_reserve_resources(void)
res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
for (i = 0; i < e820.nr_map; i++) {
switch (e820.map[i].type) {
+ case E820_RESERVED_KERN:
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index a6816be..3e66bd3 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -53,21 +53,3 @@ void __init reserve_ebda_region(void)
/* reserve all memory between lowmem and the 1MB mark */
reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
}
-
-void __init reserve_setup_data(void)
-{
- struct setup_data *data;
- u64 pa_data;
- char buf[32];
-
- if (boot_params.hdr.version < 0x0209)
- return;
- pa_data = boot_params.hdr.setup_data;
- while (pa_data) {
- data = early_ioremap(pa_data, sizeof(*data));
- sprintf(buf, "setup data %x", data->type);
- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
- pa_data = data->next;
- early_iounmap(data, sizeof(*data));
- }
-}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c970929..2ff9bc6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -123,7 +123,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
#endif

reserve_ebda_region();
- reserve_setup_data();

/*
* At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fb318ed..cf03401 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -389,14 +389,33 @@ static void __init parse_setup_data(void)
default:
break;
}
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
- free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
pa_data = data->next;
early_iounmap(data, PAGE_SIZE);
}
}

+static void __init reserve_setup_data(void)
+{
+ struct setup_data *data;
+ u64 pa_data;
+ char buf[32];
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*data));
+ sprintf(buf, "setup data %x", data->type);
+ e820_update_range(pa_data, sizeof(*data)+data->len,
+ E820_RAM, E820_RESERVED_KERN);
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*data));
+ }
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ printk(KERN_INFO "extended physical RAM map:\n");
+ e820_print_map("extended");
+}
+
/*
* --------- Crashkernel reservation ------------------------------
*/
@@ -524,7 +543,6 @@ void __init setup_arch(char **cmdline_p)
pre_setup_arch_hook();
early_cpu_init();
early_ioremap_init();
- reserve_setup_data();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
@@ -566,6 +584,9 @@ void __init setup_arch(char **cmdline_p)
ARCH_SETUP

setup_memory_map();
+ parse_setup_data();
+ reserve_setup_data();
+
copy_edd();

if (!boot_params.hdr.root_flags)
@@ -592,8 +613,6 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;

- parse_setup_data();
-
parse_early_param();

if (acpi_mps_check()) {
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 6eeba3b..ae22bdf 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -108,6 +108,4 @@ struct boot_params {
__u8 _pad9[276]; /* 0xeec */
} __attribute__((packed));

-void reserve_setup_data(void);
-
#endif /* _ASM_BOOTPARAM_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f622685..45e904f 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -44,6 +44,9 @@
#define E820_ACPI 3
#define E820_NVS 4

+/* reserved RAM used by kernel itself */
+#define E820_RESERVED_KERN 128
+
#ifndef __ASSEMBLY__
struct e820entry {
__u64 addr; /* start of memory segment */