Re: [PATCH - resend] Memory-Hotplug: Fix the bug on interface /dev/mem for 64-bit kernel(v1)
From: Yinghai Lu
Date:  Tue Jan 12 2010 - 18:01:55 EST
On Tue, Jan 12, 2010 at 5:35 AM, Wu Fengguang <fengguang.wu@xxxxxxxxx> wrote:
> On Tue, Jan 12, 2010 at 10:39:03AM +0800, KAMEZAWA Hiroyuki wrote:
>> On Tue, 12 Jan 2010 10:33:08 +0800
>> Wu Fengguang <fengguang.wu@xxxxxxxxx> wrote:
>>
>> > Sure, here it is :)
>> > ---
>> > x86: use the generic page_is_ram()
>> >
>> > The generic resource based page_is_ram() works better with memory
>> > hotplug/hotremove. So switch the x86 e820map based code to it.
>> >
>> > CC: Andi Kleen <andi@xxxxxxxxxxxxxx>
>> > CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
>> > Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
>>
>> Ack.
>
> Thank you.
>
>>
>> > +#ifdef CONFIG_X86
>> > +   /*
>> > +    * A special case is the first 4Kb of memory;
>> > +    * This is a BIOS owned area, not kernel ram, but generally
>> > +    * not listed as such in the E820 table.
>> > +    */
>> > +   if (pfn == 0)
>> > +           return 0;
>> > +
>> > +   /*
>> > +    * Second special case: Some BIOSen report the PC BIOS
>> > +    * area (640->1Mb) as ram even though it is not.
>> > +    */
>> > +   if (pfn >= (BIOS_BEGIN >> PAGE_SHIFT) &&
>> > +       pfn <  (BIOS_END   >> PAGE_SHIFT))
>> > +           return 0;
>> > +#endif
>>
>> I'm glad if this part is sorted out in clean way ;)
>
> Two possible solutions are:
>
> - to exclude the above two ranges directly in e820 map;
> - to not add the above two ranges into iomem_resource.
>
> Yinghai, do you have any suggestions?
> We want to get rid of the two explicit tests from page_is_ram().
please check attached patch.
YH
[PATCH] x86: remove bios data range from e820
to prepare for making page_is_ram() a generic function
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxxx>
---
 arch/x86/kernel/e820.c   |    8 ++++++++
 arch/x86/kernel/head32.c |    2 --
 arch/x86/kernel/head64.c |    2 --
 arch/x86/kernel/setup.c  |   19 ++++++++++++++++++-
 arch/x86/mm/ioremap.c    |   16 ----------------
 5 files changed, 26 insertions(+), 21 deletions(-)
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -657,6 +657,23 @@ static struct dmi_system_id __initdata b
 	{}
 };
 
+static void __init trim_bios_range(void)
+{
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 */
+	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+	/*
+	 * Second special case: some BIOSen report the PC BIOS
+	 * area (640->1Mb) as ram even though it is not.
+	 * Remove that range from the e820 map.
+	 */
+	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -820,7 +837,7 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-
+	trim_bios_range();
 #ifdef CONFIG_X86_32
 	if (ppro_with_ram_bug()) {
 		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -509,11 +509,19 @@ u64 __init e820_remove_range(u64 start,
 			     int checktype)
 {
 	int i;
+	u64 end;
 	u64 real_removed_size = 0;
 
 	if (size > (ULLONG_MAX - start))
 		size = ULLONG_MAX - start;
 
+	end = start + size;
+	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
+		       (unsigned long long) start,
+		       (unsigned long long) end);
+	e820_print_type(old_type);
+	printk(KERN_CONT "\n");
+
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
 		u64 final_start, final_end;
Index: linux-2.6/arch/x86/mm/ioremap.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/ioremap.c
+++ linux-2.6/arch/x86/mm/ioremap.c
@@ -29,22 +29,6 @@ int page_is_ram(unsigned long pagenr)
 	resource_size_t addr, end;
 	int i;
 
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
 	for (i = 0; i < e820.nr_map; i++) {
 		/*
 		 * Not usable memory:
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_se
 
 void __init i386_start_kernel(void)
 {
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 #ifdef CONFIG_X86_TRAMPOLINE
 	/*
 	 * But first pinch a few for the stack/trampoline stuff
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	reserve_early_overlap_ok(0, PAGE_SIZE, "BIOS data page");
-
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD