Re: [PATCH] x86/mm/init: respect memblock reserved regions when destroyingmappings

From: Yinghai Lu
Date: Sun Feb 06 2011 - 14:25:25 EST


On 02/05/2011 11:30 PM, H. Peter Anvin wrote:
> On 02/05/2011 11:02 PM, Yinghai Lu wrote:
>> why not just move calling cleanup_highmap down?
>>
>> something like attached patch.
>
> This patch looks very clean and looks on the surface of it like it is
> removing some ugly ad hoc code, but (as always) it needs a description
> about the problem it solves and why it is correct.

Sure.


Jeremy and xen guys, can you please check if it works well with xen ?

Thanks


[PATCH] x86: Cleanup highmap after brk is concluded

now cleanup_high actually two step. one is early in head64.c.
and it only clear above _end. later will try to clean from _brk_end to _end.
it will need to double check if those boundary are PMD_SIZE aligned...

Also init_memory_mapping() is called several times for numa or memory hotplug.
and it deals real memory mapping instead of initial kernel mapping.
We really should not handle initial kernel mapping there.

We can move cleanup_highmap() calling down after _brk_end is settled and pass
_brk_end with it.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
arch/x86/include/asm/pgtable_64.h | 2 +-
arch/x86/kernel/head64.c | 3 ---
arch/x86/kernel/setup.c | 6 ++++++
arch/x86/mm/init.c | 19 -------------------
arch/x86/mm/init_64.c | 5 +++--
5 files changed, 10 insertions(+), 25 deletions(-)

Index: linux-2.6/arch/x86/include/asm/pgtable_64.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/pgtable_64.h
+++ linux-2.6/arch/x86/include/asm/pgtable_64.h
@@ -165,7 +165,7 @@ static inline int pgd_large(pgd_t pgd) {
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })

extern int kern_addr_valid(unsigned long addr);
-extern void cleanup_highmap(void);
+extern void cleanup_highmap(unsigned long end);

#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * r
/* Make NULL pointers segfault */
zap_identity_mappings();

- /* Cleanup the over mapped high alias */
- cleanup_highmap();
-
max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;

for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -297,6 +297,9 @@ static void __init init_gbpages(void)
static inline void init_gbpages(void)
{
}
+static void __init cleanup_highmap(unsigned long end)
+{
+}
#endif

static void __init reserve_brk(void)
@@ -922,6 +925,9 @@ void __init setup_arch(char **cmdline_p)
*/
reserve_brk();

+ /* Cleanup the over mapped high alias after _brk_end*/
+ cleanup_highmap(_brk_end);
+
memblock.current_limit = get_max_mapped();
memblock_x86_fill();

Index: linux-2.6/arch/x86/mm/init.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init.c
+++ linux-2.6/arch/x86/mm/init.c
@@ -279,25 +279,6 @@ unsigned long __init_refok init_memory_m
load_cr3(swapper_pg_dir);
#endif

-#ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
-#endif
__flush_tlb_all();

if (!after_bootmem && e820_table_end > e820_table_start)
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -297,13 +297,14 @@ void __init init_extra_mapping_uc(unsign
* rounded up to the 2MB boundary. This catches the invalid pmds as
* well, as they are located before _text:
*/
-void __init cleanup_highmap(void)
+void __init cleanup_highmap(unsigned long end)
{
unsigned long vaddr = __START_KERNEL_map;
- unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt;
pmd_t *last_pmd = pmd + PTRS_PER_PMD;

+ end = roundup(end, PMD_SIZE) - 1;
+
for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
if (pmd_none(*pmd))
continue;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/