Subject: [PATCH] x86, 64bit: use #PF handler to setup page table for data

We need to access data areas that are not mapped in
arch/x86/kernel/head_64.S in two cases:
a. load microcode from initrd
b. when zero_page and command_line are loaded high above 1G.

With this one, we will not need to ioremap_init ahead...
The pgt buffer is from BRK, and we have enough space there.
Also later init_mem_mapping will reuse those pgt.

This patch is mostly from HPA. Others from Yinghai:
1. use it with BRK
2. only map 2M at a time, because zero_page and the command line are
   very small, and the microcode should be small too (128k?), and
   should not hit a possible hole that should be mapped.
3. make it work with kexec when phys_base is not zero.

Signed-off-by: Yinghai Lu
---
 arch/x86/kernel/head64.c  | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/head_64.S | 13 ++++++++---
 2 files changed, 64 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -26,6 +26,60 @@
 #include
 #include
 
+/* Create a new PMD entry */
+int __init early_make_pgtable(unsigned long address)
+{
+	unsigned long physaddr = address - __PAGE_OFFSET;
+	unsigned long i;
+	pgdval_t pgd, *pgd_p;
+	pudval_t pud, *pud_p;
+	pmdval_t pmd, *pmd_p;
+
+	if (address < __PAGE_OFFSET || physaddr >= MAXMEM)
+		return -1;	/* Invalid address - puke */
+
+	pgd_p = &init_level4_pgt[pgd_index(address)].pgd;
+	pgd = *pgd_p;
+
+	/*
+	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
+	 * critical -- __PAGE_OFFSET would point us back into the dynamic
+	 * range and we might end up looping forever...
+	 */
+	if (pgd)
+		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if ((char *)(_brk_end + PAGE_SIZE) > __brk_limit)
+			return -1;
+		pud_p = (pudval_t *)_brk_end;
+		_brk_end += PAGE_SIZE;
+
+		for (i = 0; i < PTRS_PER_PUD; i++)
+			pud_p[i] = 0;
+		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pud_p += pud_index(address);
+	pud = *pud_p;
+
+	if (pud)
+		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if ((char *)(_brk_end + PAGE_SIZE) > __brk_limit)
+			return -1;
+		pmd_p = (pmdval_t *)_brk_end;
+		_brk_end += PAGE_SIZE;
+
+		for (i = 0; i < PTRS_PER_PMD; i++)
+			pmd_p[i] = 0;
+		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pmd = (physaddr & PMD_MASK) + __PAGE_KERNEL_LARGE;
+	pmd_p[pmd_index(address)] = pmd;
+
+	return 0;
+}
+
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
Index: linux-2.6/arch/x86/kernel/head_64.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_64.S
+++ linux-2.6/arch/x86/kernel/head_64.S
@@ -494,14 +494,21 @@ ENTRY(early_idt_handler)
 	pushq %r11		# 0(%rsp)
 
 	cmpl $__KERNEL_CS,96(%rsp)
-	jne 10f
+	jne 11f
+	cmpl $14,72(%rsp)	# Page fault?
+	jnz 10f
+	GET_CR2_INTO(%rdi)	# can clobber any volatile register if pv
+	call early_make_pgtable
+	andl %eax,%eax
+	jz 20f			# All good
+10:
 	leaq 88(%rsp),%rdi	# Pointer to %rip
 	call early_fixup_exception
 	andl %eax,%eax
 	jnz 20f			# Found an exception entry
 
-10:
+11:
 #ifdef CONFIG_EARLY_PRINTK
 	GET_CR2_INTO(%r9)	# can clobber any volatile register if pv
 	movl 80(%rsp),%r8d	# error code
@@ -523,7 +530,7 @@ ENTRY(early_idt_handler)
 1:	hlt
 	jmp 1b
 
-20:	# Exception table entry found
+20:	# Exception table entry found or page table generated.
 	popq %r11
 	popq %r10
 	popq %r9