Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled

From: Baoquan He
Date: Thu Apr 27 2017 - 06:32:06 EST


Hi Thomas,

Thanks for reviewing!

On 04/26/17 at 07:49am, Thomas Garnier wrote:
> On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@xxxxxxxxxx> wrote:
> > > arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
> > > 1 file changed, 27 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> > > index 2ee7694..2e7baff 100644
> > > --- a/arch/x86/platform/efi/efi_64.c
> > > +++ b/arch/x86/platform/efi/efi_64.c
> > > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
> > >
> > > pgd_t * __init efi_call_phys_prolog(void)
> > > {
> > > - unsigned long vaddress;
> > > + unsigned long vaddr, left_vaddr;
> > > + unsigned int num_entries;
> > > pgd_t *save_pgd;
> > > -
> > > - int pgd;
> > > + pud_t *pud, *pud_k;
> > > int n_pgds;
> > > + int i;
> > >
> > > if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > save_pgd = (pgd_t *)read_cr3();
> > > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
> > > n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
> > > save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
> > >
> > > - for (pgd = 0; pgd < n_pgds; pgd++) {
> > > - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> > > - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> > > - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> > > + for (i = 0; i < n_pgds; i++) {
> > > + save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> > > +
> > > + vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> > > + pud = pud_alloc_one(NULL, 0);
>
> Please check if pud is NULL.

I considered it a while. I didn't check because I thought it's still in
kernel init stage, and at most 128 page frames are cost for 64TB,
namely 512KB. If kernel can't give 512KB at this time, it will die soon.
I would like to hear what people are suggesting. Since you have pointed
it out, I will add checking here.

However I think we can keep those allocated page and try our best to
build as much ident mapping as possible. E.g if we have 10TB memory, but
failed to allocate page for 11th pud table, we can break the for loop,
leave those built ident mapping there since efi region could be located
inside those 0~5TB region.

Then inefi_call_phys_epilog() only free these allocated pud tables in
efi_call_phys_prolog, check and avoid freeing those pud tables from
direct mapping which still existed because of allocation failure in
efi_call_phys_prolog.

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2e7baff..67920d4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -94,6 +94,11 @@ pgd_t * __init efi_call_phys_prolog(void)

vaddr = (unsigned long)__va(i * PGDIR_SIZE);
pud = pud_alloc_one(NULL, 0);
+ if (!pud) {
+ pr_err("Failed to allocate page for %d-th pud table "
+ "to build 1:1 mapping!\n", i);
+ break;
+ }

num_entries = PTRS_PER_PUD - pud_index(vaddr);
pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
@@ -132,8 +137,10 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)

for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
- pud = (pud_t *)pgd_page_vaddr(*pgd);
- pud_free(NULL, pud);
+ if (*pgd != save_pgd[pgd_idx]) {
+ pud = (pud_t *)pgd_page_vaddr(*pgd);
+ pud_free(NULL, pud);
+ }
set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
}


>
> > > +
> > > + num_entries = PTRS_PER_PUD - pud_index(vaddr);
> > > + pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> > > + memcpy(pud, pud_k, num_entries);
> > > + if (pud_index(vaddr) > 0) {
>
> You are using pud_index(vaddr) 3 times, might be worth using a local variable.

Sure, will do, thanks.

>
> > > + left_vaddr = vaddr + (num_entries * PUD_SIZE);
> > > + pud_k = pud_offset(pgd_offset_k(left_vaddr),
> > > + left_vaddr);
> > > + memcpy(pud + num_entries, pud_k, pud_index(vaddr));
>
> I think this section (or the overall for loop) would benefit with a
> comment explaining explaining why you are shifting the new PUD like
> this.

Will write a paragraph.
>
> > > + }
> > > + pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
> > > }
> > > out:
> > > __flush_tlb_all();
> > > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > > */
> > > int pgd_idx;
> > > int nr_pgds;
> > > + pud_t *pud;
> > > + pgd_t *pgd;
> > >
> > > if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > write_cr3((unsigned long)save_pgd);
> > > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > >
> > > nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
> > >
> > > - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> > > + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> > > + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> > > + pud = (pud_t *)pgd_page_vaddr(*pgd);
> > > + pud_free(NULL, pud);
> > > set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> > > + }
> > >
> > > kfree(save_pgd);
> > >
> > > --
> > > 2.5.5
> > >
>
>
>
>
> --
> Thomas