[RFC][PATCH 8/8] mm: Optimize pte_map_lock()

From: Peter Zijlstra
Date: Mon Jan 04 2010 - 15:50:00 EST


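If we ensure page-table invariance by also guarding against unmap
(unmap_page_range() now runs inside a vma->vm_sequence write section),
pte_map_lock() can skip the pgd/pud/pmd walk and instead validate the
vma early. The walk itself moves into handle_speculative_fault().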

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
mm/memory.c | 58 ++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 22 deletions(-)

Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -956,6 +956,7 @@ static unsigned long unmap_page_range(st
details = NULL;

BUG_ON(addr >= end);
+ write_seqcount_begin(&vma->vm_sequence);
mem_cgroup_uncharge_start();
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
@@ -970,6 +971,7 @@ static unsigned long unmap_page_range(st
} while (pgd++, addr = next, (addr != end && *zap_work > 0));
tlb_end_vma(tlb, vma);
mem_cgroup_uncharge_end();
+ write_seqcount_end(&vma->vm_sequence);

return addr;
}
@@ -1961,9 +1963,6 @@ static int pte_map_lock(struct mm_struct
unsigned long address, pmd_t *pmd, unsigned int flags,
unsigned int seq, pte_t **ptep, spinlock_t **ptlp)
{
- pgd_t *pgd;
- pud_t *pud;
-
if (!(flags & FAULT_FLAG_SPECULATIVE)) {
*ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
return 1;
@@ -1972,19 +1971,7 @@ static int pte_map_lock(struct mm_struct
again:
pin_page_tables();

- pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto out;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- goto out;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto out;
-
- if (pmd_huge(*pmd))
+ if (vma_is_dead(vma, seq))
goto out;

*ptlp = pte_lockptr(mm, pmd);
@@ -1998,7 +1985,7 @@ again:
if (!*ptep)
goto out;

- if (vma && vma_is_dead(vma, seq))
+ if (vma_is_dead(vma, seq))
goto unlock;

unpin_page_tables();
@@ -3115,13 +3102,14 @@ int handle_mm_fault(struct mm_struct *mm
int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
unsigned int flags)
{
- pmd_t *pmd = NULL;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
pte_t *pte, entry;
spinlock_t *ptl;
struct vm_area_struct *vma;
unsigned int seq;
- int ret = VM_FAULT_RETRY;
- int dead;
+ int dead, ret = VM_FAULT_RETRY;

__set_current_state(TASK_RUNNING);
flags |= FAULT_FLAG_SPECULATIVE;
@@ -3129,8 +3117,31 @@ int handle_speculative_fault(struct mm_s
count_vm_event(PGFAULT);

rcu_read_lock();
- if (!pte_map_lock(mm, NULL, address, pmd, flags, 0, &pte, &ptl))
- goto out_unlock;
+again:
+ pin_page_tables();
+
+ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto out;
+
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+ goto out;
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto out;
+
+ if (pmd_huge(*pmd))
+ goto out;
+
+ ptl = pte_lockptr(mm, pmd);
+ pte = pte_offset_map(pmd, address);
+ if (!spin_trylock(ptl)) {
+ pte_unmap(pte);
+ unpin_page_tables();
+ goto again;
+ }

vma = find_vma(mm, address);

@@ -3156,6 +3167,7 @@ int handle_speculative_fault(struct mm_s
entry = *pte;

pte_unmap_unlock(pte, ptl);
+ unpin_page_tables();

ret = handle_pte_fault(mm, vma, address, entry, pmd, flags, seq);

@@ -3165,6 +3177,8 @@ out_unlock:

out_unmap:
pte_unmap_unlock(pte, ptl);
+out:
+ unpin_page_tables();
goto out_unlock;
}


--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/