Re: [2/3] mm: fix up some user-visible effects of the stack guard page

From: Linus Torvalds
Date: Fri Aug 20 2010 - 17:25:29 EST


On Fri, Aug 20, 2010 at 1:42 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> You mean something like the below?

Yeah. I looked at exactly something like that. With the same name for
vma_next() - except I just passed down 'mm' to it too. Your patch
takes it from vma->mm and then you have a few places do the whole
__vma_next() by hand.

> Should I look at refreshing that thing (yes, that's a 2007 patch)?

Interesting. I generated about half the patch, and then decided that
it's not worth it. But the fact that apparently you did the same thing
in 2007 means that maybe there really _is_ a pent-up demand for doing
this dang thing.

I do note that I also did a patch that just added "vm_prev", and it's
a lot smaller. It doesn't allow for the cleanups of using the generic
list iterators, though. And it has the usual problem with the
straightforward doubly-linked lists: you end up with those ugly
conditional assignments like

if (prev)
prev->vm_next = vma;
if (next)
next->vm_prev = vma;

which the list_head approach avoids.

Appended is that much smaller patch. It has an ugly and partial
"validate_vma()" hack there in find_vma() to catch the worst bugs, but
it's really not tested. I did try booting it, and it's not spewing out
errors, but who the heck knows. It doesn't look too horrid, but...

What do you think?

NOTE! I've done _zero_ cleanups. And one of the thing I noticed is
that "find_vma_prev()" is sometimes used to find a "prev" even when
there is no "vma" (ie growing a grow-up stack at the end of the VM, or
adding a new mapping at the end), so my hope that we could get rid of
it entirely was naïve. But there should be other things we should be
able to simplify when we can get at the prev pointer (all the
mprotect/mlock splitting code should work fine without having that
'prev' thing passed around etc).

Linus
include/linux/mm_types.h | 2 +-
kernel/fork.c | 7 +++++--
mm/mmap.c | 37 +++++++++++++++++++++++++++++++++----
mm/nommu.c | 7 +++++--
4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b8bb9a6..ee7e258 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -134,7 +134,7 @@ struct vm_area_struct {
within vm_mm. */

/* linked list of VM areas per task, sorted by address */
- struct vm_area_struct *vm_next;
+ struct vm_area_struct *vm_next, *vm_prev;

pgprot_t vm_page_prot; /* Access permissions of this VMA. */
unsigned long vm_flags; /* Flags, see mm.h. */
diff --git a/kernel/fork.c b/kernel/fork.c
index 856eac3..b7e9d60 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,7 +300,7 @@ out:
#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
- struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
@@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (retval)
goto out;

+ prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;

@@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
goto fail_nomem_anon_vma_fork;
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
- tmp->vm_next = NULL;
+ tmp->vm_next = tmp->vm_prev = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
*pprev = tmp;
pprev = &tmp->vm_next;
+ tmp->vm_prev = prev;
+ prev = tmp;

__vma_link_rb(mm, tmp, rb_link, rb_parent);
rb_link = &tmp->vm_rb.rb_right;
diff --git a/mm/mmap.c b/mm/mmap.c
index 3100333..b6212c2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -388,17 +388,23 @@ static inline void
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node *rb_parent)
{
+ struct vm_area_struct *next;
+
+ vma->vm_prev = prev;
if (prev) {
- vma->vm_next = prev->vm_next;
+ next = prev->vm_next;
prev->vm_next = vma;
} else {
mm->mmap = vma;
if (rb_parent)
- vma->vm_next = rb_entry(rb_parent,
+ next = rb_entry(rb_parent,
struct vm_area_struct, vm_rb);
else
- vma->vm_next = NULL;
+ next = NULL;
}
+ vma->vm_next = next;
+ if (next)
+ next->vm_prev = vma;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -483,7 +489,11 @@ static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev)
{
- prev->vm_next = vma->vm_next;
+ struct vm_area_struct *next = vma->vm_next;
+
+ prev->vm_next = next;
+ if (next)
+ next->vm_prev = prev;
rb_erase(&vma->vm_rb, &mm->mm_rb);
if (mm->mmap_cache == vma)
mm->mmap_cache = prev;
@@ -1577,6 +1587,21 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,

EXPORT_SYMBOL(get_unmapped_area);

+static void validate_vma_list(struct vm_area_struct *vma)
+{
+ static int count;
+
+ if (((vma->vm_next && vma->vm_next->vm_prev != vma) ||
+ (vma->vm_prev && vma->vm_prev->vm_next != vma)) &&
+ count < 100) {
+ printk("Badness %p[%p] -> %p -> [%p]%p\n",
+ vma->vm_prev, vma->vm_prev ? vma->vm_prev->vm_next : NULL,
+ vma,
+ vma->vm_next ? vma->vm_next->vm_prev : NULL, vma->vm_next);
+ count++;
+ }
+}
+
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
@@ -1610,6 +1635,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
mm->mmap_cache = vma;
}
}
+if (vma) validate_vma_list(vma);
return vma;
}

@@ -1915,6 +1941,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr;

insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+ vma->vm_prev = NULL;
do {
rb_erase(&vma->vm_rb, &mm->mm_rb);
mm->map_count--;
@@ -1922,6 +1949,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
vma = vma->vm_next;
} while (vma && vma->vm_start < end);
*insertion_point = vma;
+ if (vma)
+ vma->vm_prev = prev;
tail_vma->vm_next = NULL;
if (mm->unmap_area == arch_unmap_area)
addr = prev ? prev->vm_end : mm->mmap_base;
diff --git a/mm/nommu.c b/mm/nommu.c
index efa9a38..88ff091 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
*/
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
- struct vm_area_struct *pvma, **pp;
+ struct vm_area_struct *pvma, **pp, *next;
struct address_space *mapping;
struct rb_node **p, *parent;

@@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
break;
}

- vma->vm_next = *pp;
+ next = *pp;
*pp = vma;
+ vma->vm_next = next;
+ if (next)
+ next->vm_prev = vma;
}

/*