--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -96,7 +96,11 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
+#ifdef CONFIG_MMU
+#define VM_LOCK_RMAP 0x01000000 /* Do not follow this rmap (mmu mmap) */
+#else
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#endif
What's the locking for vm_area_struct.vm_flags? It's somewhat
unobvious what that locking is, and whether the code here observes it.
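For reference, the convention I'm assuming (my reading, not something
the patch spells out) is that vm_flags is only changed with mmap_sem
held for writing, roughly:

	/*
	 * Assumed convention, not taken from this patch: writers of
	 * vma->vm_flags hold mmap_sem for writing.
	 */
	down_write(&mm->mmap_sem);
	vma->vm_flags |= VM_LOCK_RMAP;
	up_write(&mm->mmap_sem);

If the rmap side then tests VM_LOCK_RMAP without mmap_sem, that's
exactly the part that deserves a comment.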
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -167,7 +167,7 @@ struct vm_area_struct {
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
- struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
+ struct list_head anon_vma_chain; /* Serialized by mmap_sem & friends */
Can we be a bit more explicit than "and friends"?
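If the "friends" are page_table_lock (which is what anon_vma_prepare()
takes when it links a vma in -- my guess at what's meant here), then
something like this would be clearer:

	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
					  * page_table_lock */

Whatever the actual lock set is, naming it beats "friends".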
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -328,15 +328,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (!tmp)
goto fail_nomem;
*tmp = *mpnt;
+ INIT_LIST_HEAD(&tmp->anon_vma_chain);
pol = mpol_dup(vma_policy(mpnt));
retval = PTR_ERR(pol);
if (IS_ERR(pol))
goto fail_nomem_policy;
vma_set_policy(tmp, pol);
+ if (anon_vma_fork(tmp, mpnt))
+ goto fail_nomem_anon_vma_fork;
Didn't we just leak `pol'?
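If so, one way to plug it would be along these lines -- a sketch only,
assuming the existing fail_nomem_policy label still frees tmp:

 fail_nomem_anon_vma_fork:
	mpol_put(pol);
 fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);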
@@ -542,6 +541,29 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
+ /*
+ * When changing only vma->vm_end, we don't really need
+ * anon_vma lock.
+ */
+ if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+ anon_vma = vma->anon_vma;
+ if (anon_vma) {
+ /*
+ * Easily overlooked: when mprotect shifts the boundary,
+ * make sure the expanding vma has anon_vma set if the
+ * shrinking vma had, to cover any anon pages imported.
+ */
+ if (importer && !importer->anon_vma) {
+ /* Block reverse map lookups until things are set up. */
Here, it'd be nice to explain the _reason_ for doing this.
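My guess at the reason (and it is only a guess): the importer is about
to be made reachable through the anon_vma chains while vm_start/vm_pgoff
still describe the old layout, so an rmap walk that found it now could
compute bogus addresses. If that's right, expanding the comment along
these lines would save the next reader the same head-scratching:

			/*
			 * Block rmap lookups while this vma is in flux:
			 * it is about to become reachable through the
			 * anon_vma chains, but vm_start/vm_pgoff still
			 * describe the old layout, so a reverse-map walk
			 * could compute the wrong address for its pages.
			 */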
@@ -1868,11 +1893,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
pol = mpol_dup(vma_policy(vma));
if (IS_ERR(pol)) {
- kmem_cache_free(vm_area_cachep, new);
- return PTR_ERR(pol);
+ err = PTR_ERR(pol);
+ goto out_free_vma;
}
vma_set_policy(new, pol);
+ if (anon_vma_clone(new, vma))
+ goto out_free_mpol;
The handling of `err' in this function is a bit tricksy. It's correct,
but not obviously so, and we might break it in the future. One way to
address that would be to sprinkle `err = -ENOMEM' everywhere, but that
wouldn't be very nice. Ho hum.
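i.e. the explicit-but-noisy variant would look roughly like this
(sketch):

	if (anon_vma_clone(new, vma)) {
		err = -ENOMEM;
		goto out_free_mpol;
	}

which is locally obvious but gets repetitive once every
allocation-failure goto carries it; relying on err being preset to
-ENOMEM is terser but only as robust as whoever edits this next.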
if (new->vm_file) {
get_file(new->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
@@ -1883,12 +1911,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
new->vm_ops->open(new);
if (new_below)
- vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+ err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
((addr - new->vm_start) >> PAGE_SHIFT), new);
else
- vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
- return 0;
+ /* Success. */
+ if (!err)
+ return 0;
+
+ /* Clean everything up if vma_adjust failed. */
+ new->vm_ops->close(new);
+ if (new->vm_file) {
+ if (vma->vm_flags & VM_EXECUTABLE)
+ removed_exe_file_vma(mm);
+ fput(new->vm_file);
+ }
Did the above path get tested?
+ /*
+ * First, attach the new VMA to the parent VMA's anon_vmas,
+ * so rmap can find non-COWed pages in child processes.
+ */
+ if (anon_vma_clone(vma, pvma))
+ return -ENOMEM;
+
+ /* Then add our own anon_vma. */
+ anon_vma = anon_vma_alloc();
+ if (!anon_vma)
+ goto out_error;
+ avc = anon_vma_chain_alloc();
+ if (!avc)
+ goto out_error_free_anon_vma;
The error paths here don't undo the results of anon_vma_clone(). I
guess all those anon_vma_chains get unlinked and freed later on, when
the half-built mm gets torn down on fork failure / at exit()?
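If we'd rather have the function clean up after itself, something like
the below in the error path might do it -- an untested sketch, assuming
unlink_anon_vmas() from this series is safe on a partially linked vma:

 out_error_free_anon_vma:
	anon_vma_free(anon_vma);
 out_error:
	unlink_anon_vmas(vma);	/* drop whatever anon_vma_clone() linked */
	return -ENOMEM;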
@@ -192,6 +280,9 @@ void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+ anon_vma_chain_cachep = kmem_cache_create("anon_vma_chain",
+ sizeof(struct anon_vma_chain), 0,
+ SLAB_PANIC, NULL);
Could use KMEM_CACHE() here.
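i.e.

	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);

which picks up the struct's name and alignment automatically; there's
no ctor here, so the NULL argument isn't buying anything.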
Trivial touchups: