[PATCH 2/3] revoke: fix shared mapping revoke

From: Pekka J Enberg
Date: Fri Mar 16 2007 - 06:07:34 EST


From: Pekka Enberg <penberg@xxxxxxxxxxxxxx>

As pointed out by Nick Piggin, modifying vma->vm_flags without
->mmap_sem can corrupt the flags. This changes revoke_mapping() to use
down_write_trylock() to acquire ->mmap_sem and then rescan vmas of the
parent mm.

Also make sure we restart scanning in revoke_mm() if zap_page_range()
did not finish within our process timeslice.

Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx>
Signed-off-by: Pekka Enberg <penberg@xxxxxxxxxxxxxx>
---
fs/revoke.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 105 insertions(+), 23 deletions(-)

Index: uml-2.6/fs/revoke.c
===================================================================
--- uml-2.6.orig/fs/revoke.c 2007-03-16 11:37:52.000000000 +0200
+++ uml-2.6/fs/revoke.c 2007-03-16 12:02:10.000000000 +0200
@@ -158,6 +158,22 @@ for (fd = 0; fd < fdt->max_fds; fd++) {
return err;
}

+static inline bool need_revoke(struct vm_area_struct *vma,
+ struct file *to_exclude)
+{
+ if (vma->vm_flags & VM_REVOKED)
+ return false;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return false;
+
+ return vma->vm_file != to_exclude;
+}
+
+/*
+ * LOCKING: down_write(&mm->mmap_sem)
+ * -> spin_lock(&mapping->i_mmap_lock)
+ */
static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details)
{
unsigned long restart_addr, start_addr, end_addr;
@@ -166,11 +182,6 @@ static int revoke_vma(struct vm_area_str
start_addr = vma->vm_start;
end_addr = vma->vm_end;

- /*
- * Not holding ->mmap_sem here.
- */
- vma->vm_flags |= VM_REVOKED;
- smp_mb();
again:
restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr,
details);
@@ -183,6 +194,7 @@ static int revoke_vma(struct vm_area_str
start_addr = restart_addr;
goto again;
}
+ vma->vm_flags |= VM_REVOKED;
return 0;

out_need_break:
@@ -192,34 +204,108 @@ return 0;
return -EINTR;
}

-static int revoke_mapping(struct address_space *mapping, struct file *to_exclude)
+/*
+ * LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static int revoke_mm(struct mm_struct *mm, struct address_space *mapping,
+ struct file *to_exclude)
{
struct vm_area_struct *vma;
- struct prio_tree_iter iter;
struct zap_details details;
int err = 0;

details.i_mmap_lock = &mapping->i_mmap_lock;

- spin_lock(&mapping->i_mmap_lock);
+ /*
+ * If ->mmap_sem is under contention, we continue scanning other
+ * mms and try again later.
+ */
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ if (!need_revoke(vma, to_exclude))
+ continue;
+
+ err = revoke_vma(vma, &details);
+ if (err)
+ break;
+ }
+ up_write(&mm->mmap_sem);
+ out:
+ return err;
+}
+
+/*
+ * LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static void revoke_mapping_tree(struct address_space *mapping,
+ struct file *to_exclude)
+{
+ struct vm_area_struct *vma;
+ struct prio_tree_iter iter;
+ int try_again = 0;
+
+ restart:
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
- if ((vma->vm_flags & VM_SHARED) && vma->vm_file != to_exclude) {
- err = revoke_vma(vma, &details);
- if (err)
- goto out;
+ int err;
+
+ if (likely(!need_revoke(vma, to_exclude)))
+ continue;
+
+ err = revoke_mm(vma->vm_mm, mapping, to_exclude);
+ if (err == -EAGAIN) {
+ try_again = 1;
+ continue;
}
+ if (err == -EINTR)
+ goto restart;
+ }
+ if (try_again) {
+ cond_resched();
+ goto restart;
}
+}

+/*
+ * LOCKING: spin_lock(&mapping->i_mmap_lock)
+ */
+static void revoke_mapping_list(struct address_space *mapping,
+ struct file *to_exclude)
+{
+ struct vm_area_struct *vma;
+ int try_again = 0;
+
+ restart:
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
- if ((vma->vm_flags & VM_SHARED) && vma->vm_file != to_exclude) {
- err = revoke_vma(vma, &details);
- if (err)
- goto out;
+ int err;
+
+ if (likely(!need_revoke(vma, to_exclude)))
+ continue;
+
+ err = revoke_mm(vma->vm_mm, mapping, to_exclude);
+ if (err == -EAGAIN) {
+ try_again = 1;
+ continue;
}
+ if (err == -EINTR)
+ goto restart;
}
- out:
+ if (try_again) {
+ cond_resched();
+ goto restart;
+ }
+}
+
+static void revoke_mapping(struct address_space *mapping, struct file *to_exclude)
+{
+ spin_lock(&mapping->i_mmap_lock);
+ if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+ revoke_mapping_tree(mapping, to_exclude);
+ if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
+ revoke_mapping_list(mapping, to_exclude);
spin_unlock(&mapping->i_mmap_lock);
- return err;
}

static void restore_file(struct revokefs_inode_info *info)
@@ -439,11 +525,7 @@ int err = 0;
/*
* Take down shared memory mappings.
*/
- err = revoke_mapping(inode->i_mapping, to_exclude);
- if (err) {
- restore_files(table);
- goto out_free_table;
- }
+ revoke_mapping(inode->i_mapping, to_exclude);

/*
* Now, revoke the files for good.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/