[RFC] preliminary patch for mm/mlock.c

Bill Hawes (whawes@star.net)
Mon, 01 Jun 1998 11:10:29 -0400


I've started work on adding semaphore protection to the mlock system
calls and have attached a preliminary patch for review and comment. The
approach is similar to the mmap and munmap case in that I allocate the
required vma structs in advance (at most two are needed) and then
proceed with the list manipulations.
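
The worst case is a range that falls strictly inside a single vma,
splitting it into left, locked, and right pieces; the middle-split
fixup then just consumes two entries from the preallocated chain
(condensed from the mlock_fixup_middle hunk in the patch below):

        /* [start,end) splits vma into left | locked | right */
        left = *extra;
        right = left->vm_next;
        *extra = right->vm_next;        /* two structs used, chain advanced */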

Preallocating has the advantage of significantly simplifying the various
mlock_fixup routines, since no allocation failures are possible once the
list manipulations begin. I've also simplified the loop exit conditions
somewhat.

One minor complication is that the semaphore must be released before
faulting in the locked pages. For mlockall the memory areas aren't
necessarily contiguous, so I handle that case by releasing the semaphore
after each area, touching its pages, and then restarting the loop. The
presence of the VM_LOCKED flag marks the progress made in processing the
areas, so everything works even if the lists change during the call.
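
In outline, the locking pass looks like this (condensed from the
do_mlockall hunk below):

restart:
        down(&mm->mmap_sem);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                unsigned int newflags = (vma->vm_flags | VM_LOCKED) & mask;

                if (newflags == vma->vm_flags)  /* handled on an earlier pass */
                        continue;
                mlock_fixup(vma, NULL, vma->vm_start, vma->vm_end, newflags);
                if (newflags & VM_LOCKED) {
                        /* drop the semaphore to fault the pages in */
                        up(&mm->mmap_sem);
                        touch_pages(vma->vm_start, vma->vm_end);
                        goto restart;
                }
        }
        up(&mm->mmap_sem);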

The one thing I don't like about the preallocation strategy is that it
introduces the possibility of failure even when the area to be locked
exactly covers one or more vmas, in which case no new structs are needed
at all. The problem is that I have to take the semaphore before I can
check for this case, and I'd greatly prefer not to allocate memory while
holding the semaphore.

It would be fairly easy to use an opportunistic approach: take the
semaphore, check whether memory is needed, and drop the semaphore to
allocate if necessary. Any opinions on the pros and cons of this?
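
A rough sketch of what that might look like, using a hypothetical
vmas_needed() helper to count how many new vma structs the range would
require (everything else follows the patch below):

        struct vm_area_struct *extra = NULL;
        int have = 0, need;

        for (;;) {
                down(&mm->mmap_sem);
                need = vmas_needed(mm, start, end);     /* hypothetical helper */
                if (need <= have)
                        break;          /* proceed with the semaphore held */
                up(&mm->mmap_sem);

                while (have < need) {   /* allocate outside the semaphore */
                        struct vm_area_struct *n;

                        n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                        if (!n)
                                goto out_free;  /* -EAGAIN, free the chain */
                        n->vm_next = extra;
                        extra = n;
                        have++;
                }
                /* the map may have changed while unlocked, so recheck */
        }

The recheck after retaking the semaphore is the important part, since
the list can change while we're off allocating.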

Apart from these questions, I think the patch is reasonable and might
even work. (It's running on my system, though I'm not sure what
software, if any, uses the mlock calls.)

Regards,
Bill

--- linux-2.1.103/mm/mlock.c.old Sun May 17 12:21:09 1998
+++ linux-2.1.103/mm/mlock.c Mon Jun 1 09:31:29 1998
@@ -19,20 +19,17 @@
 #include <asm/system.h>
 #include <asm/pgtable.h>
 
-static inline int mlock_fixup_all(struct vm_area_struct * vma, int newflags)
+static inline void mlock_fixup_all(struct vm_area_struct * vma, int newflags)
 {
         vma->vm_flags = newflags;
-        return 0;
 }
 
-static inline int mlock_fixup_start(struct vm_area_struct * vma,
-        unsigned long end, int newflags)
+static inline void mlock_fixup_start(struct vm_area_struct * vma,
+        struct vm_area_struct ** extra, unsigned long end, int newflags)
 {
-        struct vm_area_struct * n;
+        struct vm_area_struct * n = *extra;
 
-        n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-        if (!n)
-                return -EAGAIN;
+        *extra = n->vm_next;
         *n = *vma;
         vma->vm_start = end;
         n->vm_end = end;
@@ -43,17 +40,14 @@
         if (n->vm_ops && n->vm_ops->open)
                 n->vm_ops->open(n);
         insert_vm_struct(current->mm, n);
-        return 0;
 }
 
-static inline int mlock_fixup_end(struct vm_area_struct * vma,
-        unsigned long start, int newflags)
+static inline void mlock_fixup_end(struct vm_area_struct * vma,
+        struct vm_area_struct ** extra, unsigned long start, int newflags)
 {
-        struct vm_area_struct * n;
+        struct vm_area_struct * n = *extra;
 
-        n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-        if (!n)
-                return -EAGAIN;
+        *extra = n->vm_next;
         *n = *vma;
         vma->vm_end = start;
         n->vm_start = start;
@@ -64,22 +58,18 @@
         if (n->vm_ops && n->vm_ops->open)
                 n->vm_ops->open(n);
         insert_vm_struct(current->mm, n);
-        return 0;
 }
 
-static inline int mlock_fixup_middle(struct vm_area_struct * vma,
+static inline void mlock_fixup_middle(struct vm_area_struct * vma,
+        struct vm_area_struct ** extra,
         unsigned long start, unsigned long end, int newflags)
 {
         struct vm_area_struct * left, * right;
 
-        left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-        if (!left)
-                return -EAGAIN;
-        right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-        if (!right) {
-                kmem_cache_free(vm_area_cachep, left);
-                return -EAGAIN;
-        }
+        left = *extra;
+        right = left->vm_next;
+        *extra = right->vm_next;
+
         *left = *vma;
         *right = *vma;
         left->vm_end = start;
@@ -98,51 +88,51 @@
         }
         insert_vm_struct(current->mm, left);
         insert_vm_struct(current->mm, right);
-        return 0;
 }
 
-static int mlock_fixup(struct vm_area_struct * vma,
-        unsigned long start, unsigned long end, unsigned int newflags)
+static void touch_pages(unsigned long start, unsigned long end)
 {
-        int pages, retval;
+        while (start < end) {
+                int c;
+                if (get_user(c,(int *) start))
+                        break;
+                __asm__ __volatile__("": :"r" (c));
+                start += PAGE_SIZE;
+        }
+}
 
-        if (newflags == vma->vm_flags)
-                return 0;
+static void mlock_fixup(struct vm_area_struct * vma,
+        struct vm_area_struct ** extra,
+        unsigned long start, unsigned long end, unsigned int newflags)
+{
+        int pages;
 
         if (start == vma->vm_start) {
                 if (end == vma->vm_end)
-                        retval = mlock_fixup_all(vma, newflags);
+                        mlock_fixup_all(vma, newflags);
                 else
-                        retval = mlock_fixup_start(vma, end, newflags);
+                        mlock_fixup_start(vma, extra, end, newflags);
         } else {
                 if (end == vma->vm_end)
-                        retval = mlock_fixup_end(vma, start, newflags);
+                        mlock_fixup_end(vma, extra, start, newflags);
                 else
-                        retval = mlock_fixup_middle(vma, start, end, newflags);
+                        mlock_fixup_middle(vma, extra, start, end, newflags);
         }
-        if (!retval) {
-                /* keep track of amount of locked VM */
-                pages = (end - start) >> PAGE_SHIFT;
-                if (!(newflags & VM_LOCKED))
-                        pages = -pages;
-                vma->vm_mm->locked_vm += pages;
-
-                if (newflags & VM_LOCKED)
-                        while (start < end) {
-                                int c;
-                                get_user(c,(int *) start);
-                                __asm__ __volatile__("": :"r" (c));
-                                start += PAGE_SIZE;
-                        }
-        }
-        return retval;
+
+        /* keep track of amount of locked VM */
+        pages = (end - start) >> PAGE_SHIFT;
+        if (!(newflags & VM_LOCKED))
+                pages = -pages;
+        vma->vm_mm->locked_vm += pages;
 }
 
 static int do_mlock(unsigned long start, size_t len, int on)
 {
+        struct mm_struct *mm = current->mm;
+        unsigned int mask = on ? ~0 : ~VM_LOCKED;
         unsigned long nstart, end, tmp;
-        struct vm_area_struct * vma, * next;
-        int error;
+        struct vm_area_struct * vma, * next, * extra;
+        int error, changed = 0;
 
         if (!capable(CAP_IPC_LOCK))
                 return -EPERM;
@@ -152,37 +142,67 @@
                 return -EINVAL;
         if (end == start)
                 return 0;
-        vma = find_vma(current->mm, start);
-        if (!vma || vma->vm_start > start)
-                return -ENOMEM;
 
-        for (nstart = start ; ; ) {
-                unsigned int newflags;
+printk("do_mlock: lock %08lx-%08lx, on=%d\n", start, end, on);
 
-                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
+        /*
+         * No more than two vma structs will be required,
+         * so we allocate them before getting the semaphore.
+         */
+        error = -EAGAIN;
+        extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+        if (!extra)
+                goto out;
+        next = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+        extra->vm_next = next;
+        if (!next)
+                goto out_free;
+        next->vm_next = NULL;
+
+        down(&mm->mmap_sem);
+        vma = find_vma(mm, start);
 
-                newflags = vma->vm_flags | VM_LOCKED;
-                if (!on)
-                        newflags &= ~VM_LOCKED;
+        error = -ENOMEM;
+        for (nstart = start; nstart < end; nstart = tmp, vma = next) {
+                unsigned int newflags;
 
-                if (vma->vm_end >= end) {
-                        error = mlock_fixup(vma, nstart, end, newflags);
-                        break;
-                }
+                if (!vma || vma->vm_start > nstart)
+                        goto out_merge;
+                if (mm->map_count > MAX_MAP_COUNT)
+                        goto out_merge;
+                next = vma->vm_next;
 
+                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
                 tmp = vma->vm_end;
-                next = vma->vm_next;
-                error = mlock_fixup(vma, nstart, tmp, newflags);
-                if (error)
-                        break;
-                nstart = tmp;
-                vma = next;
-                if (!vma || vma->vm_start != nstart) {
-                        error = -ENOMEM;
-                        break;
-                }
+                if (tmp > end)
+                        tmp = end;
+
+                newflags = (vma->vm_flags | VM_LOCKED) & mask;
+                if (vma->vm_flags == newflags)
+                        continue;
+                mlock_fixup(vma, &extra, nstart, tmp, newflags);
+                changed = 1;
         }
-        merge_segments(current->mm, start, end);
+        error = 0;
+
+out_merge:
+        if (changed)
+                merge_segments_locked(mm, start, end);
+        up(&mm->mmap_sem);
+
+        /*
+         * Lock the pages after releasing the semaphore
+         */
+        if (on)
+                touch_pages(start, nstart);
+
+out_free:
+        /* Free any unused segments */
+        while ((vma = extra) != NULL) {
+                extra = vma->vm_next;
+                kmem_cache_free(vm_area_cachep, vma);
        }
+out:
         return error;
 }
 
@@ -231,9 +251,10 @@
 
 static int do_mlockall(int flags)
 {
-        int error;
-        unsigned int def_flags;
+        struct mm_struct *mm = current->mm;
+        unsigned int def_flags, mask = (flags & MCL_CURRENT) ? ~0 : ~VM_LOCKED;
         struct vm_area_struct * vma;
+        int changed = 0;
 
         if (!capable(CAP_IPC_LOCK))
                 return -EPERM;
@@ -241,21 +262,39 @@
         def_flags = 0;
         if (flags & MCL_FUTURE)
                 def_flags = VM_LOCKED;
-        current->mm->def_flags = def_flags;
-
-        error = 0;
-        for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
-                unsigned int newflags;
+        mm->def_flags = def_flags;
 
-                newflags = vma->vm_flags | VM_LOCKED;
-                if (!(flags & MCL_CURRENT))
-                        newflags &= ~VM_LOCKED;
-                error = mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
-                if (error)
-                        break;
+        /*
+         * Note: we grab the semaphore even though there are no blocking
+         * operations, as someone else may already be using the list.
+         */
+restart:
+        down(&mm->mmap_sem);
+        for (vma = mm->mmap; vma ; vma = vma->vm_next) {
+                unsigned int newflags = (vma->vm_flags | VM_LOCKED) & mask;
+
+                if (newflags == vma->vm_flags)
+                        continue;
+                /* no extra memory is needed for this case */
+                mlock_fixup(vma, NULL, vma->vm_start, vma->vm_end, newflags);
+                changed = 1;
+
+                /*
+                 * This part is a little ugly: we have to release the
+                 * semaphore to fault in the pages, but then the vma
+                 * list might change out from under us. So we restart
+                 * after each vma until all have been changed.
+                 */
+                if (newflags & VM_LOCKED) {
+                        up(&mm->mmap_sem);
+                        touch_pages(vma->vm_start, vma->vm_end);
+                        goto restart;
+                }
         }
-        merge_segments(current->mm, 0, TASK_SIZE);
-        return error;
+        if (changed)
+                merge_segments_locked(mm, 0, TASK_SIZE);
+        up(&mm->mmap_sem);
+        return 0;
 }
 
 asmlinkage int sys_mlockall(int flags)