Updated patch for 2.1.103 mmap.c

Bill Hawes (whawes@star.net)
Mon, 01 Jun 1998 10:51:58 -0400


I've attached an updated patch for 2.1.103 mmap.c to protect mmap and
munmap operations, adding a few more refinements and fixing a bug
spotted by Colin Plumb. These changes are important if we want to make
mmap operations completely thread-safe, as the present code has numerous
race conditions when threads share an mm struct.

The main point of the patch is to provide semaphore protection for the
mmap and munmap operations. It preallocates the required vma structures
(two for mmap, one for munmap) and then locks the mmap semaphore before
checking the vma lists and changing the structures. I've added two new
entry points, do_munmap_locked and merge_segments_locked, so that these
operations can be called when the semaphore is already held.
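The munmap path shows the pattern most compactly. Condensed from the
patch below (the spare vma covers the case where the unmap punches a
hole in the middle of an existing vma):

int do_munmap(unsigned long addr, size_t len)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * extra;
	int error;

	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
		return -EINVAL;
	if ((len = PAGE_ALIGN(len)) == 0)
		return 0;

	/* Allocate the (possibly unneeded) spare vma before taking
	 * the semaphore, so we never sleep in the allocator while
	 * holding mm->mmap_sem.  do_munmap_locked checks the pointer
	 * only if the unmap would actually split a vma.
	 */
	extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);

	down(&mm->mmap_sem);
	error = do_munmap_locked(addr, len, &extra);
	up(&mm->mmap_sem);

	/* Release the spare vma if it wasn't used for a split. */
	if (extra)
		kmem_cache_free(vm_area_cachep, extra);
	return error;
}

do_mmap follows the same ordering, except that it preallocates two
vmas and hands the second one down to do_munmap_locked when clearing
old mappings.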

The patch also fixes a problem in the munmap code with the test for
map_count > MAX_MAP_COUNT. The test was being made after the vma list
had already been modified, so an error exit would have left the list
in an inconsistent state.

There was also a minor problem with the locked_vm accounting -- pages
were being counted only if VM_LOCKED but not VM_IO was set, whereas
elsewhere in the kernel the VM_LOCKED flag alone determines the
locked_vm count.
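With the change, the accounting in do_mmap looks like this (excerpted
and condensed from the hunks below; the loop that touches the pages
to fault them in is unchanged and elided here):

	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED)
		mm->locked_vm += len >> PAGE_SHIFT;
	/*
	 * Safe to unlock now ... all done manipulating vma lists.
	 */
	up(&mm->mmap_sem);

	if ((flags & (VM_LOCKED | VM_IO)) == VM_LOCKED) {
		/* touch each page in the range to make it present */
	}

So locked_vm is now charged for every VM_LOCKED mapping, while only
the non-VM_IO mappings are faulted in up front.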

The patch is running on my system with no ill effects and (hopefully)
should be ready for public consumption.

Regards,
Bill
[Attachment: mmap_103-patch]

--- linux-2.1.103/include/linux/mm.h.old Thu May 21 15:07:52 1998
+++ linux-2.1.103/include/linux/mm.h Sun May 31 17:14:52 1998
@@ -300,7 +300,9 @@
/* mmap.c */
extern void vma_init(void);
-extern unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags, unsigned long off);
+extern unsigned long do_mmap(struct file *, unsigned long, size_t,
+ unsigned long, unsigned long, unsigned long);
extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
+extern void merge_segments_locked(struct mm_struct *, unsigned long,
+ unsigned long);
extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void exit_mmap(struct mm_struct *);
--- linux-2.1.103/mm/mmap.c.old Sat Mar 7 11:32:19 1998
+++ linux-2.1.103/mm/mmap.c Mon Jun 1 10:39:29 1998
@@ -48,6 +48,8 @@

int sysctl_overcommit_memory;

+static int do_munmap_locked(unsigned long, size_t, struct vm_area_struct **);
+
/* Check that a process has enough memory to allocate a
* new virtual mapping.
*/
@@ -86,6 +88,18 @@
}
}

+/*
+ * Unlink from the share list and free a vma struct.
+ */
+static inline void free_vma(struct mm_struct *mm, struct vm_area_struct *mpnt)
+{
+ mm->map_count--;
+ remove_shared_vm_struct(mpnt);
+ if (mpnt->vm_file)
+ fput(mpnt->vm_file);
+ kmem_cache_free(vm_area_cachep, mpnt);
+}
+
asmlinkage unsigned long sys_brk(unsigned long brk)
{
unsigned long rlim, retval;
@@ -157,11 +171,11 @@
#undef _trans
}

-unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
+unsigned long do_mmap(struct file * file, unsigned long addr, size_t len,
unsigned long prot, unsigned long flags, unsigned long off)
{
struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * extra;
int correct_wcount = 0, error;

if ((len = PAGE_ALIGN(len)) == 0)
@@ -174,10 +188,6 @@
if (off + len < off)
return -EINVAL;

- /* Too many mappings? */
- if (mm->map_count > MAX_MAP_COUNT)
- return -ENOMEM;
-
/* mlock MCL_FUTURE? */
if (mm->def_flags & VM_LOCKED) {
unsigned long locked = mm->locked_vm << PAGE_SHIFT;
@@ -186,53 +196,70 @@
return -EAGAIN;
}

+ /*
+ * We need at least one vma, and may need an extra one if we
+ * unmap a hole. Get them now before acquiring the semaphore.
+ */
+ error = -ENOMEM;
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+ goto out;
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ /* (allocation will be checked later) */
+
+ down(&mm->mmap_sem);
+
+ /* Too many mappings? */
+ if (mm->map_count > MAX_MAP_COUNT)
+ goto out_free;
+
/* Do simple checking here so the lower-level routines won't have
- * to. we assume access permissions have been handled by the open
+ * to. We assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
+ error = -EINVAL;
if (file != NULL) {
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ error = -EACCES;
if ((prot & PROT_WRITE) && !(file->f_mode & 2))
- return -EACCES;
+ goto out_free;

- /* make sure there are no mandatory locks on the file. */
+ /* Verify there are no mandatory locks on the file. */
+ error = -EAGAIN;
if (locks_verify_locked(file->f_dentry->d_inode))
- return -EAGAIN;
+ goto out_free;
/* fall through */
case MAP_PRIVATE:
+ error = -EACCES;
if (!(file->f_mode & 1))
- return -EACCES;
+ goto out_free;
break;

default:
- return -EINVAL;
+ goto out_free;
}
} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
- return -EINVAL;
+ goto out_free;

- /* Obtain the address to map to. we verify (or select) it and ensure
+ /* Obtain the address to map to. We verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
if (flags & MAP_FIXED) {
+ error = -EINVAL;
if (addr & ~PAGE_MASK)
- return -EINVAL;
+ goto out_free;
} else {
+ error = -ENOMEM;
addr = get_unmapped_area(addr, len);
if (!addr)
- return -ENOMEM;
+ goto out_free;
}

/* Determine the object being mapped and call the appropriate
- * specific mapper. the address has already been validated, but
+ * specific mapper. The address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- if (file && (!file->f_op || !file->f_op->mmap))
- return -ENODEV;
-
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
- return -ENOMEM;

vma->vm_mm = mm;
vma->vm_start = addr;
@@ -240,6 +267,10 @@
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

if (file) {
+ error = -ENODEV;
+ if (!file->f_op || !file->f_op->mmap)
+ goto out_free;
+
if (file->f_mode & 1)
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_SHARED) {
@@ -265,21 +296,22 @@
vma->vm_file = NULL;
vma->vm_pte = 0;

- /* Clear old maps */
- error = -ENOMEM;
- if (do_munmap(addr, len))
- goto free_vma;
+ /* Clear old maps. */
+ error = do_munmap_locked(addr, len, &extra);
+ if (error)
+ goto out_free;

+ error = -ENOMEM;
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
> current->rlim[RLIMIT_AS].rlim_cur)
- goto free_vma;
+ goto out_free;

/* Private writable mapping? Check memory availability.. */
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
!(flags & MAP_NORESERVE) &&
!vm_enough_memory(len >> PAGE_SHIFT))
- goto free_vma;
+ goto out_free;

error = 0;
if (file) {
@@ -298,13 +330,12 @@
}
if (!error)
error = file->f_op->mmap(file, vma);
-
}
/* Fix up the count if necessary, then check for an error */
if (correct_wcount)
file->f_dentry->d_inode->i_writecount++;
if (error)
- goto free_vma;
+ goto out_free;

/*
* merge_segments may merge our vma, so we can't refer to it
@@ -313,12 +344,18 @@
flags = vma->vm_flags;
addr = vma->vm_start; /* can addr have changed?? */
insert_vm_struct(mm, vma);
- merge_segments(mm, vma->vm_start, vma->vm_end);
-
+ merge_segments_locked(mm, vma->vm_start, vma->vm_end);
+
mm->total_vm += len >> PAGE_SHIFT;
- if ((flags & VM_LOCKED) && !(flags & VM_IO)) {
- unsigned long start = addr;
+ if (flags & VM_LOCKED)
mm->locked_vm += len >> PAGE_SHIFT;
+ /*
+ * Safe to unlock now ... all done manipulating vma lists.
+ */
+ up(&mm->mmap_sem);
+
+ if ((flags & (VM_LOCKED | VM_IO)) == VM_LOCKED) {
+ unsigned long start = addr;
do {
char c;
get_user(c,(char *) start);
@@ -329,8 +366,13 @@
}
return addr;

-free_vma:
+out_free:
+ up(&mm->mmap_sem);
+
+ if (extra)
+ kmem_cache_free(vm_area_cachep, extra);
kmem_cache_free(vm_area_cachep, vma);
+out:
return error;
}

@@ -384,12 +426,13 @@
static int unmap_fixup(struct vm_area_struct *area, unsigned long addr,
size_t len, struct vm_area_struct **extra)
{
+ struct mm_struct * mm = current->mm;
struct vm_area_struct *mpnt;
unsigned long end = addr + len;

- area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+ mm->total_vm -= len >> PAGE_SHIFT;
if (area->vm_flags & VM_LOCKED)
- area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+ mm->locked_vm -= len >> PAGE_SHIFT;

/* Unmapping the whole area. */
if (addr == area->vm_start && end == area->vm_end) {
@@ -425,7 +468,7 @@
if (mpnt->vm_ops && mpnt->vm_ops->open)
mpnt->vm_ops->open(mpnt);
area->vm_end = addr; /* Truncate area */
- insert_vm_struct(current->mm, mpnt);
+ insert_vm_struct(mm, mpnt);
}

/* Close the current area ... */
@@ -438,98 +481,82 @@
/* ... then reopen and reinsert. */
if (area->vm_ops && area->vm_ops->open)
area->vm_ops->open(area);
- insert_vm_struct(current->mm, area);
+ insert_vm_struct(mm, area);
return 1;
}

-asmlinkage int sys_munmap(unsigned long addr, size_t len)
-{
- int ret;
-
- lock_kernel();
- ret = do_munmap(addr, len);
- unlock_kernel();
- return ret;
-}
-
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work. This now handles partial unmappings.
- * Jeremy Fitzhardine <jeremy@sw.oz.au>
+/*
+ * Find the areas that need to be unmapped. The extra vma
+ * has been allocated (but not checked), and the pointer
+ * will be cleared if the memory is used.
+ *
+ * Note: called with the mmap semaphore held.
*/
-int do_munmap(unsigned long addr, size_t len)
+static int do_munmap_locked(unsigned long addr, size_t len,
+ struct vm_area_struct **extra)
{
- struct mm_struct * mm;
- struct vm_area_struct *mpnt, *next, *free, *extra;
- int freed;
-
- if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
- return -EINVAL;
-
- if ((len = PAGE_ALIGN(len)) == 0)
- return 0;
+ struct mm_struct * mm = current->mm;
+ unsigned long extent = addr + len;
+ struct vm_area_struct *mpnt, *free;
+ int error;

- /* Check if this memory area is ok - put it on the temporary
- * list if so.. The checks here are pretty simple --
- * every area affected in some way (by any overlap) is put
- * on the list. If nothing is put on, nothing is affected.
+ /*
+ * Find the first area potentially affected by the unmapping.
*/
- mm = current->mm;
mpnt = mm->mmap;
while(mpnt && mpnt->vm_end <= addr)
mpnt = mpnt->vm_next;
if (!mpnt)
- return 0;
+ goto out_done;

/*
- * We may need one additional vma to fix up the mappings ...
- * and this is the last chance for an easy error exit.
+ * Check whether the unmapping will create a hole,
+ * and if so make sure we can handle it.
*/
- extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!extra)
- return -ENOMEM;
-
- next = mpnt->vm_next;
+ if (mpnt->vm_start < addr && extent < mpnt->vm_end) {
+ error = -ENOMEM;
+ if (!*extra)
+ goto out;
+ if (mm->map_count > MAX_MAP_COUNT)
+ goto out;
+ }

- /* we have mpnt->vm_next = next and addr < mpnt->vm_end */
+ /*
+ * Check if this memory area is ok - put it on the temporary
+ * list if so.. The checks here are pretty simple --
+ * every area affected in some way (by any overlap) is put
+ * on the list. If nothing is put on, nothing is affected.
+ *
+ * Note: invariants mpnt->vm_next = next and addr < mpnt->vm_end
+ */
free = NULL;
- for ( ; mpnt && mpnt->vm_start < addr+len; ) {
+ while (mpnt && mpnt->vm_start < extent) {
struct vm_area_struct *next = mpnt->vm_next;

- if(mpnt->vm_next)
- mpnt->vm_next->vm_pprev = mpnt->vm_pprev;
- *mpnt->vm_pprev = mpnt->vm_next;
+ if (next)
+ next->vm_pprev = mpnt->vm_pprev;
+ *mpnt->vm_pprev = next;

mpnt->vm_next = free;
free = mpnt;
mpnt = next;
}

- if (free && (free->vm_start < addr) && (free->vm_end > addr+len)) {
- if (mm->map_count > MAX_MAP_COUNT) {
- kmem_cache_free(vm_area_cachep, extra);
- return -ENOMEM;
- }
- }
-
/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
* If the one of the segments is only being partially unmapped,
* it will put new vm_area_struct(s) into the address space.
*/
- freed = 0;
while ((mpnt = free) != NULL) {
unsigned long st, end, size;

free = free->vm_next;
- freed = 1;

mm->map_count--;
remove_shared_vm_struct(mpnt);

st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
- end = addr+len;
- end = end > mpnt->vm_end ? mpnt->vm_end : end;
+ end = extent > mpnt->vm_end ? mpnt->vm_end : extent;
size = end - st;

if (mpnt->vm_ops && mpnt->vm_ops->unmap)
@@ -542,17 +569,58 @@
/*
* Fix the mapping, and free the old area if it wasn't reused.
*/
- if (!unmap_fixup(mpnt, st, size, &extra))
+ if (!unmap_fixup(mpnt, st, size, extra))
kmem_cache_free(vm_area_cachep, mpnt);
+ mm->mmap_cache = NULL; /* Kill the cache. */
}
+out_done:
+ error = 0;
+out:
+ return error;
+}
+
+/*
+ * Munmap is split into two main parts: do_munmap_locked, which
+ * finds what needs doing, and unmap_fixup, which does the work.
+ * This now handles partial unmappings.
+ * Jeremy Fitzhardine <jeremy@sw.oz.au>
+ */
+int do_munmap(unsigned long addr, size_t len)
+{
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct *extra;
+ int error;
+
+ if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
+ return -EINVAL;
+
+ if ((len = PAGE_ALIGN(len)) == 0)
+ return 0;
+
+ /*
+ * We may need one additional vma to fix up the mappings ...
+ * get it now before acquiring the semaphore.
+ */
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+
+ down(&mm->mmap_sem);
+ error = do_munmap_locked(addr, len, &extra);
+ up(&mm->mmap_sem);

/* Release the extra vma struct if it wasn't used */
if (extra)
kmem_cache_free(vm_area_cachep, extra);
+ return error;
+}

- if (freed)
- mm->mmap_cache = NULL; /* Kill the cache. */
- return 0;
+asmlinkage int sys_munmap(unsigned long addr, size_t len)
+{
+ int ret;
+
+ lock_kernel();
+ ret = do_munmap(addr, len);
+ unlock_kernel();
+ return ret;
}

/* Release all mmaps. */
@@ -625,17 +693,29 @@
}
}

-/* Merge the list of memory segments if possible.
- * Redundant vm_area_structs are freed.
- * This assumes that the list is ordered by address.
- * We don't need to traverse the entire list, only those segments
- * which intersect or are adjacent to a given interval.
+/*
+ * Externally visible entry point when semaphore not held.
*/
-void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+void merge_segments (struct mm_struct * mm, unsigned long start_addr,
+ unsigned long end_addr)
{
- struct vm_area_struct *prev, *mpnt, *next;
-
down(&mm->mmap_sem);
+ merge_segments_locked(mm, start_addr, end_addr);
+ up(&mm->mmap_sem);
+}
+
+/*
+ * Merge the list of memory segments if possible, freeing any redundant
+ * vm_area_structs. (This assumes that the list is ordered by address.)
+ * We don't need to traverse the entire list, only those segments that
+ * intersect or are adjacent to a given interval.
+ *
+ * Note: the caller must hold the mmap semaphore.
+ */
+void merge_segments_locked(struct mm_struct * mm, unsigned long start_addr,
+ unsigned long end_addr)
+{
+ struct vm_area_struct *prev, *mpnt, *next;

prev = NULL;
mpnt = mm->mmap;
@@ -644,7 +724,7 @@
mpnt = mpnt->vm_next;
}
if (!mpnt)
- goto no_vma;
+ return;

next = mpnt->vm_next;

@@ -682,9 +762,9 @@
* big segment can possibly merge with the next one.
* The old unused mpnt is freed.
*/
- if(mpnt->vm_next)
- mpnt->vm_next->vm_pprev = mpnt->vm_pprev;
- *mpnt->vm_pprev = mpnt->vm_next;
+ if(next)
+ next->vm_pprev = mpnt->vm_pprev;
+ *mpnt->vm_pprev = next;

prev->vm_end = mpnt->vm_end;
if (mpnt->vm_ops && mpnt->vm_ops->close) {
@@ -692,16 +772,10 @@
mpnt->vm_start = mpnt->vm_end;
mpnt->vm_ops->close(mpnt);
}
- mm->map_count--;
- remove_shared_vm_struct(mpnt);
- if (mpnt->vm_file)
- fput(mpnt->vm_file);
- kmem_cache_free(vm_area_cachep, mpnt);
+ free_vma(mm, mpnt);
mpnt = prev;
}
mm->mmap_cache = NULL; /* Kill the cache. */
-no_vma:
- up(&mm->mmap_sem);
}

__initfunc(void vma_init(void))

