[PATCH] mincore only, for i386

From: Chuck Lever (cel@monkey.org)
Date: Tue Jan 18 2000 - 17:42:56 EST


i'm breaking the madvise/mincore work into smaller chunks.

here's mincore for i386. this differs from previous implementations
-- it adds a new vm_operation (incore), a predicate that returns a
value indicating whether a given page in a vm area is in memory.

a separate predicate is implemented for filemaps and for shm segments.
other filesystem- and device-specific implementations can be provided as
necessary, but the filemap_incore logic is applicable to most block file
systems. a generic sys_mincore invokes whichever is appropriate, and
reports every page as resident when an area provides no incore operation.

comments appreciated!

note: i've tested the filemap stuff, but i don't currently have a shm
application that i can use to test the shm stuff -- it "looks" right, but
still needs to be tested.

diff -ruN Linux-2.3.40-3/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- Linux-2.3.40-3/arch/i386/kernel/entry.S Tue Jan 11 12:40:43 2000
+++ linux/arch/i386/kernel/entry.S Tue Jan 18 15:24:36 2000
@@ -617,7 +617,7 @@
         .long SYMBOL_NAME(sys_setgid)
         .long SYMBOL_NAME(sys_setfsuid) /* 215 */
         .long SYMBOL_NAME(sys_setfsgid)
-
+ .long SYMBOL_NAME(sys_mincore)
 
         /*
          * NOTE!! This doesn't have to be exact - we just have
@@ -625,6 +625,6 @@
          * entries. Don't panic if you notice that this hasn't
          * been shrunk every time we add a new system call.
          */
- .rept NR_syscalls-216
+ .rept NR_syscalls-217
                 .long SYMBOL_NAME(sys_ni_syscall)
         .endr
diff -ruN Linux-2.3.40-3/drivers/sgi/char/graphics.c linux/drivers/sgi/char/graphics.c
--- Linux-2.3.40-3/drivers/sgi/char/graphics.c Tue Jan 11 12:33:01 2000
+++ linux/drivers/sgi/char/graphics.c Tue Jan 18 15:57:58 2000
@@ -260,6 +260,7 @@
         NULL, /* no special mmap-protect */
         NULL, /* no special mmap-sync */
         NULL, /* no special mmap-advise */
+ NULL, /* no special mmap-incore */
         sgi_graphics_nopage, /* our magic no-page fault handler */
         NULL, /* no special mmap-wppage */
         NULL /* no special mmap-swapout */
diff -ruN Linux-2.3.40-3/drivers/sgi/char/shmiq.c linux/drivers/sgi/char/shmiq.c
--- Linux-2.3.40-3/drivers/sgi/char/shmiq.c Tue Jan 11 12:33:01 2000
+++ linux/drivers/sgi/char/shmiq.c Tue Jan 18 15:58:13 2000
@@ -302,6 +302,7 @@
         NULL, /* no special mmap-protect */
         NULL, /* no special mmap-sync */
         NULL, /* no special mmap-advise */
+ NULL, /* no special mmap-incore */
         shmiq_nopage, /* our magic no-page fault handler */
         NULL, /* no special mmap-wppage */
         NULL /* no special mmap-swapout */
diff -ruN Linux-2.3.40-3/fs/ncpfs/mmap.c linux/fs/ncpfs/mmap.c
--- Linux-2.3.40-3/fs/ncpfs/mmap.c Wed Nov 24 16:47:44 1999
+++ linux/fs/ncpfs/mmap.c Tue Jan 18 15:57:12 2000
@@ -100,6 +100,7 @@
         NULL, /* protect */
         NULL, /* sync */
         NULL, /* advise */
+ NULL, /* incore */
         ncp_file_mmap_nopage, /* nopage */
         NULL, /* wppage */
         NULL /* swapout */
diff -ruN Linux-2.3.40-3/include/asm-i386/unistd.h linux/include/asm-i386/unistd.h
--- Linux-2.3.40-3/include/asm-i386/unistd.h Tue Jan 11 12:40:56 2000
+++ linux/include/asm-i386/unistd.h Tue Jan 18 15:25:43 2000
@@ -221,6 +221,7 @@
 #define __NR_setgid32 214
 #define __NR_setfsuid32 215
 #define __NR_setfsgid32 216
+#define __NR_mincore 217
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -ruN Linux-2.3.40-3/include/linux/mm.h linux/include/linux/mm.h
--- Linux-2.3.40-3/include/linux/mm.h Fri Jan 14 15:43:09 2000
+++ linux/include/linux/mm.h Tue Jan 18 16:29:20 2000
@@ -106,6 +106,7 @@
         void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
         int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
         void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
+ char (*incore)(struct vm_area_struct *area, unsigned long);
         struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
         struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page);
         int (*swapout)(struct page *, struct file *);
@@ -446,6 +447,8 @@
                         size_t size, unsigned int flags);
 extern struct page *filemap_nopage(struct vm_area_struct * area,
                                     unsigned long address, int no_share);
+extern char filemap_incore(struct vm_area_struct * vma,
+ unsigned long pgoff);
 
 /*
  * GFP bitmasks..
diff -ruN Linux-2.3.40-3/ipc/shm.c linux/ipc/shm.c
--- Linux-2.3.40-3/ipc/shm.c Fri Jan 14 15:43:09 2000
+++ linux/ipc/shm.c Tue Jan 18 16:24:07 2000
@@ -64,6 +64,7 @@
 static void killseg (int shmid);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static char shm_incore (struct vm_area_struct *shmd, unsigned long pgoff);
 static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
 static int shm_swapout(struct page *, struct file *);
 #ifdef CONFIG_PROC_FS
@@ -590,6 +591,7 @@
         NULL, /* protect */
         NULL, /* sync */
         NULL, /* advise */
+ shm_incore, /* incore */
         shm_nopage, /* nopage */
         NULL, /* wppage */
         shm_swapout /* swapout */
@@ -829,6 +831,29 @@
 static int shm_swapout(struct page * page, struct file *file)
 {
         return 0;
+}
+
+/*
+ * Is the shm page at pgoff present?  Returns 1 if its pte is present, else 0.
+ */
+static char shm_incore(struct vm_area_struct * shmd, unsigned long pgoff)
+{
+	struct shmid_kernel * seg = (struct shmid_kernel *) shmd->vm_private_data;
+	pte_t entry;
+	char present;
+
+	down(&seg->sem);
+	if (shm_lock(seg->id) != seg)
+		BUG();
+
+	/* sample the pte with both the semaphore and the shm lock held */
+	entry = SHM_ENTRY(seg, pgoff);
+	present = pte_present(entry) ? 1 : 0;
+	shm_unlock(seg->id);
+	up(&seg->sem);
+
+	return present;
+/* end of shm_incore; its closing brace is the context line below */
 }
 
 /*
diff -ruN Linux-2.3.40-3/mm/filemap.c linux/mm/filemap.c
--- Linux-2.3.40-3/mm/filemap.c Fri Jan 14 15:43:09 2000
+++ linux/mm/filemap.c Tue Jan 18 16:35:16 2000
@@ -700,6 +700,37 @@
         return page;
 }
 
+/*
+ * filemap_incore - is the page at pgoff of this mapping "in core"?
+ *
+ * Returns 1 when the page is found in the page cache and is up to
+ * date, i.e. no page-in would be needed right now if an application
+ * were to map and access it; returns 0 otherwise.  What "in core"
+ * means precisely may be tightened later.  A vma with no backing
+ * file is always reported as present.
+ */
+char filemap_incore(struct vm_area_struct * vma, unsigned long pgoff)
+{
+	struct address_space * mapping;
+	struct page ** bucket;
+	struct page * found;
+	char present;
+
+	/* Punt: anonymous pages are always present */
+	if (vma->vm_file == NULL)
+		return 1;
+
+	mapping = &vma->vm_file->f_dentry->d_inode->i_data;
+	bucket = page_hash(mapping, pgoff);
+
+	spin_lock(&pagecache_lock);
+	found = __find_page_nolock(mapping, pgoff, *bucket);
+	present = (found && Page_Uptodate(found)) ? 1 : 0;
+	spin_unlock(&pagecache_lock);
+
+	return present;
+}
+
 #if 0
 #define PROFILE_READAHEAD
 #define DEBUG_READAHEAD
@@ -1633,6 +1664,7 @@
         NULL, /* no special protect */
         filemap_sync, /* sync */
         NULL, /* advise */
+ filemap_incore, /* incore */
         filemap_nopage, /* nopage */
         NULL, /* wppage */
         filemap_swapout /* swapout */
@@ -1651,6 +1683,7 @@
         NULL, /* protect */
         NULL, /* sync */
         NULL, /* advise */
+ filemap_incore, /* incore */
         filemap_nopage, /* nopage */
         NULL, /* wppage */
         NULL /* swapout */
diff -ruN Linux-2.3.40-3/mm/mmap.c linux/mm/mmap.c
--- Linux-2.3.40-3/mm/mmap.c Tue Dec 21 13:59:00 1999
+++ linux/mm/mmap.c Tue Jan 18 16:36:10 2000
@@ -12,6 +12,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/file.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -717,6 +718,123 @@
         ret = do_munmap(addr, len);
         up(&current->mm->mmap_sem);
         return ret;
+}
+
+/* Store one residency byte per page of [start, end) within vma into vec. */
+static long mincore_area(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end, char * vec)
+{
+	int error, size, i = 0;
+	char * tmp;
+
+	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; /* address -> page offset */
+	if (end > vma->vm_end)
+		end = vma->vm_end;	/* never look past this vma */
+	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+	size = end - start;	/* # of bytes in "vec" = # of pages */
+	tmp = vmalloc(size);
+	if (!tmp)
+		return -ENOMEM;
+
+	/* start must advance on BOTH branches; a vma without an incore op is reported resident */
+	for (; start < end; start++) {
+		if (vma->vm_ops && vma->vm_ops->incore)
+			tmp[i++] = vma->vm_ops->incore(vma, start);
+		else
+			tmp[i++] = 1;
+	}
+	error = copy_to_user(vec, tmp, size) ? -EFAULT : 0;	/* returns -EFAULT if vec is not writable */
+	vfree(tmp);
+	return error;
+}
+
+/*
+ * The mincore(2) system call.
+ *
+ * mincore() returns the memory residency status of the pages in the
+ * current process's address space specified by [start, start + len).
+ * The status is returned in a vector of bytes. The least significant
+ * bit of each byte is 1 if the referenced page is in memory, otherwise
+ * it is zero.
+ *
+ * Because the status of a page can change after mincore() checks it
+ * but before it returns to the application, the returned vector may
+ * contain stale information. Only locked pages are guaranteed to
+ * remain in memory.
+ *
+ * return values:
+ * zero - success; also returned at once when len is zero
+ * -EFAULT - vec points to an illegal address
+ * -EINVAL - start is not a multiple of the page size, or start + len
+ * (len rounded up to a page multiple) wraps around
+ * -ENOMEM - Addresses in the range [start, start + len) are
+ * invalid for the address space of this process, or
+ * specify one or more pages which are not currently
+ * mapped; also returned when mincore_area cannot allocate
+ */
+asmlinkage long sys_mincore(unsigned long start, size_t len, char *vec)
+{
+ int index = 0;
+ unsigned long end;
+ struct vm_area_struct * vma;
+ int unmapped_error = 0;
+ int error = -EINVAL;
+
+ down(&current->mm->mmap_sem); /* NOTE(review): held across copy_to_user() in mincore_area -- confirm a fault on vec cannot need mmap_sem */
+
+ if (start & ~PAGE_MASK)
+ goto out;
+ len = (len + ~PAGE_MASK) & PAGE_MASK; /* round len up to whole pages */
+ end = start + len;
+ if (end < start)
+ goto out;
+
+ error = 0;
+ if (end == start)
+ goto out;
+
+ /*
+ * Unmapped holes inside [start,end) are skipped and reported as
+ * -ENOMEM at the end. NOTE(review): index is not advanced over a hole.
+ */
+ vma = find_vma(current->mm, start);
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out;
+
+ /* Here start < vma->vm_end. */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end. */
+ if (end <= vma->vm_end) {
+ if (start < end) {
+ error = mincore_area(vma, start, end,
+ &vec[index]);
+ if (error)
+ goto out;
+ }
+ error = unmapped_error;
+ goto out;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end < end. */
+ error = mincore_area(vma, start, vma->vm_end, &vec[index]);
+ if (error)
+ goto out;
+ index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; /* one vec byte per page just reported */
+ start = vma->vm_end;
+ vma = vma->vm_next;
+ }
+
+out:
+ up(&current->mm->mmap_sem);
+ return error;
 }
 
 /*
diff -ruN Linux-2.3.40-3/net/packet/af_packet.c linux/net/packet/af_packet.c
--- Linux-2.3.40-3/net/packet/af_packet.c Fri Dec 31 15:04:42 1999
+++ linux/net/packet/af_packet.c Tue Jan 18 15:57:24 2000
@@ -1543,6 +1543,7 @@
         NULL, /* no special protect */
         NULL, /* sync */
         NULL, /* advise */
+ NULL, /* incore */
         NULL, /* nopage */
         NULL, /* wppage */
         NULL /* swapout */

        - Chuck Lever

--
corporate:	<chuckl@netscape.com>
personal:	<chucklever@netscape.net> or <cel@monkey.org>

The Linux Scalability project: http://www.citi.umich.edu/projects/linux-scalability/

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Jan 23 2000 - 21:00:19 EST