it still needs some help in the non-i386 architecture specific department.
Binary files linux-2.3.15-ref/ID and linux/ID differ
diff -ruN linux-2.3.15-ref/arch/alpha/kernel/osf_sys.c linux/arch/alpha/kernel/osf_sys.c
--- linux-2.3.15-ref/arch/alpha/kernel/osf_sys.c Tue Aug 24 14:40:29 1999
+++ linux/arch/alpha/kernel/osf_sys.c Wed Aug 25 16:38:44 1999
@@ -205,6 +205,8 @@
}
+#if 0
+/* Linux has an madvise implementation now. */
/*
* Heh. As documented by DEC..
*/
@@ -212,6 +214,7 @@
{
return 0;
}
+#endif
/*
* No need to acquire the kernel lock, we're local..
diff -ruN linux-2.3.15-ref/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linux-2.3.15-ref/arch/i386/kernel/entry.S Tue Aug 24 14:40:29 1999
+++ linux/arch/i386/kernel/entry.S Wed Aug 25 16:35:31 1999
@@ -560,6 +560,7 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
+ .long SYMBOL_NAME(sys_madvise)
/*
* NOTE!! This doesn't have to be exact - we just have
@@ -567,6 +568,6 @@
* entries. Don't panic if you notice that this hasn't
* been shrunk every time we add a new system call.
*/
- .rept NR_syscalls-190
+ .rept NR_syscalls-189
.long SYMBOL_NAME(sys_ni_syscall)
.endr
diff -ruN linux-2.3.15-ref/arch/m68k/kernel/entry.S linux/arch/m68k/kernel/entry.S
--- linux-2.3.15-ref/arch/m68k/kernel/entry.S Tue Aug 24 14:41:01 1999
+++ linux/arch/m68k/kernel/entry.S Wed Aug 25 16:35:50 1999
@@ -600,6 +600,7 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
+ .long SYMBOL_NAME(sys_madvise)
.rept NR_syscalls-(.-SYMBOL_NAME(sys_call_table))/4
.long SYMBOL_NAME(sys_ni_syscall)
diff -ruN linux-2.3.15-ref/arch/mips/kernel/syscalls.h linux/arch/mips/kernel/syscalls.h
--- linux-2.3.15-ref/arch/mips/kernel/syscalls.h Tue Aug 24 14:39:26 1999
+++ linux/arch/mips/kernel/syscalls.h Wed Aug 25 16:37:06 1999
@@ -225,3 +225,4 @@
SYS(sys_sendfile, 3)
SYS(sys_ni_syscall, 0)
SYS(sys_ni_syscall, 0)
+SYS(sys_madvise, 3) /* 4210 */
diff -ruN linux-2.3.15-ref/arch/mips/kernel/sysirix.c linux/arch/mips/kernel/sysirix.c
--- linux-2.3.15-ref/arch/mips/kernel/sysirix.c Tue Aug 24 14:39:26 1999
+++ linux/arch/mips/kernel/sysirix.c Wed Aug 25 16:40:30 1999
@@ -1136,6 +1136,10 @@
return retval;
}
+/*
+ * XXX Linux has a native madvise() system call now.
+ * This may need to do something Irix-specific.
+ */
asmlinkage int irix_madvise(unsigned long addr, int len, int behavior)
{
lock_kernel();
diff -ruN linux-2.3.15-ref/arch/ppc/kernel/misc.S linux/arch/ppc/kernel/misc.S
--- linux-2.3.15-ref/arch/ppc/kernel/misc.S Tue Aug 24 14:39:28 1999
+++ linux/arch/ppc/kernel/misc.S Wed Aug 25 16:36:05 1999
@@ -894,4 +894,5 @@
.long sys_ni_syscall /* streams1 */
.long sys_ni_syscall /* streams2 */
.long sys_vfork
+ .long sys_madvise
.space (NR_syscalls-183)*4
diff -ruN linux-2.3.15-ref/include/asm-alpha/mman.h linux/include/asm-alpha/mman.h
--- linux-2.3.15-ref/include/asm-alpha/mman.h Sun Jan 25 19:31:47 1998
+++ linux/include/asm-alpha/mman.h Tue Aug 24 17:02:21 1999
@@ -31,6 +31,13 @@
#define MCL_CURRENT 8192 /* lock all currently mapped pages */
#define MCL_FUTURE 16384 /* lock all additions to address space */
+/* used by madvise() */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.15-ref/include/asm-arm/mman.h linux/include/asm-arm/mman.h
--- linux-2.3.15-ref/include/asm-arm/mman.h Tue Jan 20 19:39:42 1998
+++ linux/include/asm-arm/mman.h Tue Aug 24 17:03:12 1999
@@ -22,6 +22,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff -ruN linux-2.3.15-ref/include/asm-i386/mman.h linux/include/asm-i386/mman.h
--- linux-2.3.15-ref/include/asm-i386/mman.h Mon Oct 7 01:55:48 1996
+++ linux/include/asm-i386/mman.h Tue Aug 24 17:03:48 1999
@@ -22,6 +22,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff -ruN linux-2.3.15-ref/include/asm-m68k/mman.h linux/include/asm-m68k/mman.h
--- linux-2.3.15-ref/include/asm-m68k/mman.h Fri Nov 22 08:56:36 1996
+++ linux/include/asm-m68k/mman.h Tue Aug 24 17:04:22 1999
@@ -22,6 +22,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff -ruN linux-2.3.15-ref/include/asm-mips/mman.h linux/include/asm-mips/mman.h
--- linux-2.3.15-ref/include/asm-mips/mman.h Thu Jun 26 15:33:40 1997
+++ linux/include/asm-mips/mman.h Tue Aug 24 17:05:07 1999
@@ -51,6 +51,15 @@
#define MS_INVALIDATE 2 /* invalidate mappings & caches */
/*
+ * Flags for madvise
+ */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
+/*
* Flags for mlockall
*/
#define MCL_CURRENT 1 /* lock all current mappings */
diff -ruN linux-2.3.15-ref/include/asm-ppc/mman.h linux/include/asm-ppc/mman.h
--- linux-2.3.15-ref/include/asm-ppc/mman.h Wed Dec 18 03:54:09 1996
+++ linux/include/asm-ppc/mman.h Tue Aug 24 17:05:44 1999
@@ -22,6 +22,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
diff -ruN linux-2.3.15-ref/include/asm-sparc/mman.h linux/include/asm-sparc/mman.h
--- linux-2.3.15-ref/include/asm-sparc/mman.h Sat Nov 9 03:29:41 1996
+++ linux/include/asm-sparc/mman.h Tue Aug 24 17:06:17 1999
@@ -28,6 +28,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
diff -ruN linux-2.3.15-ref/include/asm-sparc64/mman.h linux/include/asm-sparc64/mman.h
--- linux-2.3.15-ref/include/asm-sparc64/mman.h Fri Dec 13 04:37:47 1996
+++ linux/include/asm-sparc64/mman.h Tue Aug 24 17:06:48 1999
@@ -28,6 +28,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
diff -ruN linux-2.3.15-ref/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.3.15-ref/include/linux/mm.h Tue Aug 24 14:51:52 1999
+++ linux/include/linux/mm.h Wed Aug 25 17:30:37 1999
@@ -57,6 +57,7 @@
unsigned long vm_offset;
struct file * vm_file;
unsigned long vm_pte; /* shared mem */
+ unsigned vm_rd_behavior;
};
/*
diff -ruN linux-2.3.15-ref/mm/filemap.c linux/mm/filemap.c
--- linux-2.3.15-ref/mm/filemap.c Tue Aug 24 14:43:19 1999
+++ linux/mm/filemap.c Wed Aug 25 17:30:16 1999
@@ -33,6 +33,8 @@
*
* finished 'unifying' the page and buffer cache and SMP-threaded the
* page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
+ *
+ * madvise(2), 24.08.1999, Chuck Lever <cel@monkey.org>
*/
atomic_t page_cache_size = ATOMIC_INIT(0);
@@ -1367,7 +1369,8 @@
* Otherwise, we're off the end of a privately mapped file,
* so we need to map a zero page.
*/
- if (offset < inode->i_size)
+ if ((offset < inode->i_size) &&
+ (area->vm_rd_behavior != MADV_RANDOM))
read_cluster_nonblocking(file, offset);
else
page_cache_read(file, offset);
@@ -1881,6 +1884,175 @@
err = written ? written : status;
out:
return err;
+}
+
+/*
+ * The madvise(2) system call.
+ *
+ * The application can use madvise() to advise the kernel how it should
+ * handle paging I/O in this VM area. The idea is to help the kernel
+ * use appropriate read-ahead techniques. The information provided is
+ * advisory only, and can be safely disregarded by the kernel.
+ *
+ * behavior values:
+ * MADV_NORMAL - the default behavior is to read clusters. This
+ * results in some read-ahead and read-behind.
+ * MADV_RANDOM - the system should read the minimum amount of data
+ * on any access, since it is unlikely that the application
+ * will need more than what it asks for.
+ * MADV_SEQUENTIAL - pages in the given range will probably be
+ * accessed once, so they can be aggressively read ahead,
+ * and can be freed soon after they are accessed.
+ * MADV_WILLNEED - the application is notifying the system to read
+ * some pages ahead.
+ * MADV_DONTNEED - the application is finished with the given range,
+ * so the kernel can free resources associated with it.
+ *
+ * return values:
+ * zero = success
+ * -1 = some error occurred, errno value set (see below).
+ *
+ * errno values:
+ * EINVAL - start + len < 0, or start is not page-aligned, or
+ * behavior is not a valid value.
+ * ENOMEM - addresses in the specified range are not mapped, or are
+ * outside the AS of the process.
+ * EIO - an I/O error occurred while paging in data.
+ */
+
+/*
+ * MADV_WILLNEED: bring in the pages of [start, end) that the
+ * application expects to touch.  Each page is faulted in with its own
+ * handle_mm_fault() call, so for now the work is synchronous; some day
+ * this should kick off asynchronous read-ahead and return immediately.
+ *
+ * Returns 0 when every page was handled, or -EIO as soon as
+ * handle_mm_fault() returns a negative value.
+ */
+static inline int madvise_willneed(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	/* Ask for a write fault only when the area itself is writable. */
+	int write = (vma->vm_flags & VM_WRITE) != 0;
+
+	for (addr = start; addr < end; addr += PAGE_CACHE_SIZE) {
+		if (handle_mm_fault(current, vma, addr, write) < 0)
+			return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * MADV_DONTNEED: the application is done with [start, end).  Dirty
+ * data may simply be thrown away; an application that cares about its
+ * data is expected to be more careful.  Swap resources should be freed
+ * as well.
+ *
+ * This is a simple-minded "lazy" implementation: it only clears the
+ * PG_referenced bit of each page in the range, leaving shrink_mmap to
+ * do the real work later if the system needs the pages back.
+ *
+ * NOTE(review): the range address is handed to page_cache_entry()
+ * unchanged - confirm that the macro accepts this kind of address.
+ *
+ * Always returns 0.
+ */
+static inline int madvise_dontneed(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	struct page * pg;
+
+	for (addr = start; addr < end; addr += PAGE_CACHE_SIZE) {
+		pg = page_cache_entry(addr);
+		clear_bit(PG_referenced, &pg->flags);
+	}
+	return 0;
+}
+
+/*
+ * Apply one piece of advice to the part of a single vm area given by
+ * [start, end).
+ *
+ * MADV_NORMAL, MADV_SEQUENTIAL and MADV_RANDOM are recorded in
+ * vma->vm_rd_behavior; note that the hint is stored on the vma itself,
+ * so start and end are not consulted for these three values.
+ * MADV_WILLNEED and MADV_DONTNEED are handed to their helpers.
+ *
+ * Returns 0 on success, the helper's error code, or -EINVAL when
+ * behavior is not one of the MADV_* values above.
+ */
+static int madvise_area(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end, int behavior)
+{
+	switch (behavior) {
+	case MADV_NORMAL:
+	case MADV_SEQUENTIAL:
+	case MADV_RANDOM:
+		vma->vm_rd_behavior = behavior;
+		return 0;
+
+	case MADV_WILLNEED:
+		return madvise_willneed(vma, start, end);
+
+	case MADV_DONTNEED:
+		return madvise_dontneed(vma, start, end);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+
+/*
+ * The system call API is given a range of addresses in the
+ * current process's address space. This function does sanity
+ * checking, then breaks the address range up into a set of
+ * vm areas for processing.
+ *
+ * start must be page-aligned; len is rounded up to a whole number of
+ * pages.  The mm's mmap_sem is held for the duration of the call.
+ *
+ * Returns:
+ *   0        success, including the case of an empty range
+ *   -EINVAL  start is not page-aligned, or start + len wraps around
+ *   -ENOMEM  no vm area lies at or above start, the areas run out
+ *            before end is reached, or the range contains a hole
+ *            (the mapped parts are still processed in that case)
+ *   other    the first error returned by madvise_area(), which stops
+ *            the walk
+ */
+asmlinkage int sys_madvise(unsigned long start, size_t len, int behavior)
+{
+	unsigned long end;
+	struct vm_area_struct * vma;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+
+	down(&current->mm->mmap_sem);
+
+	if (start & ~PAGE_MASK)
+		goto out;
+	/* Round the length up to a multiple of the page size. */
+	len = (len + ~PAGE_MASK) & PAGE_MASK;
+	end = start + len;
+	/* Reject a range whose end wrapped past the top of the address space. */
+	if (end < start)
+		goto out;
+
+	/* An empty range is trivially successful. */
+	error = 0;
+	if (end == start)
+		goto out;
+
+	/*
+	 * If the interval [start,end) covers some unmapped address ranges,
+	 * just ignore them, but return -ENOMEM at the end.
+	 */
+	vma = find_vma(current->mm, start);
+	for (;;) {
+		/* Still start < end. */
+		error = -ENOMEM;
+		if (!vma)
+			goto out;
+
+		/* Here start < vma->vm_end. */
+		if (start < vma->vm_start) {
+			/* Skip the hole, but remember to report it. */
+			unmapped_error = -ENOMEM;
+			start = vma->vm_start;
+		}
+
+		/* Here vma->vm_start <= start < vma->vm_end. */
+		if (end <= vma->vm_end) {
+			/* Last area: start may have moved past end above. */
+			if (start < end) {
+				error = madvise_area(vma, start, end, behavior);
+				if (error)
+					goto out;
+			}
+			error = unmapped_error;
+			goto out;
+		}
+
+		/* Here vma->vm_start <= start < vma->vm_end < end. */
+		error = madvise_area(vma, start, vma->vm_end, behavior);
+		if (error)
+			goto out;
+		start = vma->vm_end;
+		vma = vma->vm_next;
+	}
+
+out:
+	up(&current->mm->mmap_sem);
+	return error;
}
/*
- Chuck Lever
-- corporate: <chuckl@netscape.com> personal: <chucklever@netscape.net> or <cel@monkey.org>
The Linux Scalability project: http://www.citi.umich.edu/projects/linux-scalability/
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/