Updated misc mem patch for 2.1.113

Bill Hawes (whawes@transmeta.com)
Sat, 01 Aug 1998 15:03:21 -0700


This is a multi-part message in MIME format.
--------------AF239B47B5F809A2C10DEED2
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The attached updated misc memory trimming patch has a revised approach
to pgt trimming that I hope will satisfy objections made to the previous
patch.

The new method for cache trimming avoids the locking issues by letting
each CPU do the work for its own caches. The kswapd CPU sets a flag
asking the other CPUs to trim their pgt caches, and then trims its own
cache synchronously. The trimming is done by __check_pgt_cache(), which
now takes a limit argument; the limit is set to either the sysctl value
or 0 if trimming has been requested. When the CPU idle tasks have
drained their pgt cache to 0, the trim flag is reset so that the cache
reverts to its normal limits.

I think this is a reasonable compromise to allow kswapd to reclaim the
cache memory without introducing any new locking requirements.

For trimming filesystem memory I've added a trim_fs_memory() function to
fs/filesystems.c to serve as the interface to the vm system. This avoids
the need to have fs-specific conditional flags in the mm files, and will
make it easy to add additional memory reclamation calls. At present
only the NFS dircache memory is reclaimable, but I suspect that some of
the other filesystems may have freeable memory as well.

The patch makes a fair amount of additional memory available to kswapd
-- 16 pages from NFS plus 25 pages per CPU -- so it should prove to be a
useful addition to the vm system.

Regards,
Bill

--------------AF239B47B5F809A2C10DEED2
Content-Type: text/plain; charset=us-ascii; name="mm_miscmem113-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="mm_miscmem113-patch"

--- linux-2.1.113/mm/vmscan.c.old Mon Jul 20 10:37:39 1998
+++ linux-2.1.113/mm/vmscan.c Sat Aug 1 09:36:00 1998
@@ -439,6 +439,22 @@
return 0;
}

+extern int trim_pgt_cache(void);
+extern int trim_fs_memory(void);
+/*
+ * Try to free memory from various sources.
+ */
+static int shrink_misc_mem(int pri, int gfp_mask)
+{
+ if ((gfp_mask & __GFP_IO) && shm_swap(pri, gfp_mask))
+ return 1;
+ if (trim_pgt_cache())
+ return 1;
+ if (trim_fs_memory())
+ return 1;
+ return 0;
+}
+
/*
* We are much more aggressive about trying to swap out than we used
* to be. This works out OK, because we now do proper aging on page
@@ -469,7 +485,7 @@
return 1;
state = 1;
case 1:
- if ((gfp_mask & __GFP_IO) && shm_swap(i, gfp_mask))
+ if (shrink_misc_mem(i, gfp_mask))
return 1;
state = 2;
case 2:
--- linux-2.1.113/mm/memory.c.old Sat Aug 1 08:53:17 1998
+++ linux-2.1.113/mm/memory.c Sat Aug 1 09:57:47 1998
@@ -56,6 +56,13 @@
unsigned long num_physpages = 0;
void * high_memory = NULL;

+/* Low and high watermarks for page table cache.
+ The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
+ */
+int pgt_cache_water[2] = { 25, 50 };
+
+static void __check_pgt_cache(int);
+
/*
* We special-case the C-O-W ZERO_PAGE, because it's such
* a common occurrence (no need to read the page to know
@@ -136,7 +143,7 @@
free_one_pgd(page_dir + i);

/* keep the page table cache within bounds */
- check_pgt_cache();
+ __check_pgt_cache(pgt_cache_water[1]);
return;

out_bad:
@@ -165,7 +172,7 @@
pgd_free(page_dir);

/* keep the page table cache within bounds */
- check_pgt_cache();
+ __check_pgt_cache(pgt_cache_water[1]);
out:
return;

@@ -948,14 +955,20 @@
}
}

-/* Low and high watermarks for page table cache.
- The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
+
+/*
+ * Set when the vm system wants to trim
+ * extra pages from the cache lists.
*/
-int pgt_cache_water[2] = { 25, 50 };
+int pgt_trim_req[NR_CPUS] = {0, };

-void check_pgt_cache(void)
+/*
+ * Note: the pxx_quicklists are per CPU, so we don't need
+ * the kernel lock for this operation.
+ */
+static void __check_pgt_cache(int limit)
{
- if (pgtable_cache_size > pgt_cache_water[1]) {
+ if(pgtable_cache_size > limit) {
do {
if (pgd_quicklist)
free_pgd_slow(get_pgd_fast());
@@ -965,4 +978,47 @@
free_pte_slow(get_pte_fast());
} while (pgtable_cache_size > pgt_cache_water[0]);
}
+}
+
+
+/*
+ * Called from the CPU idle tasks to regulate the page table cache.
+ */
+void check_pgt_cache(void)
+{
+ int limit = pgt_cache_water[1];
+
+ /* Set the limit to 0 if extra trimming was requested */
+ if (pgt_trim_req[smp_processor_id()])
+ limit = 0;
+ __check_pgt_cache(limit);
+
+ /*
+ * Cache empty? Reset the trim request flag ...
+ */
+ if (!pgtable_cache_size)
+ pgt_trim_req[smp_processor_id()] = 0;
+}
+
+
+/*
+ * Called by the vm system to reduce pgt cache memory.
+ */
+int trim_pgt_cache(void)
+{
+ int cpu, result;
+
+ /*
+ * Request that all CPUs trim their caches.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ pgt_trim_req[cpu] = 1;
+
+ result = 0;
+ if (pgtable_cache_size)
+ result = 1;
+
+ /* now check our own cache */
+ check_pgt_cache();
+ return result;
}
--- linux-2.1.113/fs/filesystems.c.old Sat Aug 1 08:53:17 1998
+++ linux-2.1.113/fs/filesystems.c Sat Aug 1 09:36:00 1998
@@ -179,6 +179,24 @@
return err;
}

+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
+int (*nfs_trim_dircache_func)(void) = NULL;
+#endif
+
+/*
+ * General hook to allow the vm system to reclaim memory
+ * from the filesystems. Add calls here for any fs that
+ * allocates memory unreclaimable by other means.
+ */
+int trim_fs_memory(void)
+{
+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
+ if (nfs_trim_dircache_func && nfs_trim_dircache_func())
+ return 1;
+#endif
+ return 0;
+}
+
#ifndef CONFIG_NFSD
#ifdef CONFIG_NFSD_MODULE
int (*do_nfsservctl)(int, void *, void *) = NULL;
--- linux-2.1.113/kernel/ksyms.c.old Fri Jul 24 11:19:10 1998
+++ linux-2.1.113/kernel/ksyms.c Sat Aug 1 09:36:00 1998
@@ -72,6 +72,9 @@
#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
extern int (*do_nfsservctl)(int, void *, void *);
#endif
+#if !defined(CONFIG_NFS_FS) && defined(CONFIG_NFS_FS_MODULE)
+extern int (*nfs_trim_dircache_func)(void);
+#endif

extern void *sys_call_table;

@@ -194,6 +197,10 @@

#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
EXPORT_SYMBOL(do_nfsservctl);
+#endif
+
+#if !defined(CONFIG_NFS_FS) && defined(CONFIG_NFS_FS_MODULE)
+EXPORT_SYMBOL(nfs_trim_dircache_func);
#endif

/* device registration */
--- linux-2.1.113/fs/nfs/dir.c.old Wed Jun 24 14:30:10 1998
+++ linux-2.1.113/fs/nfs/dir.c Sat Aug 1 09:36:00 1998
@@ -352,7 +352,8 @@
if (sb && sb->s_dev != cache->dev)
continue;
if (cache->locked) {
- printk("NFS: cache locked at umount %s\n",
+ printk(KERN_ERR
+ "NFS: cache locked at umount %s\n",
(cache->entry ? "(lost a page!)" : ""));
continue;
}
@@ -362,6 +363,40 @@
cache->entry = NULL;
}
}
+}
+
+/*
+ * Trim a page from the dir cache. Eventually the dir cache
+ * should be implemented as inode (page) cache, but for now
+ * this allows the memory to be reclaimed when needed.
+ */
+int
+nfs_trim_dircache(void)
+{
+ struct nfs_dirent *cache = dircache, *oldest = NULL;
+ unsigned long age = ~0UL;
+ int i;
+
+ /*
+ * Find the oldest cache entry with a freeable page.
+ */
+ for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+ if (cache->locked)
+ continue;
+ if (!cache->entry)
+ continue;
+ if (cache->age <= age) {
+ oldest = cache;
+ age = cache->age;
+ }
+ }
+ if (oldest) {
+ oldest->valid = 0;
+ free_page((unsigned long) oldest->entry);
+ oldest->entry = NULL;
+ return 1;
+ }
+ return 0;
}

/*
--- linux-2.1.113/fs/nfs/inode.c.old Thu Jul 23 16:26:10 1998
+++ linux-2.1.113/fs/nfs/inode.c Sat Aug 1 09:36:00 1998
@@ -32,6 +32,9 @@
#include <asm/system.h>
#include <asm/uaccess.h>

+extern int nfs_trim_dircache(void);
+extern int (*nfs_trim_dircache_func)(void);
+
#define CONFIG_NFS_SNAPSHOT 1
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
@@ -135,6 +138,9 @@
if (!(server->flags & NFS_MOUNT_NONLM))
lockd_down(); /* release rpc.lockd */
rpciod_down(); /* release rpciod */
+
+ /* clear the "trim dircache" pointer */
+ nfs_trim_dircache_func = NULL;
/*
* Invalidate the dircache for this superblock.
*/
@@ -298,6 +304,9 @@
goto out_no_root;
sb->s_root->d_op = &nfs_dentry_operations;
sb->s_root->d_fsdata = root_fh;
+
+ /* install the "trim dircache" pointer */
+ nfs_trim_dircache_func = nfs_trim_dircache;

/* We're airborne */
unlock_super(sb);

--------------AF239B47B5F809A2C10DEED2--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html