updated patch for 2.1.117 dynamic fd arrays

Bill Hawes (whawes@transmeta.com)
Sat, 22 Aug 1998 10:21:14 -0700


This is a multi-part message in MIME format.
--------------EEF1B4E3E23F63713E2B7BEC
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've updated the dynamic fd array patch to clear only the non-copied
part of the fd array, in keeping with the recent kernel changes to
optimize the memset(). Other operations of the patch are unchanged.

For those who aren't familiar with the dynamic fd patch, its principal
advantages are:
(1) substantially reduced kernel memory consumption, typically 30-40
pages less,

(2) faster operation for forking-intensive applications, as typically
only 32 fd slots need to be copied or cleared instead of 1024,

(3) an adaptable framework to support increasing NR_OPEN
beyond 1024.

Regards,
Bill

--------------EEF1B4E3E23F63713E2B7BEC
Content-Type: text/plain; charset=us-ascii; name="fork_files117-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fork_files117-patch"

--- linux-2.1.117/include/linux/sched.h.old Wed Aug 19 21:22:43 1998
+++ linux-2.1.117/include/linux/sched.h Thu Aug 20 08:31:39 1998
@@ -122,7 +122,11 @@

asmlinkage void schedule(void);

-
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
/*
* Open file table structure
*/
@@ -132,6 +136,7 @@
struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
+ struct file * fd_array[NR_OPEN_DEFAULT];
};

#define INIT_FILES { \
@@ -139,7 +144,8 @@
NR_OPEN, \
&init_fd_array[0], \
{ { 0, } }, \
- { { 0, } } \
+ { { 0, } }, \
+ { NULL, } \
}

struct fs_struct {
@@ -603,6 +609,13 @@
atomic_inc(&mm->count);
}
extern void mmput(struct mm_struct *);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *);
+extern void free_fd_array(struct file **, int);

extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
--- linux-2.1.117/kernel/fork.c.old Wed Aug 19 21:23:21 1998
+++ linux-2.1.117/kernel/fork.c Thu Aug 20 08:17:32 1998
@@ -377,11 +377,78 @@
return __copy_fdset(dst->fds_bits, src->fds_bits);
}

+/*
+ * Allocate an fd array, using get_free_page() if possible.
+ * Note: the array isn't cleared at allocation time.
+ */
+struct file ** alloc_fd_array(int num)
+{
+ struct file **new_fds;
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ new_fds = (struct file **) __get_free_page(GFP_KERNEL);
+ else
+ new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+ return new_fds;
+}
+
+/*
+ * Expand the fd array in the files_struct.
+ */
+int expand_fd_array(struct files_struct *files)
+{
+ struct file **new_fds, **old_fds;
+ int error, nfds;
+
+ error = -EMFILE;
+ if (files->max_fds >= NR_OPEN)
+ goto out;
+
+ /* Expand to the max in one step */
+ nfds = NR_OPEN;
+
+ error = -ENOMEM;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out;
+
+ /* Copy the existing array and install the new pointer */
+ if (nfds > files->max_fds) {
+ int i = files->max_fds;
+ int size = (nfds - i) * sizeof(struct file *);
+
+ old_fds = files->fd;
+ files->fd = new_fds;
+ files->max_fds = nfds;
+ while (i--)
+ *new_fds++ = *old_fds++;
+ /* clear the remainder of the array */
+ memset(new_fds, 0, size);
+ } else {
+ /* Somebody expanded the array while we slept ... */
+ free_fd_array(new_fds, nfds);
+ }
+ error = 0;
+out:
+ return error;
+}
+
+void free_fd_array(struct file **array, int num)
+{
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ free_page((unsigned long) array);
+ else
+ kfree(array);
+}
+
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int size, i, error = 0;
+ int nfds, size, i, error = 0;

/*
* A background process may not have any files ...
@@ -401,24 +468,35 @@
if (!newf)
goto out;

- /*
- * Allocate the fd array, using get_free_page() if possible.
- * Eventually we want to make the array size variable ...
- */
- size = NR_OPEN * sizeof(struct file *);
- if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
- else
- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- if (!new_fds)
- goto out_release;
-
atomic_set(&newf->count, 1);
- newf->max_fds = NR_OPEN;
- newf->fd = new_fds;
newf->close_on_exec = oldf->close_on_exec;
i = copy_fdset(&newf->open_fds, &oldf->open_fds);

+#if 1
+ /* Do a sanity check ... */
+ if (i > oldf->max_fds)
+ printk("copy_files: pid %d, open files %d exceeds max %d!\n",
+ current->pid, i, oldf->max_fds);
+#endif
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ new_fds = &newf->fd_array[0];
+ nfds = NR_OPEN_DEFAULT;
+ if (i > nfds) {
+ nfds = NR_OPEN;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out_release;
+ }
+ newf->max_fds = nfds;
+ newf->fd = new_fds;
+
+ /* compute the remainder to be cleared */
+ size = (nfds - i) * sizeof(struct file *);
old_fds = oldf->fd;
for (; i != 0; i--) {
struct file *f = *old_fds++;
@@ -428,7 +506,7 @@
new_fds++;
}
/* This is long word aligned thus could use a optimized version */
- memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds);
+ memset(new_fds, 0, size);

tsk->files = newf;
error = 0;
--- linux-2.1.117/kernel/exit.c.old Wed Aug 19 21:22:43 1998
+++ linux-2.1.117/kernel/exit.c Wed Aug 19 21:36:31 1998
@@ -199,12 +199,10 @@
if (atomic_dec_and_test(&files->count)) {
close_files(files);
/*
- * Free the fd array as appropriate ...
+ * Free the fd array if we expanded it.
*/
- if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
- free_page((unsigned long) files->fd);
- else
- kfree(files->fd);
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
kmem_cache_free(files_cachep, files);
}
}
--- linux-2.1.117/fs/open.c.old Tue Jul 28 16:13:17 1998
+++ linux-2.1.117/fs/open.c Wed Aug 19 21:36:31 1998
@@ -690,6 +690,7 @@
struct files_struct * files = current->files;
int fd, error;

+repeat:
error = -EMFILE;
fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
/*
@@ -698,8 +699,15 @@
*/
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
-
- /* Check here for fd > files->max_fds to do dynamic expansion */
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (fd >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out;
+ }

FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
--- linux-2.1.117/fs/fcntl.c.old Wed Aug 19 21:23:20 1998
+++ linux-2.1.117/fs/fcntl.c Wed Aug 19 21:36:31 1998
@@ -20,14 +20,15 @@

extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);

-static inline int dupfd(unsigned int fd, unsigned int arg)
+static inline int dupfd(unsigned int fd, unsigned int in_arg)
{
struct files_struct * files = current->files;
struct file * file;
+ unsigned int arg;
int error;

error = -EINVAL;
- if (arg >= NR_OPEN)
+ if (in_arg >= NR_OPEN)
goto out;

error = -EBADF;
@@ -35,10 +36,21 @@
if (!file)
goto out;

+repeat:
error = -EMFILE;
- arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
+ arg = find_next_zero_bit(&files->open_fds, NR_OPEN, in_arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_putf;
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (arg >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out_putf;
+ }
+
FD_SET(arg, &files->open_fds);
FD_CLR(arg, &files->close_on_exec);
fd_install(arg, file);
@@ -58,12 +70,12 @@
lock_kernel();
if (!fcheck(oldfd))
goto out;
+ if (newfd >= NR_OPEN)
+ goto out; /* following POSIX.1 6.2.1 */
+
err = newfd;
if (newfd == oldfd)
goto out;
- err = -EBADF;
- if (newfd >= NR_OPEN)
- goto out; /* following POSIX.1 6.2.1 */

sys_close(newfd);
err = dupfd(oldfd, newfd);
@@ -119,6 +131,7 @@
filp = fget(fd);
if (!filp)
goto out;
+
err = 0;
switch (cmd) {
case F_DUPFD:
@@ -159,7 +172,6 @@
err = filp->f_owner.pid;
break;
case F_SETOWN:
- err = 0;
filp->f_owner.pid = arg;
filp->f_owner.uid = current->uid;
filp->f_owner.euid = current->euid;
@@ -179,10 +191,9 @@
break;
default:
/* sockets need a few special fcntls. */
+ err = -EINVAL;
if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
err = sock_fcntl (filp, cmd, arg);
- else
- err = -EINVAL;
break;
}
fput(filp);
--- linux-2.1.117/fs/proc/array.c.old Wed Aug 19 21:23:20 1998
+++ linux-2.1.117/fs/proc/array.c Wed Aug 19 21:36:31 1998
@@ -680,11 +680,14 @@
"Pid:\t%d\n"
"PPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n",
+ "Gid:\t%d\t%d\t%d\t%d\n"
+ "FDSize:\t%d\n",
get_task_state(p),
- p->pid, p->p_pptr->pid,
+ p->pid,
+ p->p_pptr->pid,
p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ p->gid, p->egid, p->sgid, p->fsgid,
+ p->files ? p->files->max_fds : 0);
return buffer;
}

--------------EEF1B4E3E23F63713E2B7BEC--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html