updated patch for 2.1.108 expandable fd arrays

Bill Hawes (whawes@star.net)
Wed, 08 Jul 1998 15:54:07 -0400


This is a multi-part message in MIME format.
--------------BED4F00A86B192425AE187B0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've made a minor change to the expandable fd array patch to fix a
problem on 64-bit architectures. The default fd array needs to be large
enough to hold BITS_PER_LONG entries, because the copy_fdset routine
works with a granularity of one longword. With a smaller default array,
the count it returned could exceed the array size, which was producing
spurious error messages and resulting in the fd array being expanded
when it really didn't need to be.

Operations on 32-bit architectures are unaffected by the change.

Thanks to everyone for their continued testing of the patch. Apart from
this problem, the patch appears to be working well both in terms of
memory savings and increased performance for forking-intensive
operations.

Regards,
Bill
--------------BED4F00A86B192425AE187B0
Content-Type: text/plain; charset=us-ascii; name="fork_files108-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fork_files108-patch"

--- linux-2.1.108/include/linux/sched.h.old Fri Jul 3 10:42:44 1998
+++ linux-2.1.108/include/linux/sched.h Wed Jul 8 11:50:22 1998
@@ -122,7 +122,11 @@

asmlinkage void schedule(void);

-
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
/*
* Open file table structure
*/
@@ -132,6 +136,7 @@
struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
+ struct file * fd_array[NR_OPEN_DEFAULT];
};

#define INIT_FILES { \
@@ -139,7 +144,8 @@
NR_OPEN, \
&init_fd_array[0], \
{ { 0, } }, \
- { { 0, } } \
+ { { 0, } }, \
+ { NULL, } \
}

struct fs_struct {
@@ -604,6 +610,13 @@
mm->count++;
}
extern void mmput(struct mm_struct *);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *);
+extern void free_fd_array(struct file **, int);

extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
--- linux-2.1.108/kernel/fork.c.old Fri Jul 3 10:32:34 1998
+++ linux-2.1.108/kernel/fork.c Fri Jul 3 11:49:01 1998
@@ -376,11 +380,74 @@
return __copy_fdset(dst->fds_bits, src->fds_bits);
}

+/*
+ * Allocate an fd array, using get_free_page() if possible.
+ */
+struct file ** alloc_fd_array(int num)
+{
+ struct file **new_fds;
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ new_fds = (struct file **) __get_free_page(GFP_KERNEL);
+ else
+ new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+ if (new_fds)
+ memset((void *) new_fds, 0, size);
+ return new_fds;
+}
+
+/*
+ * Expand the fd array in the files_struct.
+ */
+int expand_fd_array(struct files_struct *files)
+{
+ struct file **new_fds;
+ int error, nfds;
+
+ error = -EMFILE;
+ if (files->max_fds >= NR_OPEN)
+ goto out;
+
+ /* Expand to the max in one step */
+ nfds = NR_OPEN;
+
+ error = -ENOMEM;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out;
+
+ /* Copy the existing array and install the new pointer */
+ if (nfds > files->max_fds) {
+ int i;
+ for (i = files->max_fds; i--; )
+ new_fds[i] = files->fd[i];
+ files->fd = new_fds;
+ files->max_fds = nfds;
+ } else {
+ /* Somebody expanded the array while we slept ... */
+ free_fd_array(new_fds, nfds);
+ }
+ error = 0;
+out:
+ return error;
+}
+
+void free_fd_array(struct file **array, int num)
+{
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ free_page((unsigned long) array);
+ else
+ kfree(array);
+}
+
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int size, i, error = 0;
+ int nfds, i, error = 0;

/*
* A background process may not have any files ...
@@ -400,24 +467,31 @@
if (!newf)
goto out;

- /*
- * Allocate the fd array, using get_free_page() if possible.
- * Eventually we want to make the array size variable ...
- */
- size = NR_OPEN * sizeof(struct file *);
- if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
- else
- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- if (!new_fds)
- goto out_release;
- memset((void *) new_fds, 0, size);
-
newf->count = 1;
- newf->max_fds = NR_OPEN;
- newf->fd = new_fds;
newf->close_on_exec = oldf->close_on_exec;
i = copy_fdset(&newf->open_fds, &oldf->open_fds);
+#if 1
+ /* Do a sanity check ... */
+ if (i > oldf->max_fds)
+ printk("copy_files: pid %d, open files %d exceeds max %d!\n",
+ current->pid, i, oldf->max_fds);
+#endif
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ new_fds = &newf->fd_array[0];
+ nfds = NR_OPEN_DEFAULT;
+ if (i > nfds) {
+ nfds = NR_OPEN;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out_release;
+ }
+ newf->max_fds = nfds;
+ newf->fd = new_fds;

old_fds = oldf->fd;
for (; i != 0; i--) {
--- linux-2.1.108/kernel/exit.c.old Sun May 17 12:21:08 1998
+++ linux-2.1.108/kernel/exit.c Fri Jul 3 11:49:01 1998
@@ -191,12 +191,10 @@
if (!--files->count) {
close_files(files);
/*
- * Free the fd array as appropriate ...
+ * Free the fd array if we expanded it.
*/
- if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
- free_page((unsigned long) files->fd);
- else
- kfree(files->fd);
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
kmem_cache_free(files_cachep, files);
}
}
--- linux-2.1.108/fs/open.c.old Sun May 17 12:21:49 1998
+++ linux-2.1.108/fs/open.c Fri Jul 3 11:49:01 1998
@@ -689,6 +689,7 @@
struct files_struct * files = current->files;
int fd, error;

+repeat:
error = -EMFILE;
fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
/*
@@ -697,8 +698,15 @@
*/
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
-
- /* Check here for fd > files->max_fds to do dynamic expansion */
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (fd >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out;
+ }

FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
--- linux-2.1.108/fs/fcntl.c.old Fri Jul 3 10:33:11 1998
+++ linux-2.1.108/fs/fcntl.c Fri Jul 3 11:49:01 1998
@@ -20,14 +20,15 @@

extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);

-static inline int dupfd(unsigned int fd, unsigned int arg)
+static inline int dupfd(unsigned int fd, unsigned int in_arg)
{
struct files_struct * files = current->files;
struct file * file;
+ unsigned int arg;
int error;

error = -EINVAL;
- if (arg >= NR_OPEN)
+ if (in_arg >= NR_OPEN)
goto out;

error = -EBADF;
@@ -35,10 +36,21 @@
if (!file)
goto out;

+repeat:
error = -EMFILE;
- arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
+ arg = find_next_zero_bit(&files->open_fds, NR_OPEN, in_arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_putf;
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (arg >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out_putf;
+ }
+
FD_SET(arg, &files->open_fds);
FD_CLR(arg, &files->close_on_exec);
fd_install(arg, file);
@@ -58,12 +70,12 @@
lock_kernel();
if (!fcheck(oldfd))
goto out;
+ if (newfd >= NR_OPEN)
+ goto out; /* following POSIX.1 6.2.1 */
+
err = newfd;
if (newfd == oldfd)
goto out;
- err = -EBADF;
- if (newfd >= NR_OPEN)
- goto out; /* following POSIX.1 6.2.1 */

sys_close(newfd);
err = dupfd(oldfd, newfd);
@@ -119,6 +131,7 @@
filp = fget(fd);
if (!filp)
goto out;
+
err = 0;
switch (cmd) {
case F_DUPFD:
@@ -159,7 +172,6 @@
err = filp->f_owner.pid;
break;
case F_SETOWN:
- err = 0;
filp->f_owner.pid = arg;
filp->f_owner.uid = current->uid;
filp->f_owner.euid = current->euid;
@@ -168,10 +180,9 @@
break;
default:
/* sockets need a few special fcntls. */
+ err = -EINVAL;
if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
err = sock_fcntl (filp, cmd, arg);
- else
- err = -EINVAL;
break;
}
fput(filp);
--- linux-2.1.108/fs/proc/array.c.old Fri Jul 3 10:33:11 1998
+++ linux-2.1.108/fs/proc/array.c Fri Jul 3 11:49:01 1998
@@ -676,11 +676,14 @@
"Pid:\t%d\n"
"PPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n",
+ "Gid:\t%d\t%d\t%d\t%d\n"
+ "FDSize:\t%d\n",
get_task_state(p),
- p->pid, p->p_pptr->pid,
+ p->pid,
+ p->p_pptr->pid,
p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ p->gid, p->egid, p->sgid, p->fsgid,
+ p->files ? p->files->max_fds : 0);
return buffer;
}

@@ -947,7 +950,8 @@
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
+ statm_pte_range(pmd, address, end - address,
+ pages, shared, dirty, total);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -957,7 +961,8 @@
int * pages, int * shared, int * dirty, int * total)
{
while (address < end) {
- statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total);
+ statm_pmd_range(pgd, address, end - address,
+ pages, shared, dirty, total);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgd++;
}
@@ -980,7 +985,8 @@
pgd_t *pgd = pgd_offset(tsk->mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0;

- statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
+ statm_pgd_range(pgd, vma->vm_start, vma->vm_end,
+ &pages, &shared, &dirty, &total);
resident += pages;
share += shared;
dt += dirty;

--------------BED4F00A86B192425AE187B0--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu