[PATCH] races in sys_acct(), ugly interaction with remount().

Alexander Viro (viro@math.psu.edu)
Tue, 10 Nov 1998 19:12:00 -0500 (EST)


Linus, could you apply the following patch? It _really_ compiles
and seems to fix several nasty bugs in kernel/acct.c.
a) umount() failed to turn accounting off it was suspended.
b) remount failed to turn accounting off.
c) the whole sys_acct() was racey.
Patch fixes first two problems and seems to be OK wrt the third one.
Cheers,
Al
Patch (against 2.1.127) follows:

--- fs/super.c.old Tue Nov 10 01:36:50 1998
+++ fs/super.c Tue Nov 10 01:37:57 1998
@@ -646,6 +646,12 @@
dentry->d_covers = covered;
}

+static void cleanup_acct(kdev_t dev) {
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ (void) acct_auto_close(dev);
+#endif
+}
+
static int do_umount(kdev_t dev, int unmount_root, int flags)
{
struct super_block * sb;
@@ -663,10 +669,7 @@
* are no quotas running any more. Just turn them on again.
*/
DQUOT_OFF(dev);
-
-#ifdef CONFIG_BSD_PROCESS_ACCT
- (void) acct_auto_close(dev);
-#endif
+ cleanup_acct(dev);

/*
* If we may have to abort operations to get out of this
@@ -971,6 +974,8 @@
*/
shrink_dcache_sb(sb);
fsync_dev(sb->s_dev);
+ if (flags & MS_RDONLY)
+ cleanup_acct(sb->s_dev);
retval = do_remount_sb(sb, flags, data);
}
dput(dentry);
--- kernel/acct.c.old Tue Nov 10 00:33:51 1998
+++ kernel/acct.c Tue Nov 10 17:38:05 1998
@@ -23,6 +23,21 @@
*
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
+ *
+ * Fixed a nasty interaction with with sys_umount(). If the accointing
+ * was suspeneded we failed to stop it on umount(). Messy.
+ * Another one: remount to readonly didn't stop accounting.
+ * Question: what should we do if we have CAP_SYS_ADMIN but not
+ * CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
+ * unless we are messing with the root. In that case we are getting a
+ * real mess with do_remount_sb(). 9/11/98, AV.
+ *
+ * Fixed a bunch of races (and pair of leaks). Probably not the best way,
+ * but this one obviously doesn't introduce deadlocks. Later. BTW, found
+ * one race (and leak) in BSD implementation.
+ * OK, that's better. ANOTHER race and leak in BSD variant. There always
+ * is one more bug... TODO: move filp_open into open.c and make
+ * parameters sysctl-controllable. 10/11/98, AV.
*/

#include <linux/config.h>
@@ -60,12 +75,13 @@
/*
* External references and all of the globals.
*/

static volatile int acct_active = 0;
static volatile int acct_needcheck = 0;
static struct file *acct_file = NULL;
static struct timer_list acct_timer = { NULL, NULL, 0, 0, acct_timeout };
+static int do_acct_process(long, struct file *);

/*
* Called whenever the timer says to check the free space.
@@ -78,39 +94,114 @@
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static void check_free_space(void)
+static int check_free_space(struct file *file)
{
mm_segment_t fs;
struct statfs sbuf;
struct super_block *sb;
+ int res = acct_active;
+ int act;

- if (!acct_file || !acct_needcheck)
- return;
+ if (!file || !acct_needcheck)
+ return res;

- sb = acct_file->f_dentry->d_inode->i_sb;
+ sb = file->f_dentry->d_inode->i_sb;
if (!sb->s_op || !sb->s_op->statfs)
- return;
+ return res;

fs = get_fs();
set_fs(KERNEL_DS);
+ /* May block */
sb->s_op->statfs(sb, &sbuf, sizeof(struct statfs));
set_fs(fs);

+ if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100)
+ act = -1;
+ else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100)
+ act = 1;
+ else
+ act = 0;
+
+ /*
+ * If some joker switched acct_file under us we'ld better be
+ * silent and _not_ touch anything.
+ */
+ if (file != acct_file)
+ return act ? (act>0) : res;
+
if (acct_active) {
- if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100) {
+ if (act < 0) {
acct_active = 0;
printk(KERN_INFO "Process accounting paused\r\n");
}
} else {
- if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100) {
+ if (act > 0) {
acct_active = 1;
printk(KERN_INFO "Process accounting resumed\r\n");
}
}
+
del_timer(&acct_timer);
acct_needcheck = 0;
acct_timer.expires = jiffies + ACCT_TIMEOUT;
add_timer(&acct_timer);
+ return acct_active;
+}
+
+/*
+ * It's a copy of fs/open.c::do_open(), except that it returns resulting
+ * struct file instead of doing fd_install(). It would better go into
+ * open.c and become public - this functionality is duplicated in other
+ * places.
+ */
+static struct file *filp_open(const char *name,int flags,int mode)
+{
+ struct file *file;
+ struct dentry *dentry;
+ struct inode *inode;
+ int error;
+
+ error = -ENFILE;
+ if (!(file = get_empty_filp()))
+ goto out_err;
+ file->f_flags = flags;
+ file->f_mode = (flags + 1) & O_ACCMODE;
+ if (file->f_mode)
+ flags++;
+ if (flags & O_TRUNC)
+ flags |= 2;
+ dentry = open_namei(name, flags, mode);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ goto out_f;
+ inode = dentry->d_inode;
+ if (file->f_mode & FMODE_WRITE) {
+ error = get_write_access(inode);
+ if (error)
+ goto out_d;
+ }
+
+ file->f_dentry = dentry;
+ file->f_pos = 0;
+ file->f_reada = 0;
+ file->f_op = NULL;
+ if (inode->i_op)
+ file->f_op = inode->i_op->default_file_ops;
+ if (file->f_op && file->f_op->open) {
+ error = file->f_op->open(inode,file);
+ if (error)
+ goto out_w;
+ }
+ file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+ return file;
+out_w:
+ put_write_access(inode);
+out_d:
+ dput(dentry);
+out_f:
+ put_filp(file);
+out_err:
+ return ERR_PTR(error);
}

/*
@@ -121,9 +212,7 @@
*/
asmlinkage int sys_acct(const char *name)
{
- struct dentry *dentry;
- struct inode *inode;
- struct file_operations *ops;
+ struct file *file = NULL, *old_acct = NULL;
char *tmp;
int error = -EPERM;

@@ -131,82 +220,57 @@
if (!capable(CAP_SYS_PACCT))
goto out;

+ if (name) {
+ tmp = getname(name);
+ error = PTR_ERR(tmp);
+ if (IS_ERR(tmp))
+ goto out;
+ /* Difference from BSD - they don't do O_APPEND */
+ file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
+ putname(tmp);
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto out;
+ }
+ error = -EACCES;
+ if (!S_ISREG(file->f_dentry->d_inode->i_mode))
+ goto out_err;
+
+ error = -EIO;
+ if (!file->f_op->write)
+ goto out_err;
+ }
+
+ error = 0;
if (acct_file) {
- /* fput() may block, so just in case... */
- struct file *tmp = acct_file;
- if (acct_active)
- acct_process(0);
+ old_acct = acct_file;
del_timer(&acct_timer);
acct_active = 0;
acct_needcheck = 0;
acct_file = NULL;
- fput(tmp);
}
- error = 0;
- if (!name) /* We are done */
- goto out;
-
- tmp = getname(name);
- error = PTR_ERR(tmp);
- if (IS_ERR(tmp))
- goto out;
-
- dentry = open_namei(tmp, O_RDWR, 0600);
- putname(tmp);
-
- error = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out;
-
- inode = dentry->d_inode;
-
- error = -EACCES;
- if (!S_ISREG(inode->i_mode))
- goto out_d;
-
- error = -EIO;
- if (!inode->i_op || !(ops = inode->i_op->default_file_ops) ||
- !ops->write)
- goto out_d;
-
- error = -EUSERS;
- if (!(acct_file = get_empty_filp()))
- goto out_d;
-
- acct_file->f_mode = (O_WRONLY + 1) & O_ACCMODE;
- acct_file->f_flags = O_WRONLY;
- acct_file->f_dentry = dentry;
- acct_file->f_pos = inode->i_size;
- acct_file->f_reada = 0;
- acct_file->f_op = ops;
- error = get_write_access(inode);
- if (error)
- goto out_f;
- if (ops->open)
- error = ops->open(inode, acct_file);
- if (error) {
- put_write_access(inode);
- goto out_f;
+ if (name) {
+ acct_file = file;
+ acct_needcheck = 0;
+ acct_active = 1;
+ acct_timer.expires = jiffies + ACCT_TIMEOUT;
+ add_timer(&acct_timer);
+ }
+ if (old_acct) {
+ do_acct_process(0,old_acct);
+ fput(old_acct);
}
- acct_needcheck = 0;
- acct_active = 1;
- acct_timer.expires = jiffies + ACCT_TIMEOUT;
- add_timer(&acct_timer);
- goto out;
-out_f:
- /* decrementing f_count is _not_ enough */
- put_filp(acct_file);
- acct_file = NULL;
-out_d:
- dput(dentry);
out:
unlock_kernel();
return error;
+out_err:
+ fput(file);
+ goto out;
}

void acct_auto_close(kdev_t dev)
{
- if (acct_active && acct_file && acct_file->f_dentry->d_inode->i_dev == dev)
+ if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev)
sys_acct((char *)NULL);
}

@@ -260,7 +324,10 @@
#define KSTK_EIP(stack) (((unsigned long *)(stack))[1019])
#define KSTK_ESP(stack) (((unsigned long *)(stack))[1022])

-int acct_process(long exitcode)
+/*
+ * do_acct_process does all actual work.
+ */
+static int do_acct_process(long exitcode, struct file *file)
{
struct acct ac;
mm_segment_t fs;
@@ -268,14 +335,15 @@

/*
* First check to see if there is enough free_space to continue
- * the process accounting system. Check_free_space toggles the
- * acct_active flag so we need to check that after check_free_space.
+ * the process accounting system.
*/
- check_free_space();
-
- if (!acct_active)
+ if (!file)
return 0;
-
+ file->f_count++;
+ if (!check_free_space(file)) {
+ fput(file);
+ return 0;
+ }

/*
* Fill the accounting struct with the needed info as recorded
@@ -327,10 +395,19 @@
*/
fs = get_fs();
set_fs(KERNEL_DS);
- acct_file->f_op->write(acct_file, (char *)&ac,
- sizeof(struct acct), &acct_file->f_pos);
+ file->f_op->write(file, (char *)&ac,
+ sizeof(struct acct), &file->f_pos);
set_fs(fs);
+ fput(file);
return 0;
+}
+
+/*
+ * acct_process - now just a wrapper around do_acct_process
+ */
+int acct_process(long exitcode)
+{
+ return do_acct_process(exitcode, acct_file);
}

#else

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/