Re: [patch] [possible race in ext2] Re: how to write get_block?

tytso@mit.edu
Thu, 14 Oct 1999 12:56:55 -0400


From: "Stephen C. Tweedie" <sct@redhat.com>
Date: Mon, 11 Oct 1999 17:34:36 +0100 (BST)

The _fast_ quick fix is to maintain a per-inode list of dirty buffers
and to invalidate that list when we do a delete. This works for
directories if we only support truncate back to zero --- it obviously
gets things wrong if we allow partial truncates of directories (but why
would anyone want to allow that?!)

This would have minimal performance implication and would also allow
fast fsync() of indirect block metadata for regular files.

I've actually had patches to do fast fsync for some time, but I
thought you said you had some changes in queue which would make the
need for this obsolete, so didn't bother to submit this, since it is a
bit of a hack. Here it is though, for folks to comment upon. Code to
invalidate the list of dirty buffers is missing from this code, but it
wouldn't be hard to add.

- Ted

Patch generated: on Wed Aug 18 15:01:49 EDT 1999 by tytso@rsts-11.mit.edu
against Linux version 2.2.10

===================================================================
RCS file: fs/ext2/RCS/fsync.c,v
retrieving revision 1.1
diff -u -r1.1 fs/ext2/fsync.c
--- fs/ext2/fsync.c 1999/07/21 10:45:22 1.1
+++ fs/ext2/fsync.c 1999/07/21 10:45:26
@@ -250,36 +250,83 @@
return err;
}

-/*
- * File may be NULL when we are called. Perhaps we shouldn't
- * even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file * file, struct dentry *dentry)
+static int sync_inode(struct inode *inode)
{
- int wait, err = 0;
- struct inode *inode = dentry->d_inode;
-
+ int i, wait, err = 0;
+ __u32 *blklist;
+
if (S_ISLNK(inode->i_mode) && !(inode->i_blocks))
- /*
- * Don't sync fast links!
- */
- goto skip;
+ return 0;

- for (wait=0; wait<=1; wait++)
- {
- err |= sync_direct (inode, wait);
- err |= sync_indirect (inode,
+ if (!S_ISDIR(inode->i_mode) && inode->u.ext2_i.i_ffsync_flag) {
+ blklist = inode->u.ext2_i.i_ffsync_blklist;
+ for (wait = 0; wait <=1; wait++) {
+ for (i=0; i < inode->u.ext2_i.i_ffsync_ptr; i++) {
+#if 0 /* Debugging */
+ if (!wait)
+ printk("Fasy sync: %d\n", blklist[i]);
+#endif
+ err |= sync_block(inode, &blklist[i], wait);
+ }
+ }
+ } else {
+ for (wait=0; wait<=1; wait++) {
+ err |= sync_direct (inode, wait);
+ err |= sync_indirect (inode,
inode->u.ext2_i.i_data+EXT2_IND_BLOCK,
- wait);
- err |= sync_dindirect (inode,
+ wait);
+ err |= sync_dindirect (inode,
inode->u.ext2_i.i_data+EXT2_DIND_BLOCK,
- wait);
- err |= sync_tindirect (inode,
+ wait);
+ err |= sync_tindirect (inode,
inode->u.ext2_i.i_data+EXT2_TIND_BLOCK,
- wait);
+ wait);
+ }
}
-skip:
+ inode->u.ext2_i.i_ffsync_flag = 1;
+ inode->u.ext2_i.i_ffsync_ptr = 0;
err |= ext2_sync_inode (inode);
+ return err;
+}
+
+/*
+ * File may be NULL when we are called by msync on a vma. In the
+ * future, the VFS layer should be changed to not pass the struct file
+ * parameter to the fsync function, since it's not used by any of the
+ * implementations (and the dentry parameter is all that we need).
+ */
+int ext2_sync_file(struct file * file, struct dentry *dentry)
+{
+ int err = 0;
+
+ err = sync_inode(dentry->d_inode);
+ if (dentry->d_parent && dentry->d_parent->d_inode)
+ err |= sync_inode(dentry->d_parent->d_inode);
+
return err ? -EIO : 0;
+}
+
+/*
+ * This function adds a list of blocks to be written out by fsync. If
+ * it exceeds NUM_EXT2_FFSYNC_BLKS, then we turn off the fast fsync flag.
+ */
+void ext2_ffsync_add_blk(struct inode *inode, __u32 blk)
+{
+ int i;
+ __u32 *blklist;
+
+ if (inode->u.ext2_i.i_ffsync_flag == 0)
+ return;
+#if 0 /* Debugging */
+ printk("Add fast sync: %d\n", blk);
+#endif
+ blklist = inode->u.ext2_i.i_ffsync_blklist;
+ for (i=0; i < inode->u.ext2_i.i_ffsync_ptr; i++)
+ if (blklist[i] == blk)
+ return;
+ if (inode->u.ext2_i.i_ffsync_ptr > NUM_EXT2_FFSYNC_BLKS) {
+ inode->u.ext2_i.i_ffsync_flag = 0;
+ return;
+ }
+ blklist[inode->u.ext2_i.i_ffsync_ptr++] = blk;
}
===================================================================
RCS file: fs/ext2/RCS/super.c,v
retrieving revision 1.1
diff -u -r1.1 fs/ext2/super.c
--- fs/ext2/super.c 1999/07/21 10:45:22 1.1
+++ fs/ext2/super.c 1999/07/21 10:45:26
@@ -733,7 +733,11 @@
}

static struct file_system_type ext2_fs_type = {
+#ifdef EXT2_DEV_FS
+ "ext2dev",
+#else
"ext2",
+#endif
FS_REQUIRES_DEV /* | FS_IBASKET */, /* ibaskets have unresolved bugs */
ext2_read_super,
NULL
===================================================================
RCS file: fs/ext2/RCS/file.c,v
retrieving revision 1.1
diff -u -r1.1 fs/ext2/file.c
--- fs/ext2/file.c 1999/07/21 10:45:22 1.1
+++ fs/ext2/file.c 1999/07/21 10:51:28
@@ -295,6 +295,7 @@
break;
}
mark_buffer_dirty(bh, 0);
+ ext2_ffsync_add_blk(inode, bh->b_blocknr);
update_vm_cache(inode, pos, bh->b_data + offset, c);
pos += c;
written += c;
===================================================================
RCS file: fs/ext2/RCS/inode.c,v
retrieving revision 1.1
diff -u -r1.1 fs/ext2/inode.c
--- fs/ext2/inode.c 1999/07/21 10:45:22 1.1
+++ fs/ext2/inode.c 1999/07/21 10:45:26
@@ -125,6 +125,7 @@
mark_buffer_uptodate(bh, 1);
mark_buffer_dirty(bh, 1);
brelse (bh);
+ ext2_ffsync_add_blk(inode, bh->b_blocknr);
} else {
ext2_discard_prealloc (inode);
ext2_debug ("preallocation miss (%lu/%lu).\n",
@@ -351,6 +352,7 @@
}
*p = le32_to_cpu(tmp);
mark_buffer_dirty(bh, 1);
+ ext2_ffsync_add_blk(inode, bh->b_blocknr);
if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
ll_rw_block (WRITE, 1, &bh);
wait_on_buffer (bh);
===================================================================
RCS file: include/linux/RCS/ext2_fs_i.h,v
retrieving revision 1.1
diff -u -r1.1 include/linux/ext2_fs_i.h
--- include/linux/ext2_fs_i.h 1999/07/21 10:45:22 1.1
+++ include/linux/ext2_fs_i.h 1999/07/21 10:45:26
@@ -16,6 +16,8 @@
#ifndef _LINUX_EXT2_FS_I
#define _LINUX_EXT2_FS_I

+#define NUM_EXT2_FFSYNC_BLKS 4
+
/*
* second extended file system inode data in memory
*/
@@ -37,6 +39,14 @@
__u32 i_prealloc_count;
__u32 i_high_size;
int i_new_inode:1; /* Is a freshly allocated inode */
+ int i_ffsync_flag:1; /* Fast sync flag */
+ unsigned long i_bflags; /* The B-tree flags */
+ __u32 i_ffsync_blklist[NUM_EXT2_FFSYNC_BLKS];
+ __u16 i_ffsync_ptr;
+ struct wait_queue * i_bwait;
};
+
+/* Ext2 B-tree flags */
+#define EXT2_BTF_LOCKED 1

#endif /* _LINUX_EXT2_FS_I */
===================================================================
RCS file: include/linux/RCS/ext2_fs.h,v
retrieving revision 1.1
diff -u -r1.1 include/linux/ext2_fs.h
--- include/linux/ext2_fs.h 1999/07/21 10:45:23 1.1
+++ include/linux/ext2_fs.h 1999/07/21 10:45:26
@@ -545,6 +545,7 @@

/* fsync.c */
extern int ext2_sync_file (struct file *, struct dentry *);
+extern void ext2_ffsync_add_blk(struct inode *inode, __u32 blk);

/* ialloc.c */
extern struct inode * ext2_new_inode (const struct inode *, int, int *);
===================================================================
RCS file: include/linux/RCS/fs.h,v
retrieving revision 1.1
diff -u -r1.1 include/linux/fs.h
--- include/linux/fs.h 1999/07/21 10:45:23 1.1
+++ include/linux/fs.h 1999/07/21 10:55:36
@@ -216,6 +216,7 @@
struct wait_queue * b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct buffer_head * b_prev_free; /* doubly linked list of buffers */
+ struct list_head b_idirty; /* linked list per-inode dirty buffers */
struct buffer_head * b_reqnext; /* request queue */

/*
@@ -332,6 +333,7 @@
struct list_head i_hash;
struct list_head i_list;
struct list_head i_dentry;
+ struct list_head i_dirty_buf;

unsigned long i_ino;
unsigned int i_count;
===================================================================
RCS file: fs/RCS/buffer.c,v
retrieving revision 1.1
diff -u -r1.1 fs/buffer.c
--- fs/buffer.c 1999/07/21 10:45:23 1.1
+++ fs/buffer.c 1999/07/21 10:45:26
@@ -416,6 +416,8 @@
clear_bit(BH_Uptodate, &bh->b_state);
clear_bit(BH_Dirty, &bh->b_state);
clear_bit(BH_Req, &bh->b_state);
+ memset(bh->b_data, 0, bh->b_size);
+ list_del(&bh->b_idirty);
}
}
}
@@ -484,6 +486,7 @@
nr_buffers_type[bh->b_list]--;
remove_from_hash_queue(bh);
remove_from_lru_list(bh);
+ list_del(&bh->b_idirty);
}

static inline void put_last_free(struct buffer_head * bh)
@@ -672,6 +675,7 @@
bh->b_blocknr = block;
bh->b_end_io = handler;
bh->b_dev_id = dev_id;
+ INIT_LIST_HEAD(&bh->b_idirty);
}

static void end_buffer_io_sync(struct buffer_head *bh, int uptodate)

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/