Slow fsync on large files (with a patch against 2.2.12)

Martin Schenk (schenkm@eunet.at)
Sun, 05 Sep 1999 00:12:39 +0200


This is a multi-part message in MIME format.
--------------C6E30A06B74B143DB15A5F0E
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

problem:

Fsyncing a large file takes a lot of time, because ext2_sync_file
goes through the headers of all buffered blocks of the file in order
to determine whether each block has to be written to disk.
On a 100 MB file (100000 blocks), this can lead to more than
200000 calls to sync_block (sync_block is called twice for each block) -
on my K6-200 this takes about 1/2 second of CPU time.
(So fsyncing a large file with few changes on Linux is CPU-bound and not
I/O-bound - at least when most of the file is in the buffer cache.)

further details:
ext2_sync_file finds all these blocks by going through a hierarchy of
indirect blocks, which store block numbers.

idea for the solution:
Move the check for changes from the bottom of the "block hierarchy"
to the upper levels, so that only a small fraction of the blocks has to
be checked.
Every time data is written to a file, the affected block is located
by walking from the inode of the file down through the "block hierarchy".
This makes it possible to set flags along the way, so that when the file
is fsynced only the flagged part of the "hierarchy" has to be searched.

implementation:
I added a flag "sync_depend" to the "buffer_head" structure, which
already contains the metadata of the buffered blocks.
I changed block_getblk in /usr/src/linux/fs/ext2/inode.c to set
sync_depend on the searched block when it is called with create!=0.
I changed the functions scanning the "block hierarchy" in
/usr/src/linux/fs/ext2/fsync.c to abort the scan on branches with
sync_depend==0, and to reset sync_depend to 0 after syncing.
Note: the attached diff was generated from the patched tree
(linuxelf-2.2) to the original tree (linux.orig), so the added lines
appear with a "-" prefix; apply it in reverse (patch -R).

caveats:
My code assumes the following:
- the struct buffer_head is an acceptable place to put a flag like
  sync_depend
- all writes to a block somehow end up going through block_getblk with
  create!=0
- I have not overlooked other things (as I know very little about ext2,
  it does not seem likely that I have caught everything)

results:
For the problem described above, applying my patch reduces the number
of calls to sync_block to below 1000.

-- 
Martin Schenk <schenkm@eunet.at>
Richard-Wagnergasse 46, A-8010 Graz, Austria
--------------C6E30A06B74B143DB15A5F0E
Content-Type: text/plain; charset=us-ascii; name="syncdiff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="syncdiff"

--- linuxelf-2.2/include/linux/fs.h Sat Sep 4 21:40:25 1999 +++ linux.orig/include/linux/fs.h Thu Aug 26 02:29:52 1999 @@ -226,14 +226,6 @@ */ void (*b_end_io)(struct buffer_head *bh, int uptodate); void *b_dev_id; - -#ifdef CONFIG_FASTSYNC /* faster fsync for large files on EXT2 */ - int sync_depend:1; /* (Double/Triple)Indirect-Buffer: - true -> points to a possibly changed buffer, - false ->no changes done, sync does not need to - check dependents - */ -#endif }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); --- linuxelf-2.2/fs/ext2/inode.c Sat Sep 4 22:07:14 1999 +++ linux.orig/fs/ext2/inode.c Mon Aug 9 21:04:57 1999 @@ -283,9 +283,6 @@ return NULL; } } -#ifdef CONFIG_FASTSYNC - if (create) bh->sync_depend=1; /* changes to dependents possible */ -#endif p = (u32 *) bh->b_data + nr; repeat: tmp = le32_to_cpu(*p); --- linuxelf-2.2/fs/ext2/fsync.c Sat Sep 4 22:48:59 1999 +++ linux.orig/fs/ext2/fsync.c Wed May 20 22:09:12 1998 @@ -155,11 +146,6 @@ rc = sync_iblock (inode, iblock, &ind_bh, wait); if (rc || !ind_bh) return rc; - -#ifdef CONFIG_FASTSYNC - if (!ind_bh->sync_depend) - { brelse(ind_bh); return 0; } /* no changes to dependents */ -#endif for (i = 0; i < addr_per_block; i++) { rc = sync_block_swab32 (inode, @@ -168,10 +154,6 @@ if (rc) err = rc; } - -#ifdef CONFIG_FASTSYNC - if (wait) ind_bh->sync_depend=0; -#endif brelse (ind_bh); return err; } @@ -186,11 +168,6 @@ rc = sync_iblock_swab32 (inode, iblock, &ind_bh, wait); if (rc || !ind_bh) return rc; - -#ifdef CONFIG_FASTSYNC - if (!ind_bh->sync_depend) - { brelse(ind_bh); return 0; } /* no changes to dependents */ -#endif for (i = 0; i < addr_per_block; i++) { rc = sync_block_swab32 (inode, @@ -199,10 +176,6 @@ if (rc) err = rc; } - -#ifdef CONFIG_FASTSYNC - if (wait) ind_bh->sync_depend=0; -#endif brelse (ind_bh); return err; } @@ -219,11 +192,6 @@ rc = sync_iblock (inode, diblock, &dind_bh, wait); if (rc || !dind_bh) return rc; - -#ifdef CONFIG_FASTSYNC - if (!dind_bh->sync_depend) - 
{ brelse(dind_bh); return 0; } /* no changes to dependents */ -#endif for (i = 0; i < addr_per_block; i++) { rc = sync_indirect_swab32 (inode, @@ -232,11 +200,6 @@ if (rc) err = rc; } - -#ifdef CONFIG_FASTSYNC - if (wait) dind_bh->sync_depend=0; -#endif - brelse (dind_bh); return err; } @@ -251,11 +214,6 @@ rc = sync_iblock_swab32 (inode, diblock, &dind_bh, wait); if (rc || !dind_bh) return rc; - -#ifdef CONFIG_FASTSYNC - if (!dind_bh->sync_depend) - { brelse(dind_bh); return 0; } /* no changes to dependents */ -#endif for (i = 0; i < addr_per_block; i++) { rc = sync_indirect_swab32 (inode, @@ -264,10 +222,6 @@ if (rc) err = rc; } - -#ifdef CONFIG_FASTSYNC - if (wait) dind_bh->sync_depend=0; -#endif brelse (dind_bh); return err; } @@ -284,12 +238,6 @@ rc = sync_iblock (inode, tiblock, &tind_bh, wait); if (rc || !tind_bh) return rc; - -#ifdef CONFIG_FASTSYNC - if (!tind_bh->sync_depend) /* no changes to dependents */ - { brelse(tind_bh); return 0; - } -#endif for (i = 0; i < addr_per_block; i++) { rc = sync_dindirect_swab32 (inode, @@ -298,10 +246,6 @@ if (rc) err = rc; } - -#ifdef CONFIG_FASTSYNC - if (wait) tind_bh->sync_depend=0; -#endif brelse (tind_bh); return err; }

--------------C6E30A06B74B143DB15A5F0E--

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/