People cry for ext3 because they want a faster fsck. Really, ext2 does
horribly when it comes to fsck: for me, fsck took 6
minutes. Considering that reading the whole partition from beginning to
end takes 10 minutes or so, that's REALLY BAD.
Also, ext2 does pretty badly when it comes to deleting large files:
creating a 400MB file full of zeros with dd takes 2 minutes, and deleting it
takes 30 seconds on unmodified ext2.
Both problems have one common cause: indirect blocks are spread all
over the media with big holes between them.
OK, here's a patch which changes that: it tries to allocate indirect
blocks close to each other. This speeds up fsck 2 times and rm 3
times, but may slow down other operations. Still, it may be a good idea
when your critical server takes 4 hours to fsck.
Please take a look at this patch. I'm interested in two things:
*) can a patch like this cause data loss?
*) is there any way to make this more efficient? It currently
allocates indirect blocks closer to each other, but I feel it could do
_much_ better.
Pavel
PS: Of course, the patch will not take effect right away. You'd have to start
using it just after mke2fs so that the new layout takes place. So here's a script
(not totally safe; try to run it on a really quiet system) to convert
most of your data. (Note: you may not convert /bin this way :-).
PPS: Development was sponsored by SuSE; this patch will hopefully appear
somewhere under http://www.suse.cz/development/.
#!/bin/bash
#
# e2realloc, copyright 1999 Pavel Machek, distribute under GPL.
#
# This will screw up your filesystem if it is not quiet, beware.
#
# For every regular file with exactly one link under the listed trees:
# move it aside to "<name>.mv", copy it back to "<name>" with cp -a (so
# the copy gets freshly allocated blocks), then remove "<name>.mv".
#
# File names travel through the pipe NUL-terminated and are read with
# IFS= / -r, so names containing whitespace, backslashes or newlines
# are processed intact instead of being split or mangled.
#
# Exit status: 0 when all files were processed, 1 when a "<name>.mv"
# already exists, 2 when moving, copying or cleaning up a file failed.
#
find /home /mirror /opt /sbin /tmp /usr /var -mount -type f -links 1 -print0 |
while IFS= read -r -d '' a; do
    printf 'Processing %s...' "$a"

    # -e follows symlinks, so also test -L to catch a dangling
    # symlink that already occupies the temporary name.
    if [ -e "$a.mv" ] || [ -L "$a.mv" ]; then
        echo "Impossible: $a.mv exists"
        exit 1
    fi

    # Nothing has been touched yet if this fails, so just bail out.
    mv "$a" "$a.mv" || {
        echo "Failed to realloc $a."
        exit 2
    }

    # On a failed copy, put the original back under its real name.
    cp -a "$a.mv" "$a" || {
        mv -f "$a.mv" "$a"
        echo "Failed to realloc $a."
        exit 2
    }

    # Do not report success while the moved-aside original is still around.
    rm "$a.mv" || {
        echo "Failed to remove $a.mv."
        exit 2
    }
    echo ok
done
--- clean//fs/ext2/inode.c Fri May 14 20:42:39 1999
+++ linux/fs/ext2/inode.c Thu Jun 10 00:24:38 1999
@@ -103,6 +103,21 @@
wait_on_super (inode->i_sb);
#ifdef EXT2_PREALLOCATE
+ if (inode->u.ext2_i.i_alloc_elsewhere) {
+ if (!inode->u.ext2_i.i_elsewhere_goal) {
+ goal = (inode->u.ext2_i.i_block_group *
+ EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+ le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
+ if (goal > 512)
+ goal -= 512;
+ } else goal = inode->u.ext2_i.i_elsewhere_goal;
+// printk( "Allocating goal = %d, ", goal );
+ result = ext2_new_block (inode, goal, 0, 0, err);
+ inode->u.ext2_i.i_elsewhere_goal = result;
+// printk( "got = %d\n", result );
+ return result;
+ }
+
if (inode->u.ext2_i.i_prealloc_count &&
(goal == inode->u.ext2_i.i_prealloc_block ||
goal + 1 == inode->u.ext2_i.i_prealloc_block))
@@ -255,8 +270,8 @@
}
ext2_debug ("goal = %d.\n", goal);
-
tmp = ext2_alloc_block (inode, goal, err);
+
if (!tmp)
return NULL;
result = getblk (inode->i_dev, tmp, inode->i_sb->s_blocksize);
@@ -266,8 +281,12 @@
goto repeat;
}
*p = tmp;
- inode->u.ext2_i.i_next_alloc_block = new_block;
- inode->u.ext2_i.i_next_alloc_goal = tmp;
+
+ if (!inode->u.ext2_i.i_alloc_elsewhere) {
+ inode->u.ext2_i.i_next_alloc_block = new_block;
+ inode->u.ext2_i.i_next_alloc_goal = tmp;
+ }
+
inode->i_ctime = CURRENT_TIME;
inode->i_blocks += blocks;
if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
@@ -358,8 +377,10 @@
inode->i_ctime = CURRENT_TIME;
inode->i_blocks += blocks;
mark_inode_dirty(inode);
- inode->u.ext2_i.i_next_alloc_block = new_block;
- inode->u.ext2_i.i_next_alloc_goal = tmp;
+ if (!inode->u.ext2_i.i_alloc_elsewhere) {
+ inode->u.ext2_i.i_next_alloc_block = new_block;
+ inode->u.ext2_i.i_next_alloc_goal = tmp;
+ }
brelse (bh);
return result;
}
@@ -402,9 +423,12 @@
b = block;
if (block < EXT2_NDIR_BLOCKS)
return inode_getblk (inode, block, create, b, err);
+ inode->u.ext2_i.i_alloc_elsewhere = 1;
+ inode->u.ext2_i.i_elsewhere_goal = 0;
block -= EXT2_NDIR_BLOCKS;
if (block < addr_per_block) {
bh = inode_getblk (inode, EXT2_IND_BLOCK, create, b, err);
+ inode->u.ext2_i.i_alloc_elsewhere = 0;
return block_getblk (inode, bh, block, create,
inode->i_sb->s_blocksize, b, err);
}
@@ -413,6 +437,7 @@
bh = inode_getblk (inode, EXT2_DIND_BLOCK, create, b, err);
bh = block_getblk (inode, bh, block >> addr_per_block_bits,
create, inode->i_sb->s_blocksize, b, err);
+ inode->u.ext2_i.i_alloc_elsewhere = 0;
return block_getblk (inode, bh, block & (addr_per_block - 1),
create, inode->i_sb->s_blocksize, b, err);
}
@@ -422,6 +447,7 @@
create, inode->i_sb->s_blocksize, b, err);
bh = block_getblk (inode, bh, (block >> addr_per_block_bits) & (addr_per_block - 1),
create, inode->i_sb->s_blocksize, b, err);
+ inode->u.ext2_i.i_alloc_elsewhere = 0;
return block_getblk (inode, bh, block & (addr_per_block - 1), create,
inode->i_sb->s_blocksize, b, err);
}
@@ -542,6 +568,7 @@
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
inode->i_version = ++event;
inode->u.ext2_i.i_new_inode = 0;
+ inode->u.ext2_i.i_alloc_elsewhere = 0;
inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags);
inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
--- clean//include/linux/ext2_fs_i.h Fri May 14 20:42:46 1999
+++ linux/include/linux/ext2_fs_i.h Wed Jun 9 00:14:59 1999
@@ -37,6 +37,8 @@
__u32 i_prealloc_count;
__u32 i_high_size;
int i_new_inode:1; /* Is a freshly allocated inode */
+ int i_alloc_elsewhere:1;
+ __u32 i_elsewhere_goal;
};
#endif /* _LINUX_EXT2_FS_I */
-- 
I'm really pavel@ucw.cz. Look at http://195.113.31.123/~pavel.
Pavel
Hi! I'm a .signature virus! Copy me into your ~/.signature, please!
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/