[PATCH] concurrent inode allocation for ext2 against 2.5.64

From: Alex Tomas (bzzz@tmi.comex.ru)
Date: Sat Mar 15 2003 - 16:01:38 EST


hi!

Here is the patch for concurrent inode allocation in ext2. It should be applied
on top of the previous concurrent-balloc patch. Tested on a dual P3 for several
hours of stress testing + fsck. I hope someone will test it on big iron ;)

diff -uNr linux/fs/ext2/ialloc.c edited/fs/ext2/ialloc.c
--- linux/fs/ext2/ialloc.c Sat Mar 15 23:34:17 2003
+++ edited/fs/ext2/ialloc.c Sat Mar 15 23:05:19 2003
@@ -63,6 +63,52 @@
         return bh;
 }
 
+void ext2_reserve_inode (struct super_block * sb, int group, int dir)
+{
+ struct ext2_group_desc * desc;
+ struct buffer_head *bh;
+
+ desc = ext2_get_group_desc(sb, group, &bh);
+ if (!desc) {
+ ext2_error(sb, "ext2_reserve_inode",
+ "can't get descriptor for group %d", group);
+ return;
+ }
+
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ desc->bg_free_inodes_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+ if (dir)
+ desc->bg_used_dirs_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
+ mark_buffer_dirty(bh);
+}
+
+void ext2_release_inode (struct super_block * sb, int group, int dir)
+{
+ struct ext2_group_desc * desc;
+ struct buffer_head *bh;
+
+ desc = ext2_get_group_desc(sb, group, &bh);
+ if (!desc) {
+ ext2_error(sb, "ext2_release_inode",
+ "can't get descriptor for group %d", group);
+ return;
+ }
+
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ desc->bg_free_inodes_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
+ if (dir)
+ desc->bg_used_dirs_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
+ mark_buffer_dirty(bh);
+}
+
 /*
  * NOTE! When we get the inode, we're the only people
  * that have access to it, and as such there are no
@@ -85,10 +131,8 @@
         int is_directory;
         unsigned long ino;
         struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *bh2;
         unsigned long block_group;
         unsigned long bit;
- struct ext2_group_desc * desc;
         struct ext2_super_block * es;
 
         ino = inode->i_ino;
@@ -105,7 +149,6 @@
                 DQUOT_DROP(inode);
         }
 
- lock_super (sb);
         es = EXT2_SB(sb)->s_es;
         is_directory = S_ISDIR(inode->i_mode);
 
@@ -126,32 +169,17 @@
                 goto error_return;
 
         /* Ok, now we can actually update the inode bitmaps.. */
- if (!ext2_clear_bit(bit, bitmap_bh->b_data))
+ if (!ext2_clear_bit_atomic(&EXT2_SB(sb)->s_bgi[block_group].ialloc_lock,
+ bit, (void *) bitmap_bh->b_data))
                 ext2_error (sb, "ext2_free_inode",
                               "bit already cleared for inode %lu", ino);
- else {
- desc = ext2_get_group_desc (sb, block_group, &bh2);
- if (desc) {
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
- if (is_directory) {
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
- EXT2_SB(sb)->s_dir_count--;
- }
- }
- mark_buffer_dirty(bh2);
- es->s_free_inodes_count =
- cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
- }
+ else
+ ext2_release_inode(sb, block_group, is_directory);
         mark_buffer_dirty(bitmap_bh);
         if (sb->s_flags & MS_SYNCHRONOUS)
                 sync_dirty_buffer(bitmap_bh);
- sb->s_dirt = 1;
 error_return:
         brelse(bitmap_bh);
- unlock_super (sb);
 }
 
 /*
@@ -211,9 +239,8 @@
  */
 static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
- struct ext2_super_block * es = EXT2_SB(sb)->s_es;
         int ngroups = EXT2_SB(sb)->s_groups_count;
- int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+ int avefreei = ext2_count_free_inodes(sb) / ngroups;
         struct ext2_group_desc *desc, *best_desc = NULL;
         struct buffer_head *bh, *best_bh = NULL;
         int group, best_group = -1;
@@ -234,11 +261,9 @@
         }
         if (!best_desc)
                 return -1;
- best_desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1);
- best_desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1);
- mark_buffer_dirty(best_bh);
+
+ ext2_reserve_inode(sb, best_group, 1);
+
         return best_group;
 }
 
@@ -277,11 +302,12 @@
         struct ext2_super_block *es = sbi->s_es;
         int ngroups = sbi->s_groups_count;
         int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
- int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+ int freei = ext2_count_free_inodes(sb);
+ int avefreei = freei / ngroups;
         int free_blocks = ext2_count_free_blocks(sb);
         int avefreeb = free_blocks / ngroups;
         int blocks_per_dir;
- int ndirs = sbi->s_dir_count;
+ int ndirs = ext2_count_dirs(sb);
         int max_debt, max_dirs, min_blocks, min_inodes;
         int group = -1, i;
         struct ext2_group_desc *desc;
@@ -364,12 +390,8 @@
         return -1;
 
 found:
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
- sbi->s_dir_count++;
- mark_buffer_dirty(bh);
+ ext2_reserve_inode(sb, group, 1);
+
         return group;
 }
 
@@ -431,9 +453,8 @@
         return -1;
 
 found:
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
- mark_buffer_dirty(bh);
+ ext2_reserve_inode(sb, group, 0);
+
         return group;
 }
 
@@ -456,7 +477,6 @@
                 return ERR_PTR(-ENOMEM);
 
         ei = EXT2_I(inode);
- lock_super (sb);
         es = EXT2_SB(sb)->s_es;
 repeat:
         if (S_ISDIR(mode)) {
@@ -480,7 +500,12 @@
                                       EXT2_INODES_PER_GROUP(sb));
         if (i >= EXT2_INODES_PER_GROUP(sb))
                 goto bad_count;
- ext2_set_bit(i, bitmap_bh->b_data);
+ if (ext2_set_bit_atomic(&EXT2_SB(sb)->s_bgi[group].ialloc_lock,
+ i, (void *) bitmap_bh->b_data)) {
+ brelse(bitmap_bh);
+ ext2_release_inode(sb, group, S_ISDIR(mode));
+ goto repeat;
+ }
 
        mark_buffer_dirty(bitmap_bh);
         if (sb->s_flags & MS_SYNCHRONOUS)
@@ -497,9 +524,7 @@
                 goto fail2;
         }
 
- es->s_free_inodes_count =
- cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
-
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
         if (S_ISDIR(mode)) {
                 if (EXT2_SB(sb)->s_bgi[group].debts < 255)
                         EXT2_SB(sb)->s_bgi[group].debts++;
@@ -507,9 +532,8 @@
                 if (EXT2_SB(sb)->s_bgi[group].debts)
                         EXT2_SB(sb)->s_bgi[group].debts--;
         }
-
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
- sb->s_dirt = 1;
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
         inode->i_uid = current->fsuid;
         if (test_opt (sb, GRPID))
                 inode->i_gid = dir->i_gid;
@@ -552,7 +576,6 @@
         inode->i_generation = EXT2_SB(sb)->s_next_generation++;
         insert_inode_hash(inode);
 
- unlock_super(sb);
         if(DQUOT_ALLOC_INODE(inode)) {
                 DQUOT_DROP(inode);
                 goto fail3;
@@ -574,15 +597,8 @@
         return ERR_PTR(err);
 
 fail2:
- desc = ext2_get_group_desc (sb, group, &bh2);
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
- if (S_ISDIR(mode))
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
- mark_buffer_dirty(bh2);
+ ext2_release_inode(sb, group, S_ISDIR(mode));
 fail:
- unlock_super(sb);
         make_bad_inode(inode);
         iput(inode);
         return ERR_PTR(err);
@@ -605,16 +621,19 @@
 
 unsigned long ext2_count_free_inodes (struct super_block * sb)
 {
+ struct ext2_group_desc *desc;
+ unsigned long desc_count = 0;
+ int i;
+
 #ifdef EXT2FS_DEBUG
         struct ext2_super_block * es;
- unsigned long desc_count = 0, bitmap_count = 0;
+ unsigned long bitmap_count = 0;
         struct buffer_head *bitmap_bh = NULL;
         int i;
 
         lock_super (sb);
         es = EXT2_SB(sb)->s_es;
         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
- struct ext2_group_desc *desc;
                 unsigned x;
 
                 desc = ext2_get_group_desc (sb, i, NULL);
@@ -637,7 +656,13 @@
         unlock_super(sb);
         return desc_count;
 #else
- return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count);
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_inodes_count);
+ }
+ return desc_count;
 #endif
 }
 
diff -uNr linux/fs/ext2/super.c edited/fs/ext2/super.c
--- linux/fs/ext2/super.c Sat Mar 15 23:34:17 2003
+++ edited/fs/ext2/super.c Sat Mar 15 22:15:51 2003
@@ -510,6 +510,7 @@
         
         /* restore free blocks counter in SB -bzzz */
         es->s_free_blocks_count = total_free = ext2_count_free_blocks(sb);
+ es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
 
         /* distribute reserved blocks over groups -bzzz */
         for(i = sbi->s_groups_count-1; reserved && total_free && i >= 0; i--) {
@@ -802,6 +803,7 @@
                 sbi->s_bgi[i].debts = 0;
                 sbi->s_bgi[i].reserved = 0;
                 spin_lock_init(&sbi->s_bgi[i].balloc_lock);
+ spin_lock_init(&sbi->s_bgi[i].ialloc_lock);
         }
         for (i = 0; i < db_count; i++) {
                 block = descriptor_loc(sb, logic_sb_block, i);
@@ -869,6 +871,7 @@
 static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
 {
         es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+ es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
         es->s_wtime = cpu_to_le32(get_seconds());
         mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
         sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
@@ -898,6 +901,7 @@
                         es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
                                                   ~EXT2_VALID_FS);
                         es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+ es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
                         es->s_mtime = cpu_to_le32(get_seconds());
                         ext2_sync_super(sb, es);
                 } else
diff -uNr linux/include/linux/ext2_fs_sb.h edited/include/linux/ext2_fs_sb.h
--- linux/include/linux/ext2_fs_sb.h Sat Mar 15 23:34:18 2003
+++ edited/include/linux/ext2_fs_sb.h Sat Mar 15 21:38:35 2003
@@ -19,6 +19,7 @@
 struct ext2_bg_info {
         u8 debts;
         spinlock_t balloc_lock;
+ spinlock_t ialloc_lock;
         unsigned int reserved;
 } ____cacheline_aligned_in_smp;

 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Mar 15 2003 - 22:00:43 EST