[patch] sched, ext3: fix scheduling latencies in ext3

From: Ingo Molnar
Date: Tue Sep 14 2004 - 05:10:23 EST



The attached patch fixes long scheduling latencies in the ext3 and JBD
code, and it also cleans up the existing lock-break functionality to use
the new primitives (cond_resched_lock() and lock_need_resched()).

This patch has been in the -VP patchset for quite some time.

Ingo

The attached patch fixes long scheduling latencies in the ext3 and JBD
code, and it also cleans up the existing lock-break functionality to use
the new primitives (cond_resched_lock() and lock_need_resched()).

This patch has been in the -VP patchset for quite some time.

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>

--- linux/fs/jbd/checkpoint.c.orig
+++ linux/fs/jbd/checkpoint.c
@@ -333,6 +333,10 @@ int log_do_checkpoint(journal_t *journal
break;
}
retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+ if (cond_resched_lock(&journal->j_list_lock)) {
+ retry = 1;
+ break;
+ }
} while (jh != last_jh && !retry);

if (batch_count)
@@ -487,6 +491,14 @@ int __journal_clean_checkpoint_list(jour
/* Use trylock because of the ranknig */
if (jbd_trylock_bh_state(jh2bh(jh)))
ret += __try_to_free_cp_buf(jh);
+ /*
+ * This function only frees up some memory
+ * if possible so we don't have an obligation
+ * to finish processing. Bail out if preemption
+ * requested:
+ */
+ if (need_resched())
+ goto out;
} while (jh != last_jh);
}
} while (transaction != last_transaction);
--- linux/fs/jbd/commit.c.orig
+++ linux/fs/jbd/commit.c
@@ -262,7 +262,7 @@ write_out_data:
__journal_file_buffer(jh, commit_transaction,
BJ_Locked);
jbd_unlock_bh_state(bh);
- if (need_resched()) {
+ if (lock_need_resched(&journal->j_list_lock)) {
spin_unlock(&journal->j_list_lock);
goto write_out_data;
}
@@ -288,7 +288,7 @@ write_out_data:
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
put_bh(bh);
- if (need_resched()) {
+ if (lock_need_resched(&journal->j_list_lock)) {
spin_unlock(&journal->j_list_lock);
goto write_out_data;
}
@@ -333,11 +333,7 @@ write_out_data:
jbd_unlock_bh_state(bh);
}
put_bh(bh);
- if (need_resched()) {
- spin_unlock(&journal->j_list_lock);
- cond_resched();
- spin_lock(&journal->j_list_lock);
- }
+ cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);

@@ -545,6 +541,8 @@ wait_for_iobuf:
wait_on_buffer(bh);
goto wait_for_iobuf;
}
+ if (cond_resched())
+ goto wait_for_iobuf;

if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
@@ -599,6 +597,8 @@ wait_for_iobuf:
wait_on_buffer(bh);
goto wait_for_ctlbuf;
}
+ if (cond_resched())
+ goto wait_for_ctlbuf;

if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
@@ -719,6 +719,7 @@ skip_commit: /* The journal should be un
J_ASSERT(commit_transaction->t_shadow_list == NULL);
J_ASSERT(commit_transaction->t_log_list == NULL);

+restart_loop:
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
@@ -792,6 +793,8 @@ skip_commit: /* The journal should be un
release_buffer_page(bh);
}
spin_unlock(&journal->j_list_lock);
+ if (cond_resched())
+ goto restart_loop;
}

/* Done with this transaction! */
--- linux/fs/jbd/recovery.c.orig
+++ linux/fs/jbd/recovery.c
@@ -354,6 +354,8 @@ static int do_one_pass(journal_t *journa
struct buffer_head * obh;
struct buffer_head * nbh;

+ cond_resched(); /* We're under lock_kernel() */
+
/* If we already know where to stop the log traversal,
* check right now that we haven't gone past the end of
* the log. */
--- linux/fs/ext3/ialloc.c.orig
+++ linux/fs/ext3/ialloc.c
@@ -733,6 +733,7 @@ unsigned long ext3_count_free_inodes (st
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+ cond_resched();
}
return desc_count;
#endif
--- linux/fs/ext3/namei.c.orig
+++ linux/fs/ext3/namei.c
@@ -674,6 +674,7 @@ static int dx_make_map (struct ext3_dir_
map_tail->hash = h.hash;
map_tail->offs = (u32) ((char *) de - base);
count++;
+ cond_resched();
}
/* XXX: do we need to check rec_len == 0 case? -Chris */
de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
--- linux/fs/ext3/super.c.orig
+++ linux/fs/ext3/super.c
@@ -1231,6 +1231,8 @@ static int ext3_fill_super (struct super
sbi->s_resuid = EXT3_DEF_RESUID;
sbi->s_resgid = EXT3_DEF_RESGID;

+ unlock_kernel();
+
blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
if (!blocksize) {
printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
@@ -1576,6 +1578,7 @@ static int ext3_fill_super (struct super
percpu_counter_mod(&sbi->s_dirs_counter,
ext3_count_dirs(sb));

+ lock_kernel();
return 0;

failed_mount3:
@@ -1597,6 +1600,7 @@ failed_mount:
out_fail:
sb->s_fs_info = NULL;
kfree(sbi);
+ lock_kernel();
return -EINVAL;
}

@@ -2113,9 +2117,11 @@ int ext3_statfs (struct super_block * sb
* block group descriptors. If the sparse superblocks
* feature is turned on, then not all groups have this.
*/
- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
overhead += ext3_bg_has_super(sb, i) +
ext3_bg_num_gdb(sb, i);
+ cond_resched();
+ }

/*
* Every block group has an inode bitmap, a block
--- linux/fs/ext3/balloc.c.orig
+++ linux/fs/ext3/balloc.c
@@ -207,6 +207,11 @@ do_more:
}
jbd_lock_bh_state(bitmap_bh);
#endif
+ if (need_resched()) {
+ jbd_unlock_bh_state(bitmap_bh);
+ cond_resched();
+ jbd_lock_bh_state(bitmap_bh);
+ }
/* @@@ This prevents newly-allocated data from being
* freed and then reallocated within the same
* transaction.