[PATCH 2/2] ext4: Send barriers on fsync only when needed

From: Jan Kara
Date: Wed Jul 21 2010 - 13:01:51 EST


It isn't necessary to send a barrier to disk for fsync of file 'f'
when we have already sent one after all the data of 'f' was written.
Implement logic to detect this condition and avoid sending a barrier
in this case.

We use counters of submitted and completed IO barriers for a block device.
When a page is written to the block device, we store the current number of
submitted barriers in the inode. When we handle fsync, we check whether
the number of completed barriers has already advanced past that stored
value; if it has, a barrier completed after the data was written and no
new one needs to be sent.

Signed-off-by: Jan Kara <jack@xxxxxxx>
---
fs/ext4/ext4.h | 4 +++-
fs/ext4/fsync.c | 19 +++++++++++++++++--
fs/ext4/inode.c | 4 ++++
3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de5..cc67e72 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -832,10 +832,12 @@ struct ext4_inode_info {

/*
* Transactions that contain inode's metadata needed to complete
- * fsync and fdatasync, respectively.
+ * fsync and fdatasync, respectively, and the barrier id at the time
+ * we last wrote data to this file.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
+ unsigned i_data_bid;
};

/*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 592adf2..d8a6995 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -57,6 +57,21 @@ static void ext4_sync_parent(struct inode *inode)
}
}

+static int ext4_need_issue_data_flush(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ int comp_bid, inode_bid = ei->i_data_bid;
+
+ if (!(journal->j_flags & JBD2_BARRIER))
+ return 0;
+ comp_bid = atomic_read(&inode->i_sb->s_bdev->bd_barriers_completed);
+ /* inode_bid < completed_bid safe against wrapping */
+ if (inode_bid - comp_bid < 0)
+ return 0;
+ return 1;
+}
+
/*
* akpm: A new design for ext4_sync_file().
*
@@ -126,11 +141,11 @@ int ext4_sync_file(struct file *file, int datasync)
*/
if (ext4_should_writeback_data(inode) &&
(journal->j_fs_dev != journal->j_dev) &&
- (journal->j_flags & JBD2_BARRIER))
+ ext4_need_issue_data_flush(inode))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
NULL, BLKDEV_IFL_WAIT);
ret = jbd2_log_wait_commit(journal, commit_tid);
- } else if (journal->j_flags & JBD2_BARRIER)
+ } else if (ext4_need_issue_data_flush(inode))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
BLKDEV_IFL_WAIT);
return ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d6..8d57aae 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2758,6 +2758,10 @@ static int ext4_writepage(struct page *page,
} else
ret = block_write_full_page(page, noalloc_get_block_write,
wbc);
+ /* Make sure we read current value of bd_barriers_sent */
+ smp_rmb();
+ EXT4_I(inode)->i_data_bid =
+ atomic_read(&inode->i_sb->s_bdev->bd_barriers_sent);

return ret;
}
--
1.6.4.2


--3MwIy2ne0vdjdPXF--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/