Re: [PATCH v2] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock
From: Joseph Qi
Date: Mon Jul 07 2025 - 01:58:58 EST
On 2025/7/7 11:29, Ivan Pravdin wrote:
> In ocfs2_move_extent(), tl_inode is currently locked after the global
> bitmap inode. However, in ocfs2_flush_truncate_log(), the lock order
> is reversed: tl_inode is locked first, followed by the global bitmap
> inode.
>
> This creates a classic ABBA deadlock scenario if two threads attempt
> these operations concurrently and acquire the locks in different orders.
>
> To prevent this, move the tl_inode locking earlier in
> ocfs2_move_extent(), so that it always precedes the global bitmap
> inode lock.
>
> No functional changes beyond lock ordering.
>
> Reported-by: syzbot+6bf948e47f9bac7aacfa@xxxxxxxxxxxxxxxxxxxxxxxxx
> Closes: https://lore.kernel.org/all/67d5645c.050a0220.1dc86f.0004.GAE@xxxxxxxxxx/
> Signed-off-by: Ivan Pravdin <ipravdin.official@xxxxxxxxx>
> ---
> v1 -> v2: Fixed unlocking order in ocfs2_move_extent.
>
> fs/ocfs2/move_extents.c | 13 +++++++------
> 1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
> index 369c7d27befd..aaf8eb2693a4 100644
> --- a/fs/ocfs2/move_extents.c
> +++ b/fs/ocfs2/move_extents.c
> @@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
> */
> credits += OCFS2_INODE_UPDATE_CREDITS + 1;
>
> + inode_lock(tl_inode);
> +
> /*
> * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
> * logic, while we still need to lock the global_bitmap.
> @@ -637,13 +639,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
> goto out_unlock_gb_mutex;
> }
>
You've missed unlocking tl_inode when ocfs2_get_system_file_inode() fails.
> - inode_lock(tl_inode);
> -
> handle = ocfs2_start_trans(osb, credits);
> if (IS_ERR(handle)) {
> ret = PTR_ERR(handle);
> mlog_errno(ret);
> - goto out_unlock_tl_inode;
> + goto out_unlock_gb_inode;
> }
>
> new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
> @@ -704,12 +704,13 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
> ocfs2_commit_trans(osb, handle);
> brelse(gd_bh);
>
> -out_unlock_tl_inode:
> - inode_unlock(tl_inode);
> -
> +out_unlock_gb_inode:
> ocfs2_inode_unlock(gb_inode, 1);
> +
> out_unlock_gb_mutex:
The inode lock has been changed to an rw_semaphore, so 'mutex' in this label name is no longer appropriate.
> inode_unlock(gb_inode);
> +
> + inode_unlock(tl_inode);
> brelse(gb_bh);
> iput(gb_inode);
>
How about the following alternative:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 369c7d27befd..d56c337204f6 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -611,6 +611,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
goto out;
}
+ inode_lock(tl_inode);
+
/*
* need to count 2 extra credits for global_bitmap inode and
* group descriptor.
@@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
if (!gb_inode) {
mlog(ML_ERROR, "unable to get global_bitmap inode\n");
ret = -EIO;
- goto out;
+ goto out_unlock_tl_inode;
}
inode_lock(gb_inode);
@@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
mlog_errno(ret);
- goto out_unlock_gb_mutex;
+ goto out_unlock_gb_inode;
}
- inode_lock(tl_inode);
-
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
- goto out_unlock_tl_inode;
+ goto out_unlock;
}
new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -703,16 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
out_commit:
ocfs2_commit_trans(osb, handle);
brelse(gd_bh);
-
-out_unlock_tl_inode:
- inode_unlock(tl_inode);
-
+out_unlock:
ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
+out_unlock_gb_inode:
inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
-
+out_unlock_tl_inode:
+ inode_unlock(tl_inode);
out:
if (context->meta_ac) {
ocfs2_free_alloc_context(context->meta_ac);