Re: [PATCH v4] f2fs: fix to do sanity check on total_data_blocks

From: Jaegeuk Kim
Date: Fri May 06 2022 - 19:22:13 EST


I added a macro to clean up. Could you please check this out?

https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev&id=6b8beca0edd32075a769bfe4178ca00c0dcd22a9

On 05/06, Chao Yu wrote:
> As Yanming reported in bugzilla:
>
> https://bugzilla.kernel.org/show_bug.cgi?id=215916
>
> The kernel message is shown below:
>
> kernel BUG at fs/f2fs/segment.c:2560!
> Call Trace:
> allocate_segment_by_default+0x228/0x440
> f2fs_allocate_data_block+0x13d1/0x31f0
> do_write_page+0x18d/0x710
> f2fs_outplace_write_data+0x151/0x250
> f2fs_do_write_data_page+0xef9/0x1980
> move_data_page+0x6af/0xbc0
> do_garbage_collect+0x312f/0x46f0
> f2fs_gc+0x6b0/0x3bc0
> f2fs_balance_fs+0x921/0x2260
> f2fs_write_single_data_page+0x16be/0x2370
> f2fs_write_cache_pages+0x428/0xd00
> f2fs_write_data_pages+0x96e/0xd50
> do_writepages+0x168/0x550
> __writeback_single_inode+0x9f/0x870
> writeback_sb_inodes+0x47d/0xb20
> __writeback_inodes_wb+0xb2/0x200
> wb_writeback+0x4bd/0x660
> wb_workfn+0x5f3/0xab0
> process_one_work+0x79f/0x13e0
> worker_thread+0x89/0xf60
> kthread+0x26a/0x300
> ret_from_fork+0x22/0x30
> RIP: 0010:new_curseg+0xe8d/0x15f0
>
> The root cause is that ckpt.valid_block_count is inconsistent with the SIT
> table: stat info indicates the filesystem has free blocks, but the SIT table
> indicates the filesystem has no free segments.
>
> As a result, during garbage collection, it triggers a panic when the LFS
> allocator fails to find a free segment.
>
> This patch tries to fix this issue by checking consistency between
> ckpt.valid_block_count and the block count accounted from the SIT.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Reported-by: Ming Yan <yanming@xxxxxxxxxx>
> Signed-off-by: Chao Yu <chao.yu@xxxxxxxx>
> ---
> v4:
> - fix to set data/node type correctly.
> fs/f2fs/segment.c | 37 ++++++++++++++++++++++++++-----------
> 1 file changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 3a3e2cec2ac4..4735d477059d 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4461,7 +4461,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> unsigned int i, start, end;
> unsigned int readed, start_blk = 0;
> int err = 0;
> - block_t total_node_blocks = 0;
> + block_t sit_valid_blocks[2] = {0, 0};
> + int type;
>
> do {
> readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
> @@ -4486,8 +4487,9 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> if (err)
> return err;
> seg_info_from_raw_sit(se, &sit);
> - if (IS_NODESEG(se->type))
> - total_node_blocks += se->valid_blocks;
> +
> + type = IS_NODESEG(se->type) ? NODE : DATA;
> + sit_valid_blocks[type] += se->valid_blocks;
>
> if (f2fs_block_unit_discard(sbi)) {
> /* build discard map only one time */
> @@ -4527,15 +4529,17 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> sit = sit_in_journal(journal, i);
>
> old_valid_blocks = se->valid_blocks;
> - if (IS_NODESEG(se->type))
> - total_node_blocks -= old_valid_blocks;
> +
> + type = IS_NODESEG(se->type) ? NODE : DATA;
> + sit_valid_blocks[type] -= old_valid_blocks;
>
> err = check_block_count(sbi, start, &sit);
> if (err)
> break;
> seg_info_from_raw_sit(se, &sit);
> - if (IS_NODESEG(se->type))
> - total_node_blocks += se->valid_blocks;
> +
> + type = IS_NODESEG(se->type) ? NODE : DATA;
> + sit_valid_blocks[type] += se->valid_blocks;
>
> if (f2fs_block_unit_discard(sbi)) {
> if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
> @@ -4557,13 +4561,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> }
> up_read(&curseg->journal_rwsem);
>
> - if (!err && total_node_blocks != valid_node_count(sbi)) {
> + if (err)
> + return err;
> +
> + if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
> f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
> - total_node_blocks, valid_node_count(sbi));
> - err = -EFSCORRUPTED;
> + sit_valid_blocks[NODE], valid_node_count(sbi));
> + return -EFSCORRUPTED;
> }
>
> - return err;
> + if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
> + valid_user_blocks(sbi)) {
> + f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
> + sit_valid_blocks[DATA], sit_valid_blocks[NODE],
> + valid_user_blocks(sbi));
> + return -EFSCORRUPTED;
> + }
> +
> + return 0;
> }
>
> static void init_free_segmap(struct f2fs_sb_info *sbi)
> --
> 2.25.1