Re: [f2fs-dev] [PATCH v4] f2fs: Fix indefinite loop in f2fs_gc()

From: Jaegeuk Kim
Date: Fri Aug 09 2019 - 11:36:57 EST


On 08/07, Chao Yu wrote:
> On 2019-8-7 21:40, Sahitya Tummala wrote:
> > Policy - Foreground GC, LFS and greedy GC mode.
> >
> > Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> > enough free segements to proceed and thus it keeps calling gc_more
> > for the same victim segment. This can happen if the selected victim
> > segment could not be GC'd due to failed blkaddr validity check i.e.
> > is_alive() returns false for the blocks set in current validity map.
> >
> > Fix this by keeping track of such invalid segments and skip those
> > segments for selection in get_victim_by_default() to avoid endless
> > GC loop under such error scenarios. Currently, add this logic under
> > CONFIG_F2FS_CHECK_FS to be able to root cause the issue in debug
> > version.
> >
> > Signed-off-by: Sahitya Tummala <stummala@xxxxxxxxxxxxxx>
> > ---
> > v4: Cover all logic with CONFIG_F2FS_CHECK_FS
> >
> > fs/f2fs/gc.c | 31 +++++++++++++++++++++++++++++--
> > fs/f2fs/segment.c | 14 +++++++++++++-
> > fs/f2fs/segment.h | 3 +++
> > 3 files changed, 45 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 8974672..cbcacbd 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -382,6 +382,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
> > nsearched++;
> > }
> >
> > +#ifdef CONFIG_F2FS_CHECK_FS
> > + /*
> > + * skip selecting the invalid segno (that is failed due to block
> > + * validity check failure during GC) to avoid endless GC loop in
> > + * such cases.
> > + */
> > + if (test_bit(segno, sm->invalid_segmap))
> > + goto next;
> > +#endif
> > +
> > secno = GET_SEC_FROM_SEG(sbi, segno);
> >
> > if (sec_usage_check(sbi, secno))
> > @@ -602,8 +612,15 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
> > {
> > struct page *node_page;
> > nid_t nid;
> > - unsigned int ofs_in_node;
> > + unsigned int ofs_in_node, segno;
> > block_t source_blkaddr;
> > + unsigned long offset;
> > +#ifdef CONFIG_F2FS_CHECK_FS
> > + struct sit_info *sit_i = SIT_I(sbi);
> > +#endif
> > +
> > + segno = GET_SEGNO(sbi, blkaddr);
> > + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
> >
> > nid = le32_to_cpu(sum->nid);
> > ofs_in_node = le16_to_cpu(sum->ofs_in_node);
> > @@ -627,8 +644,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
> > source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
> > f2fs_put_page(node_page, 1);
> >
> > - if (source_blkaddr != blkaddr)
> > + if (source_blkaddr != blkaddr) {
> > +#ifdef CONFIG_F2FS_CHECK_FS
>
> unsigned int segno = GET_SEGNO(sbi, blkaddr);
> unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
>
> Should be local, otherwise it looks good to me, I think Jaegeuk can help to fix
> this while merging.

Fixed a bit, and merged.
Thanks~

>
> Reviewed-by: Chao Yu <yuchao0@xxxxxxxxxx>
>
> Thanks,
>
> > + if (unlikely(check_valid_map(sbi, segno, offset))) {
> > + if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
> > + f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
> > + blkaddr, source_blkaddr, segno);
> > + f2fs_bug_on(sbi, 1);
> > + }
> > + }
> > +#endif
> > return false;
> > + }
> > return true;
> > }
> >
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index a661ac3..ee795b1 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -806,6 +806,9 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> > enum dirty_type dirty_type)
> > {
> > struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> > +#ifdef CONFIG_F2FS_CHECK_FS
> > + struct sit_info *sit_i = SIT_I(sbi);
> > +#endif
> >
> > if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
> > dirty_i->nr_dirty[dirty_type]--;
> > @@ -817,9 +820,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> > if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
> > dirty_i->nr_dirty[t]--;
> >
> > - if (get_valid_blocks(sbi, segno, true) == 0)
> > + if (get_valid_blocks(sbi, segno, true) == 0) {
> > clear_bit(GET_SEC_FROM_SEG(sbi, segno),
> > dirty_i->victim_secmap);
> > +#ifdef CONFIG_F2FS_CHECK_FS
> > + clear_bit(segno, sit_i->invalid_segmap);
> > +#endif
> > + }
> > }
> > }
> >
> > @@ -4015,6 +4022,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
> > sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
> > if (!sit_i->sit_bitmap_mir)
> > return -ENOMEM;
> > +
> > + sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
> > + if (!sit_i->invalid_segmap)
> > + return -ENOMEM;
> > #endif
> >
> > /* init SIT information */
> > @@ -4517,6 +4528,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
> > kvfree(sit_i->sit_bitmap);
> > #ifdef CONFIG_F2FS_CHECK_FS
> > kvfree(sit_i->sit_bitmap_mir);
> > + kvfree(sit_i->invalid_segmap);
> > #endif
> > kvfree(sit_i);
> > }
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index b746028..9370d53 100644
> > --- a/fs/f2fs/segment.h
> > +++ b/fs/f2fs/segment.h
> > @@ -229,6 +229,9 @@ struct sit_info {
> > char *sit_bitmap; /* SIT bitmap pointer */
> > #ifdef CONFIG_F2FS_CHECK_FS
> > char *sit_bitmap_mir; /* SIT bitmap mirror */
> > +
> > + /* bitmap of segments to be ignored by GC in case of errors */
> > + unsigned long *invalid_segmap;
> > #endif
> > unsigned int bitmap_size; /* SIT bitmap size */
> >
> >