Re: [f2fs-dev] [RFC v3] f2fs: extent cache: support unaligned extent

From: Jaegeuk Kim
Date: Wed Aug 04 2021 - 19:45:07 EST


Chao,

How about this?
https://github.com/jaegeuk/f2fs/commit/d6bbe121bc24dfabfedc07ba7cb6e921fb70ece0

I'm digging one bug in __insert_extent_tree w/ the patch tho.

On 08/04, Jaegeuk Kim wrote:
> On 08/04, Chao Yu wrote:
> > Compressed inode may suffer read performance issue due to it can not
> > use extent cache, so I propose to add this unaligned extent support
> > to improve it.
> >
> > Currently, it only works in readonly format f2fs image.
> >
> > Unaligned extent: in one compressed cluster, physical block number
> > will be less than logical block number, so we add an extra physical
> > block length in extent info in order to indicate such extent status.
> >
> > The idea is if one whole cluster blocks are contiguous physically,
> > once its mapping info was readed at first time, we will cache an
> > unaligned (or aligned) extent info entry in extent cache, it expects
> > that the mapping info will be hitted when rereading cluster.
> >
> > Merge policy:
> > - Aligned extents can be merged.
> > - Aligned extent and unaligned extent can not be merged.
> >
> > Signed-off-by: Chao Yu <chao@xxxxxxxxxx>
> > ---
> > v3:
> > - avoid CONFIG_F2FS_FS_COMPRESSION as much as possible
> > - clean up codes
> > fs/f2fs/compress.c | 24 ++++++++++++
> > fs/f2fs/data.c | 28 +++++++++++---
> > fs/f2fs/extent_cache.c | 88 +++++++++++++++++++++++++++++++++++++-----
> > fs/f2fs/f2fs.h | 42 +++++++++++++++++---
> > fs/f2fs/node.c | 18 +++++++++
> > 5 files changed, 179 insertions(+), 21 deletions(-)
> >
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index 4aa166d3d9bf..296ff37d4b08 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -1719,6 +1719,30 @@ void f2fs_put_page_dic(struct page *page)
> > f2fs_put_dic(dic);
> > }
> >
> > +/*
> > + * check whether cluster blocks are contiguous, and add extent cache entry
> > + * only if cluster blocks are logically and physically contiguous.
> > + */
> > +int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
> > +{
> > + bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR;
> > + int i = compressed ? 1 : 0;
> > + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
> > + dn->ofs_in_node + i);
> > +
> > + for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) {
> > + block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
> > + dn->ofs_in_node + i);
> > +
> > + if (!__is_valid_data_blkaddr(blkaddr))
> > + break;
> > + if (first_blkaddr + i - 1 != blkaddr)
> > + return 0;
> > + }
> > +
> > + return compressed ? i - 1 : i;
> > +}
> > +
> > const struct address_space_operations f2fs_compress_aops = {
> > .releasepage = f2fs_release_page,
> > .invalidatepage = f2fs_invalidate_page,
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index f7b96625e616..8cc964c54d51 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -2186,6 +2186,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > sector_t last_block_in_file;
> > const unsigned blocksize = blks_to_bytes(inode, 1);
> > struct decompress_io_ctx *dic = NULL;
> > + struct extent_info_unaligned eiu;
> > + bool from_dnode = true;
> > int i;
> > int ret = 0;
> >
> > @@ -2216,6 +2218,12 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > if (f2fs_cluster_is_empty(cc))
> > goto out;
> >
> > + if (f2fs_lookup_extent_cache_unaligned(inode, start_idx, &eiu))
> > + from_dnode = false;
> > +
> > + if (from_dnode)
> > + goto skip_reading_dnode;
> > +
> > set_new_dnode(&dn, inode, NULL, NULL, 0);
> > ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
> > if (ret)
> > @@ -2223,11 +2231,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> >
> > f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
> >
> > +skip_reading_dnode:
> > for (i = 1; i < cc->cluster_size; i++) {
> > block_t blkaddr;
> >
> > - blkaddr = data_blkaddr(dn.inode, dn.node_page,
> > - dn.ofs_in_node + i);
> > + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
> > + dn.ofs_in_node + i) :
> > + eiu.ei.blk + i;
> >
> > if (!__is_valid_data_blkaddr(blkaddr))
> > break;
> > @@ -2237,6 +2247,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > goto out_put_dnode;
> > }
> > cc->nr_cpages++;
> > +
> > + if (!from_dnode && i >= eiu.plen)
> > + break;
> > }
> >
> > /* nothing to decompress */
> > @@ -2256,8 +2269,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > block_t blkaddr;
> > struct bio_post_read_ctx *ctx;
> >
> > - blkaddr = data_blkaddr(dn.inode, dn.node_page,
> > - dn.ofs_in_node + i + 1);
> > + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
> > + dn.ofs_in_node + i + 1) :
> > + eiu.ei.blk + i + 1;
> >
> > f2fs_wait_on_block_writeback(inode, blkaddr);
> >
> > @@ -2302,13 +2316,15 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > *last_block_in_bio = blkaddr;
> > }
> >
> > - f2fs_put_dnode(&dn);
> > + if (from_dnode)
> > + f2fs_put_dnode(&dn);
> >
> > *bio_ret = bio;
> > return 0;
> >
> > out_put_dnode:
> > - f2fs_put_dnode(&dn);
> > + if (from_dnode)
> > + f2fs_put_dnode(&dn);
> > out:
> > for (i = 0; i < cc->cluster_size; i++) {
> > if (cc->rpages[i]) {
> > diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> > index 3ebf976a682d..0ea37e64031f 100644
> > --- a/fs/f2fs/extent_cache.c
> > +++ b/fs/f2fs/extent_cache.c
> > @@ -235,7 +235,7 @@ static struct kmem_cache *extent_node_slab;
> > static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
> > struct extent_tree *et, struct extent_info *ei,
> > struct rb_node *parent, struct rb_node **p,
> > - bool leftmost)
> > + bool leftmost, bool unaligned)
> > {
> > struct extent_node *en;
> >
> > @@ -247,6 +247,9 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
> > INIT_LIST_HEAD(&en->list);
> > en->et = et;
> >
> > + if (unaligned)
> > + en->plen = ((struct extent_info_unaligned *)ei)->plen;
> > +
> > rb_link_node(&en->rb_node, parent, p);
> > rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
> > atomic_inc(&et->node_cnt);
> > @@ -320,7 +323,7 @@ static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
> > struct rb_node **p = &et->root.rb_root.rb_node;
> > struct extent_node *en;
> >
> > - en = __attach_extent_node(sbi, et, ei, NULL, p, true);
> > + en = __attach_extent_node(sbi, et, ei, NULL, p, true, false);
> > if (!en)
> > return NULL;
> >
> > @@ -439,6 +442,17 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> > stat_inc_rbtree_node_hit(sbi);
> >
> > *ei = en->ei;
> > +
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
> > + f2fs_sb_has_readonly(sbi)) {
> > + struct extent_info_unaligned *eiu =
> > + (struct extent_info_unaligned *)ei;
> > +
> > + eiu->plen = en->plen;
> > + }
> > +#endif
> > +
> > spin_lock(&sbi->extent_lock);
> > if (!list_empty(&en->list)) {
> > list_move_tail(&en->list, &sbi->extent_list);
> > @@ -457,17 +471,18 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> > static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
> > struct extent_tree *et, struct extent_info *ei,
> > struct extent_node *prev_ex,
> > - struct extent_node *next_ex)
> > + struct extent_node *next_ex,
> > + bool unaligned)
> > {
> > struct extent_node *en = NULL;
> >
> > - if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
> > + if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, unaligned)) {
> > prev_ex->ei.len += ei->len;
> > ei = &prev_ex->ei;
> > en = prev_ex;
> > }
> >
> > - if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
> > + if (next_ex && __is_front_mergeable(ei, &next_ex->ei, unaligned)) {
> > next_ex->ei.fofs = ei->fofs;
> > next_ex->ei.blk = ei->blk;
> > next_ex->ei.len += ei->len;
> > @@ -495,7 +510,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
> > struct extent_tree *et, struct extent_info *ei,
> > struct rb_node **insert_p,
> > struct rb_node *insert_parent,
> > - bool leftmost)
> > + bool leftmost, bool unaligned)
> > {
> > struct rb_node **p;
> > struct rb_node *parent = NULL;
> > @@ -512,7 +527,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
> > p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
> > ei->fofs, &leftmost);
> > do_insert:
> > - en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
> > + en = __attach_extent_node(sbi, et, ei, parent, p, leftmost, unaligned);
> > if (!en)
> > return NULL;
> >
> > @@ -594,7 +609,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
> > end - dei.fofs + dei.blk,
> > org_end - end);
> > en1 = __insert_extent_tree(sbi, et, &ei,
> > - NULL, NULL, true);
> > + NULL, NULL, true, false);
> > next_en = en1;
> > } else {
> > en->ei.fofs = end;
> > @@ -633,9 +648,10 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
> > if (blkaddr) {
> >
> > set_extent_info(&ei, fofs, blkaddr, len);
> > - if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
> > + if (!__try_merge_extent_node(sbi, et, &ei,
> > + prev_en, next_en, false))
> > __insert_extent_tree(sbi, et, &ei,
> > - insert_p, insert_parent, leftmost);
> > + insert_p, insert_parent, leftmost, false);
> >
> > /* give up extent_cache, if split and small updates happen */
> > if (dei.len >= 1 &&
> > @@ -661,6 +677,47 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
> > f2fs_mark_inode_dirty_sync(inode, true);
> > }
> >
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > +void f2fs_update_extent_tree_range_unaligned(struct inode *inode,
> > + pgoff_t fofs, block_t blkaddr, unsigned int llen,
> > + unsigned int plen)
> > +{
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct extent_tree *et = F2FS_I(inode)->extent_tree;
> > + struct extent_node *en = NULL;
> > + struct extent_node *prev_en = NULL, *next_en = NULL;
> > + struct extent_info_unaligned eiu;
> > + struct rb_node **insert_p = NULL, *insert_parent = NULL;
> > + bool leftmost = false;
> > +
> > + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen);
> > +
> > + /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
> > + if (is_inode_flag_set(inode, FI_NO_EXTENT))
> > + return;
> > +
> > + write_lock(&et->lock);
> > +
> > + en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
> > + (struct rb_entry *)et->cached_en, fofs,
> > + (struct rb_entry **)&prev_en,
> > + (struct rb_entry **)&next_en,
> > + &insert_p, &insert_parent, false,
> > + &leftmost);
> > + f2fs_bug_on(sbi, en);
> > +
> > + set_extent_info(&eiu.ei, fofs, blkaddr, llen);
> > + eiu.plen = plen;
> > +
> > + if (!__try_merge_extent_node(sbi, et, (struct extent_info *)&eiu,
> > + prev_en, next_en, true))
> > + __insert_extent_tree(sbi, et, (struct extent_info *)&eiu,
> > + insert_p, insert_parent, leftmost, true);
> > +
> > + write_unlock(&et->lock);
> > +}
> > +#endif
> > +
> > unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > {
> > struct extent_tree *et, *next;
> > @@ -818,6 +875,17 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
> > return f2fs_lookup_extent_tree(inode, pgofs, ei);
> > }
> >
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > +bool f2fs_lookup_extent_cache_unaligned(struct inode *inode, pgoff_t pgofs,
> > + struct extent_info_unaligned *eiu)
> > +{
> > + if (!f2fs_may_extent_tree(inode))
> > + return false;
> > +
> > + return f2fs_lookup_extent_tree(inode, pgofs, (struct extent_info *)eiu);
> > +}
> > +#endif
> > +
> > void f2fs_update_extent_cache(struct dnode_of_data *dn)
> > {
> > pgoff_t fofs;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 75f97c50302d..618397e6e6c2 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -582,11 +582,21 @@ struct extent_info {
> > u32 blk; /* start block address of the extent */
> > };
> >
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > +struct extent_info_unaligned {
> > + struct extent_info ei; /* extent info */
> > + unsigned int plen; /* physical extent length of compressed blocks */
> > +};
> > +#endif
> > +
> > struct extent_node {
> > struct rb_node rb_node; /* rb node located in rb-tree */
> > struct extent_info ei; /* extent info */
> > struct list_head list; /* node in global extent list of sbi */
> > struct extent_tree *et; /* extent tree pointer */
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > + unsigned int plen; /* physical extent length of compressed blocks */
> > +#endif
> > };
> >
> > struct extent_tree {
> > @@ -822,22 +832,32 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur,
> > }
> >
> > static inline bool __is_extent_mergeable(struct extent_info *back,
> > - struct extent_info *front)
> > + struct extent_info *front, bool unaligned)
> > {
> > + if (unaligned) {
> > + struct extent_info_unaligned *be =
> > + (struct extent_info_unaligned *)back;
>
> back is from extent_node->ei, so is it okay to case like this?
> I feel that we may need to replace this casting approach with parameters.
>
> > + struct extent_info_unaligned *fe =
> > + (struct extent_info_unaligned *)front;
> > +
> > + if (be->ei.len != be->plen || fe->ei.len != fe->plen)
> > + return false;
> > + }
> > +
> > return (back->fofs + back->len == front->fofs &&
> > back->blk + back->len == front->blk);
> > }
> >
> > static inline bool __is_back_mergeable(struct extent_info *cur,
> > - struct extent_info *back)
> > + struct extent_info *back, bool unaligned)
> > {
> > - return __is_extent_mergeable(back, cur);
> > + return __is_extent_mergeable(back, cur, unaligned);
> > }
> >
> > static inline bool __is_front_mergeable(struct extent_info *cur,
> > - struct extent_info *front)
> > + struct extent_info *front, bool unaligned)
> > {
> > - return __is_extent_mergeable(cur, front);
> > + return __is_extent_mergeable(cur, front, unaligned);
> > }
> >
> > extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
> > @@ -4001,6 +4021,10 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode);
> > void f2fs_destroy_extent_tree(struct inode *inode);
> > bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
> > struct extent_info *ei);
> > +#ifdef CONFIG_F2FS_FS_COMPRESSION
> > +bool f2fs_lookup_extent_cache_unaligned(struct inode *inode, pgoff_t pgofs,
> > + struct extent_info_unaligned *eiu);
> > +#endif
> > void f2fs_update_extent_cache(struct dnode_of_data *dn);
> > void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
> > pgoff_t fofs, block_t blkaddr, unsigned int len);
> > @@ -4078,6 +4102,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
> > void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
> > void f2fs_put_page_dic(struct page *page);
> > +int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
> > int f2fs_init_compress_ctx(struct compress_ctx *cc);
> > void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
> > void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
> > @@ -4106,6 +4131,9 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino);
> > sbi->compr_written_block += blocks; \
> > sbi->compr_saved_block += diff; \
> > } while (0)
> > +void f2fs_update_extent_tree_range_unaligned(struct inode *inode,
> > + pgoff_t fofs, block_t blkaddr, unsigned int llen,
> > + unsigned int plen);
> > #else
> > static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
> > static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
> > @@ -4132,6 +4160,7 @@ static inline void f2fs_put_page_dic(struct page *page)
> > {
> > WARN_ON_ONCE(1);
> > }
> > +static inline int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; }
> > static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; }
> > static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { }
> > static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
> > @@ -4147,6 +4176,9 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi,
> > static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
> > nid_t ino) { }
> > #define inc_compr_inode_stat(inode) do { } while (0)
> > +static inline void f2fs_update_extent_tree_range_unaligned(struct inode *inode,
> > + pgoff_t fofs, block_t blkaddr, unsigned int llen,
> > + unsigned int plen) { }
> > #endif
> >
> > static inline void set_compress_context(struct inode *inode)
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 5840b82ce311..baf09a2e6e1f 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -841,6 +841,24 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
> > dn->ofs_in_node = offset[level];
> > dn->node_page = npage[level];
> > dn->data_blkaddr = f2fs_data_blkaddr(dn);
> > +
> > + if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
> > + f2fs_sb_has_readonly(sbi)) {
> > + int blknum = f2fs_cluster_blocks_are_contiguous(dn);
> > +
> > + if (blknum) {
> > + block_t blkaddr = f2fs_data_blkaddr(dn);
> > +
> > + if (blkaddr == COMPRESS_ADDR)
> > + blkaddr = data_blkaddr(dn->inode, dn->node_page,
> > + dn->ofs_in_node + 1);
> > +
> > + f2fs_update_extent_tree_range_unaligned(dn->inode,
> > + index, blkaddr,
> > + F2FS_I(dn->inode)->i_cluster_size,
> > + blknum);
> > + }
> > + }
> > return 0;
> >
> > release_pages:
> > --
> > 2.22.1
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@xxxxxxxxxxxxxxxxxxxxx
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel