Re: [PATCH v6] f2fs: add reserved nodes for privileged users
From: Chunhai Guo
Date: Fri Aug 08 2025 - 07:34:59 EST
On 8/8/2025 8:54 AM, Chao Yu wrote:
> Chunhai, can we test the selinux case as well? We may need to revert the
> selinux fix first to find the problem scenario.
Yes. I tried reverting the selinux fix, but couldn't reproduce the problem.
Thanks,
>
> Thanks,
>
> On 2025/8/8 06:57, Jaegeuk Kim wrote:
>> By the way, can we also add some testcases in xfstests to check all this
>> works as intended?
>>
>> On 08/07, Chunhai Guo wrote:
>>> This patch allows privileged users to reserve nodes via the
>>> 'reserve_node' mount option, which is similar to the existing
>>> 'reserve_root' option.
>>>
>>> "-o reserve_node=<N>" means <N> nodes are reserved for privileged
>>> users only.
>>>
>>> Signed-off-by: Chunhai Guo <guochunhai@xxxxxxxx>
>>> ---
>>> v5->v6: Modified F2FS_SPEC_reserve_node from (1<<24) to (1<<25) following Zhiguo's suggestion in v5.
>>> v4->v5: Apply Chao's suggestion from v4.
>>> v3->v4: Rebase this patch on https://lore.kernel.org/linux-f2fs-devel/20250731060338.1136086-1-chao@xxxxxxxxxx
>>> v2->v3: Apply Chao's suggestion from v2.
>>> v1->v2: Add two missing handling parts.
>>> v1: https://lore.kernel.org/linux-f2fs-devel/20250729095238.607433-1-guochunhai@xxxxxxxx/
>>> ---
>>> Documentation/filesystems/f2fs.rst | 9 ++++---
>>> fs/f2fs/f2fs.h | 17 ++++++++----
>>> fs/f2fs/super.c | 43 +++++++++++++++++++++++++-----
>>> 3 files changed, 54 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>> index 5cad369ceb92..e06cbb823bb7 100644
>>> --- a/Documentation/filesystems/f2fs.rst
>>> +++ b/Documentation/filesystems/f2fs.rst
>>> @@ -173,9 +173,12 @@ data_flush Enable data flushing before checkpoint in order to
>>> persist data of regular and symlink.
>>> reserve_root=%d Support configuring reserved space which is used for
>>> allocation from a privileged user with specified uid or
>>> - gid, unit: 4KB, the default limit is 0.2% of user blocks.
>>> -resuid=%d The user ID which may use the reserved blocks.
>>> -resgid=%d The group ID which may use the reserved blocks.
>>> + gid, unit: 4KB, the default limit is 12.5% of user blocks.
>>> +reserve_node=%d Support configuring reserved nodes which are used for
>>> + allocation from a privileged user with specified uid or
>>> + gid, the default limit is 12.5% of all nodes.
>>> +resuid=%d The user ID which may use the reserved blocks and nodes.
>>> +resgid=%d The group ID which may use the reserved blocks and nodes.
>>> fault_injection=%d Enable fault injection in all supported types with
>>> specified injection rate.
>>> fault_type=%d Support configuring fault injection type, should be
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index f19472eb2789..047964d66736 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -131,6 +131,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
>>> * string rather than using the MS_LAZYTIME flag, so this must remain.
>>> */
>>> #define F2FS_MOUNT_LAZYTIME 0x40000000
>>> +#define F2FS_MOUNT_RESERVE_NODE 0x80000000
>>>
>>> #define F2FS_OPTION(sbi) ((sbi)->mount_opt)
>>> #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
>>> @@ -178,6 +179,7 @@ struct f2fs_rwsem {
>>> struct f2fs_mount_info {
>>> unsigned int opt;
>>> block_t root_reserved_blocks; /* root reserved blocks */
>>> + block_t root_reserved_nodes; /* root reserved nodes */
>>> kuid_t s_resuid; /* reserved blocks for uid */
>>> kgid_t s_resgid; /* reserved blocks for gid */
>>> int active_logs; /* # of active logs */
>>> @@ -2407,7 +2409,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
>>> return ofs == XATTR_NODE_OFFSET;
>>> }
>>>
>>> -static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
>>> +static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi,
>>> struct inode *inode, bool cap)
>>> {
>>> if (!inode)
>>> @@ -2432,7 +2434,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
>>> avail_user_block_count = sbi->user_block_count -
>>> sbi->current_reserved_blocks;
>>>
>>> - if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_blocks(sbi, inode, cap))
>>> + if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap))
>>> avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
>>>
>>> if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
>>> @@ -2790,7 +2792,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>> struct inode *inode, bool is_inode)
>>> {
>>> block_t valid_block_count;
>>> - unsigned int valid_node_count;
>>> + unsigned int valid_node_count, avail_user_node_count;
>>> unsigned int avail_user_block_count;
>>> int err;
>>>
>>> @@ -2812,15 +2814,20 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>> spin_lock(&sbi->stat_lock);
>>>
>>> valid_block_count = sbi->total_valid_block_count + 1;
>>> - avail_user_block_count = get_available_block_count(sbi, inode, false);
>>> + avail_user_block_count = get_available_block_count(sbi, inode,
>>> + test_opt(sbi, RESERVE_NODE));
>>>
>>> if (unlikely(valid_block_count > avail_user_block_count)) {
>>> spin_unlock(&sbi->stat_lock);
>>> goto enospc;
>>> }
>>>
>>> + avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
>>> + if (test_opt(sbi, RESERVE_NODE) &&
>>> + !__allow_reserved_root(sbi, inode, true))
>>> + avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes;
>>> valid_node_count = sbi->total_valid_node_count + 1;
>>> - if (unlikely(valid_node_count > sbi->total_node_count)) {
>>> + if (unlikely(valid_node_count > avail_user_node_count)) {
>>> spin_unlock(&sbi->stat_lock);
>>> goto enospc;
>>> }
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index 3f8bc42e0968..f37004780ce0 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -143,6 +143,7 @@ enum {
>>> Opt_extent_cache,
>>> Opt_data_flush,
>>> Opt_reserve_root,
>>> + Opt_reserve_node,
>>> Opt_resgid,
>>> Opt_resuid,
>>> Opt_mode,
>>> @@ -273,6 +274,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
>>> fsparam_flag_no("extent_cache", Opt_extent_cache),
>>> fsparam_flag("data_flush", Opt_data_flush),
>>> fsparam_u32("reserve_root", Opt_reserve_root),
>>> + fsparam_u32("reserve_node", Opt_reserve_node),
>>> fsparam_gid("resgid", Opt_resgid),
>>> fsparam_uid("resuid", Opt_resuid),
>>> fsparam_enum("mode", Opt_mode, f2fs_param_mode),
>>> @@ -346,6 +348,7 @@ static match_table_t f2fs_checkpoint_tokens = {
>>> #define F2FS_SPEC_memory_mode (1 << 22)
>>> #define F2FS_SPEC_errors (1 << 23)
>>> #define F2FS_SPEC_lookup_mode (1 << 24)
>>> +#define F2FS_SPEC_reserve_node (1 << 25)
>>>
>>> struct f2fs_fs_context {
>>> struct f2fs_mount_info info;
>>> @@ -447,22 +450,30 @@ static void f2fs_destroy_casefold_cache(void) { }
>>>
>>> static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
>>> {
>>> - block_t limit = min((sbi->user_block_count >> 3),
>>> + block_t block_limit = min((sbi->user_block_count >> 3),
>>> sbi->user_block_count - sbi->reserved_blocks);
>>> + block_t node_limit = sbi->total_node_count >> 3;
>>>
>>> /* limit is 12.5% */
>>> if (test_opt(sbi, RESERVE_ROOT) &&
>>> - F2FS_OPTION(sbi).root_reserved_blocks > limit) {
>>> - F2FS_OPTION(sbi).root_reserved_blocks = limit;
>>> + F2FS_OPTION(sbi).root_reserved_blocks > block_limit) {
>>> + F2FS_OPTION(sbi).root_reserved_blocks = block_limit;
>>> f2fs_info(sbi, "Reduce reserved blocks for root = %u",
>>> F2FS_OPTION(sbi).root_reserved_blocks);
>>> }
>>> - if (!test_opt(sbi, RESERVE_ROOT) &&
>>> + if (test_opt(sbi, RESERVE_NODE) &&
>>> + F2FS_OPTION(sbi).root_reserved_nodes > node_limit) {
>>> + F2FS_OPTION(sbi).root_reserved_nodes = node_limit;
>>> + f2fs_info(sbi, "Reduce reserved nodes for root = %u",
>>> + F2FS_OPTION(sbi).root_reserved_nodes);
>>> + }
>>> + if (!test_opt(sbi, RESERVE_ROOT) && !test_opt(sbi, RESERVE_NODE) &&
>>> (!uid_eq(F2FS_OPTION(sbi).s_resuid,
>>> make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
>>> !gid_eq(F2FS_OPTION(sbi).s_resgid,
>>> make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
>>> - f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
>>> + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root"
>>> + " and reserve_node",
>>> from_kuid_munged(&init_user_ns,
>>> F2FS_OPTION(sbi).s_resuid),
>>> from_kgid_munged(&init_user_ns,
>>> @@ -851,6 +862,11 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
>>> F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
>>> ctx->spec_mask |= F2FS_SPEC_reserve_root;
>>> break;
>>> + case Opt_reserve_node:
>>> + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
>>> + F2FS_CTX_INFO(ctx).root_reserved_nodes = result.uint_32;
>>> + ctx->spec_mask |= F2FS_SPEC_reserve_node;
>>> + break;
>>> case Opt_resuid:
>>> F2FS_CTX_INFO(ctx).s_resuid = result.uid;
>>> ctx->spec_mask |= F2FS_SPEC_resuid;
>>> @@ -1438,6 +1454,14 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
>>> ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
>>> ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
>>> }
>>> + if (test_opt(sbi, RESERVE_NODE) &&
>>> + (ctx->opt_mask & F2FS_MOUNT_RESERVE_NODE) &&
>>> + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) {
>>> + f2fs_info(sbi, "Preserve previous reserve_node=%u",
>>> + F2FS_OPTION(sbi).root_reserved_nodes);
>>> + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
>>> + ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_NODE;
>>> + }
>>>
>>> err = f2fs_check_test_dummy_encryption(fc, sb);
>>> if (err)
>>> @@ -1637,6 +1661,9 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
>>> if (ctx->spec_mask & F2FS_SPEC_reserve_root)
>>> F2FS_OPTION(sbi).root_reserved_blocks =
>>> F2FS_CTX_INFO(ctx).root_reserved_blocks;
>>> + if (ctx->spec_mask & F2FS_SPEC_reserve_node)
>>> + F2FS_OPTION(sbi).root_reserved_nodes =
>>> + F2FS_CTX_INFO(ctx).root_reserved_nodes;
>>> if (ctx->spec_mask & F2FS_SPEC_resgid)
>>> F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
>>> if (ctx->spec_mask & F2FS_SPEC_resuid)
>>> @@ -2359,9 +2386,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>> else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
>>> seq_puts(seq, "fragment:block");
>>> seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
>>> - if (test_opt(sbi, RESERVE_ROOT))
>>> - seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
>>> + if (test_opt(sbi, RESERVE_ROOT) || test_opt(sbi, RESERVE_NODE))
>>> + seq_printf(seq, ",reserve_root=%u,reserve_node=%u,resuid=%u,"
>>> + "resgid=%u",
>>> F2FS_OPTION(sbi).root_reserved_blocks,
>>> + F2FS_OPTION(sbi).root_reserved_nodes,
>>> from_kuid_munged(&init_user_ns,
>>> F2FS_OPTION(sbi).s_resuid),
>>> from_kgid_munged(&init_user_ns,
>>> --
>>> 2.34.1