[PATCH 2/2] f2fs: introduce F2FS_UNFAIR_RWSEM to support unfair rwsem

From: Jaegeuk Kim
Date: Thu Mar 03 2022 - 21:20:14 EST


Unfair rwsem should be used when blk-cg is on. Otherwise, there is regression.

FYI, we noticed a -26.7% regression of aim7.jobs-per-min due to commit:

commit: e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae ("f2fs: move f2fs to use reader-unfair rwsems")
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master

in testcase: aim7
on test machine: 88 threads 2 sockets Intel(R) Xeon(R) Gold 6238M CPU @ 2.10GHz with 128G memory
with following parameters:

disk: 4BRD_12G
md: RAID0
fs: f2fs
test: sync_disk_rw
load: 100
cpufreq_governor: performance
ucode: 0x500320a

test-description: AIM7 is a traditional UNIX system level benchmark suite which is used to test and measure the performance of multiuser system.
test-url: https://sourceforge.net/projects/aimbench/files/aim-suite7/

Reported-by: kernel test robot <oliver.sang@xxxxxxxxx>
Signed-off-by: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
---
fs/f2fs/Kconfig | 7 +++++++
fs/f2fs/f2fs.h | 10 ++++++++++
2 files changed, 17 insertions(+)

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index f46a7339d6cf..03ef087537c7 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -143,3 +143,10 @@ config F2FS_IOSTAT
Support getting IO statistics through sysfs and printing out periodic
IO statistics tracepoint events. You have to turn on "iostat_enable"
sysfs node to enable this feature.
+
+config F2FS_UNFAIR_RWSEM
+ bool "F2FS unfair rw_semaphore"
+ depends on F2FS_FS && BLK_CGROUP
+ help
+ Use unfair rw_semaphore, if system configured IO priority by block
+ cgroup.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index efc4f1fe2ffd..68d791ec8b27 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -132,7 +132,9 @@ typedef u32 nid_t;

struct f2fs_rwsem {
struct rw_semaphore internal_rwsem;
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
wait_queue_head_t read_waiters;
+#endif
};

struct f2fs_mount_info {
@@ -2131,7 +2133,9 @@ static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem,
const char *sem_name, struct lock_class_key *key)
{
__init_rwsem(&sem->internal_rwsem, sem_name, key);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
init_waitqueue_head(&sem->read_waiters);
+#endif
}

static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem)
@@ -2146,7 +2150,11 @@ static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem)

static inline void f2fs_down_read(struct f2fs_rwsem *sem)
{
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
+#else
+ down_read(&sem->internal_rwsem);
+#endif
}

static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
@@ -2181,7 +2189,9 @@ static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
static inline void f2fs_up_write(struct f2fs_rwsem *sem)
{
up_write(&sem->internal_rwsem);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
wake_up_all(&sem->read_waiters);
+#endif
}

static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
--
2.35.1.616.g0bdcbb4464-goog