[PATCH 2/6] mm: Add become_kswapd and restore_kswapd

From: Matthew Wilcox (Oracle)
Date: Thu Jun 25 2020 - 07:32:19 EST


Since XFS needs to pretend to be kswapd in some of its worker threads,
create methods to save & restore kswapd state. Don't bother restoring
kswapd state in kswapd -- the only time we reach this code is when we're
exiting and the task_struct is about to be destroyed anyway.

Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
fs/xfs/libxfs/xfs_btree.c | 14 ++++++++------
include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++
mm/vmscan.c | 16 +---------------
3 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2d25bab68764..a04a44238aab 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2813,8 +2813,9 @@ xfs_btree_split_worker(
{
struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work);
+ bool is_kswapd = args->kswapd;
unsigned long pflags;
- unsigned long new_pflags = PF_MEMALLOC_NOFS;
+ int memalloc_nofs;

/*
* we are in a transaction context here, but may also be doing work
@@ -2822,16 +2823,17 @@ xfs_btree_split_worker(
* temporarily to ensure that we don't block waiting for memory reclaim
* in any way.
*/
- if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
- current_set_flags_nested(&pflags, new_pflags);
+ if (is_kswapd)
+ pflags = become_kswapd();
+ memalloc_nofs = memalloc_nofs_save();

args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
args->key, args->curp, args->stat);
complete(args->done);

- current_restore_flags_nested(&pflags, new_pflags);
+ memalloc_nofs_restore(memalloc_nofs);
+ if (is_kswapd)
+ restore_kswapd(pflags);
}

/*
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1a7e1ab1be85..b0089eadc367 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -308,6 +308,32 @@ static inline void memalloc_nocma_restore(unsigned int flags)
}
#endif

+/*
+ * Tell the memory management that we're a "memory allocator",
+ * and that if we need more memory we should get access to it
+ * regardless (see "__alloc_pages()"). "kswapd" should
+ * never get caught in the normal page freeing logic.
+ *
+ * (Kswapd normally doesn't need memory anyway, but sometimes
+ * you need a small amount of memory in order to be able to
+ * page out something else, and this flag essentially protects
+ * us from recursively trying to free more memory as we're
+ * trying to free the first piece of memory in the first place).
+ */
+#define KSWAPD_PF_FLAGS (PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD)
+
+static inline unsigned long become_kswapd(void)
+{
+ unsigned long flags = current->flags & KSWAPD_PF_FLAGS; /* old bits */
+ current->flags |= KSWAPD_PF_FLAGS;
+ return flags; /* for restore_kswapd() */
+}
+
+static inline void restore_kswapd(unsigned long flags)
+{
+ current->flags &= ~(flags ^ KSWAPD_PF_FLAGS); /* keep old bits */
+}
+
static inline void set_current_io_flusher(void)
{
current->flags |= PF_LOCAL_THROTTLE;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b6d84326bdf2..27ae76699899 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3870,19 +3870,7 @@ static int kswapd(void *p)
if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);

- /*
- * Tell the memory management that we're a "memory allocator",
- * and that if we need more memory we should get access to it
- * regardless (see "__alloc_pages()"). "kswapd" should
- * never get caught in the normal page freeing logic.
- *
- * (Kswapd normally doesn't need memory anyway, but sometimes
- * you need a small amount of memory in order to be able to
- * page out something else, and this flag essentially protects
- * us from recursively trying to free more memory as we're
- * trying to free the first piece of memory in the first place).
- */
- tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+ become_kswapd();
set_freezable();

WRITE_ONCE(pgdat->kswapd_order, 0);
@@ -3932,8 +3920,6 @@ static int kswapd(void *p)
goto kswapd_try_sleep;
}

- tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
-
return 0;
}

--
2.27.0