[PATCH] mm/damon/core: Avoid fake load reports due to uninterruptible sleeps

From: SeongJae Park
Date: Wed Nov 24 2021 - 09:52:29 EST


Because DAMON sleeps in uninterruptible mode, /proc/loadavg reports fake
load while DAMON is turned on, even though it is doing nothing. This can
confuse users[1]. To avoid this, this commit makes DAMON sleep in idle
mode, which is not counted in the load average.

[1] https://lore.kernel.org/all/11868371.O9o76ZdvQC@xxxxxxxxxxxxxx/

Fixes: 2224d8485492 ("mm: introduce Data Access MONitor (DAMON)")
Reported-by: Oleksandr Natalenko <oleksandr@xxxxxxxxxxxxxx>
Signed-off-by: SeongJae Park <sj@xxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx> # 5.15.x
---
I think this needs to be applied to v5.15.y, but it cannot be cleanly
applied there as is. I will backport this to v5.15.y and post it later,
once this is merged into the mainline.

mm/damon/core.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index daacd9536c7c..7813f47aadc9 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -12,6 +12,8 @@
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/string.h>

@@ -976,12 +978,25 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme)
return 0;
}

+/* sleep for @usecs in idle mode */
+static void __sched damon_usleep_idle(unsigned long usecs)
+{
+ ktime_t exp = ktime_add_us(ktime_get(), usecs);
+ u64 delta = usecs * NSEC_PER_USEC / 100; /* allow 1% error */
+
+ for (;;) {
+ __set_current_state(TASK_IDLE);
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
+}
+
static void kdamond_usleep(unsigned long usecs)
{
if (usecs > 100 * 1000)
- schedule_timeout_interruptible(usecs_to_jiffies(usecs));
+ schedule_timeout_idle(usecs_to_jiffies(usecs));
else
- usleep_range(usecs, usecs + 1);
+ damon_usleep_idle(usecs);
}

/* Returns negative error code if it's not activated but should return */
@@ -1036,7 +1051,7 @@ static int kdamond_fn(void *data)
ctx->callback.after_sampling(ctx))
done = true;

- usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+ kdamond_usleep(ctx->sample_interval);

if (ctx->primitive.check_accesses)
max_nr_accesses = ctx->primitive.check_accesses(ctx);
--
2.17.1