[PATCH v2 1/2] timers: Implement usleep_idle_range()

From: SeongJae Park
Date: Thu Nov 25 2021 - 11:10:48 EST


Some kernel threads such as DAMON could need to repeatedly sleep in
micro seconds level. Because usleep_range() sleeps in uninterruptible
state, however, such threads would make /proc/loadavg reports fake load.

To help such cases, this commit implements a variant of usleep_range()
called usleep_idle_range(). It is same to usleep_range() but sets the
state of the current task as TASK_IDLE while sleeping.

Signed-off-by: SeongJae Park <sj@xxxxxxxxxx>
---
include/linux/delay.h | 14 +++++++++++++-
kernel/time/timer.c | 16 +++++++++-------
2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 8eacf67eb212..039e7e0c7378 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -20,6 +20,7 @@
*/

#include <linux/math.h>
+#include <linux/sched.h>

extern unsigned long loops_per_jiffy;

@@ -58,7 +59,18 @@ void calibrate_delay(void);
void __attribute__((weak)) calibration_delay_done(void);
void msleep(unsigned int msecs);
unsigned long msleep_interruptible(unsigned int msecs);
-void usleep_range(unsigned long min, unsigned long max);
+void usleep_range_state(unsigned long min, unsigned long max,
+ unsigned int state);
+
+static inline void usleep_range(unsigned long min, unsigned long max)
+{
+ usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
+}
+
+static inline void usleep_idle_range(unsigned long min, unsigned long max)
+{
+ usleep_range_state(min, max, TASK_IDLE);
+}

static inline void ssleep(unsigned int seconds)
{
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index e3d2c23c413d..85f1021ad459 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2054,26 +2054,28 @@ unsigned long msleep_interruptible(unsigned int msecs)
EXPORT_SYMBOL(msleep_interruptible);

/**
- * usleep_range - Sleep for an approximate time
- * @min: Minimum time in usecs to sleep
- * @max: Maximum time in usecs to sleep
+ * usleep_range_state - Sleep for an approximate time in a given state
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ * @state: State of the current task that will be while sleeping
*
* In non-atomic context where the exact wakeup time is flexible, use
- * usleep_range() instead of udelay(). The sleep improves responsiveness
+ * usleep_range_state() instead of udelay(). The sleep improves responsiveness
* by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
* power usage by allowing hrtimers to take advantage of an already-
* scheduled interrupt instead of scheduling a new one just for this sleep.
*/
-void __sched usleep_range(unsigned long min, unsigned long max)
+void __sched usleep_range_state(unsigned long min, unsigned long max,
+ unsigned int state)
{
ktime_t exp = ktime_add_us(ktime_get(), min);
u64 delta = (u64)(max - min) * NSEC_PER_USEC;

for (;;) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(state);
/* Do not return before the requested sleep time has elapsed */
if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
break;
}
}
-EXPORT_SYMBOL(usleep_range);
+EXPORT_SYMBOL(usleep_range_state);
--
2.17.1