[tip: sched/core] delayacct: Add sysctl to enable at runtime

From: tip-bot2 for Peter Zijlstra
Date: Wed May 12 2021 - 06:29:38 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 0cd7c741f01de13dc1eecf22557593b3514639bb
Gitweb: https://git.kernel.org/tip/0cd7c741f01de13dc1eecf22557593b3514639bb
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Mon, 10 May 2021 14:01:00 +02:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Wed, 12 May 2021 11:43:25 +02:00

delayacct: Add sysctl to enable at runtime

Just like sched_schedstats, allow runtime enabling (and disabling) of
delayacct. This is useful if one forgot to add the delayacct boot time
option.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/YJkhebGJAywaZowX@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
---
Documentation/accounting/delay-accounting.rst | 6 ++-
include/linux/delayacct.h | 4 ++-
kernel/delayacct.c | 36 +++++++++++++++++-
kernel/sysctl.c | 12 ++++++-
4 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index f20b282..1b8b46d 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -74,8 +74,10 @@ To enable, add::

delayacct

-to the kernel boot options. The rest of the instructions
-below assume this has been done.
+to the kernel boot options. The rest of the instructions below assume this has
+been done. Alternatively, use sysctl kernel.task_delayacct to switch the state
+at runtime. Note however that only tasks started after enabling it will have
+delayacct information.

After the system has booted up, use a utility
similar to getdelays.c to access the delays
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 225c8e0..af7e6eb 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -65,6 +65,10 @@ DECLARE_STATIC_KEY_FALSE(delayacct_key);
extern int delayacct_on; /* Delay accounting turned on/off */
extern struct kmem_cache *delayacct_cache;
extern void delayacct_init(void);
+
+extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos);
+
extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3f08690..51530d5 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -18,6 +18,17 @@ DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly; /* Delay accounting turned on/off */
struct kmem_cache *delayacct_cache;

+static void set_delayacct(bool enabled)
+{
+ if (enabled) {
+ static_branch_enable(&delayacct_key);
+ delayacct_on = 1;
+ } else {
+ delayacct_on = 0;
+ static_branch_disable(&delayacct_key);
+ }
+}
+
static int __init delayacct_setup_enable(char *str)
{
delayacct_on = 1;
@@ -29,9 +40,30 @@ void delayacct_init(void)
{
delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
delayacct_tsk_init(&init_task);
- if (delayacct_on)
- static_branch_enable(&delayacct_key);
+ set_delayacct(delayacct_on);
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int state = delayacct_on;
+ struct ctl_table t;
+ int err;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ t = *table;
+ t.data = &state;
+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+ if (err < 0)
+ return err;
+ if (write)
+ set_delayacct(state);
+ return err;
}
+#endif

void __delayacct_tsk_init(struct task_struct *tsk)
{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14edf84..0afbfc8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -71,6 +71,7 @@
#include <linux/coredump.h>
#include <linux/latencytop.h>
#include <linux/pid.h>
+#include <linux/delayacct.h>

#include "../lib/kstrtox.h"

@@ -1727,6 +1728,17 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_TASK_DELAY_ACCT
+ {
+ .procname = "task_delayacct",
+ .data = NULL,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_delayacct,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_TASK_DELAY_ACCT */
#ifdef CONFIG_NUMA_BALANCING
{
.procname = "numa_balancing",