[PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

From: Yan Zhu
Date: Thu Apr 07 2022 - 03:08:23 EST


We're moving sysctls out of kernel/sysctl.c as its a mess. We
already moved all filesystem sysctls out. And with time the goal is
to move all sysctls out to their own subsystem/actual user.

kernel/sysctl.c has grown to an insane mess and its easy to run
into conflicts with it. The effort to move them out is part of this.

Signed-off-by: Yan Zhu <zhuyan34@xxxxxxxxxx>

---
v1->v2:
1.Added patch branch identifier sysctl-next.
2.Re-describe the reason for the patch submission.

v2->v3:
Re-describe the reason for the patch submission.

v3->v4:
1.Remove '#include <linux/bpf.h>' in kernel/sysctl.c
2.re-adaptive the patch
---
kernel/bpf/syscall.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sysctl.c | 79 -----------------------------------------------
2 files changed, 87 insertions(+), 79 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cdaa1152436a..e9621cfa09f2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4908,3 +4908,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
const struct bpf_prog_ops bpf_syscall_prog_ops = {
.test_run = bpf_prog_test_run_syscall,
};
+
+#ifdef CONFIG_SYSCTL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct static_key *key = (struct static_key *)table->data;
+ static int saved_val;
+ int val, ret;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mutex_lock(&bpf_stats_enabled_mutex);
+ val = saved_val;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret && val != saved_val) {
+ if (val)
+ static_key_slow_inc(key);
+ else
+ static_key_slow_dec(key);
+ saved_val = val;
+ }
+ mutex_unlock(&bpf_stats_enabled_mutex);
+ return ret;
+}
+
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+
+ unpriv_ebpf_notify(unpriv_enable);
+
+ return ret;
+}
+
+static struct ctl_table bpf_syscall_table[] = {
+ {
+ .procname = "unprivileged_bpf_disabled",
+ .data = &sysctl_unprivileged_bpf_disabled,
+ .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
+ .mode = 0644,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "bpf_stats_enabled",
+ .data = &bpf_stats_enabled_key.key,
+ .maxlen = sizeof(bpf_stats_enabled_key),
+ .mode = 0644,
+ .proc_handler = bpf_stats_handler,
+ },
+ { }
+};
+
+static int __init bpf_syscall_sysctl_init(void)
+{
+ register_sysctl_init("kernel", bpf_syscall_table);
+ return 0;
+}
+late_initcall(bpf_syscall_sysctl_init);
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 21172d3dad6e..c0fdf465a93d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -62,7 +62,6 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/kexec.h>
-#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/latencytop.h>
@@ -139,66 +138,6 @@ static const int max_extfrag_threshold = 1000;

#endif /* CONFIG_SYSCTL */

-#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
-static int bpf_stats_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct static_key *key = (struct static_key *)table->data;
- static int saved_val;
- int val, ret;
- struct ctl_table tmp = {
- .data = &val,
- .maxlen = sizeof(val),
- .mode = table->mode,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- };
-
- if (write && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mutex_lock(&bpf_stats_enabled_mutex);
- val = saved_val;
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
- if (write && !ret && val != saved_val) {
- if (val)
- static_key_slow_inc(key);
- else
- static_key_slow_dec(key);
- saved_val = val;
- }
- mutex_unlock(&bpf_stats_enabled_mutex);
- return ret;
-}
-
-void __weak unpriv_ebpf_notify(int new_state)
-{
-}
-
-static int bpf_unpriv_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret, unpriv_enable = *(int *)table->data;
- bool locked_state = unpriv_enable == 1;
- struct ctl_table tmp = *table;
-
- if (write && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- tmp.data = &unpriv_enable;
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
- if (write && !ret) {
- if (locked_state && unpriv_enable != 1)
- return -EPERM;
- *(int *)table->data = unpriv_enable;
- }
-
- unpriv_ebpf_notify(unpriv_enable);
-
- return ret;
-}
-#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
-
/*
* /proc/sys support
*/
@@ -2112,24 +2051,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
-#ifdef CONFIG_BPF_SYSCALL
- {
- .procname = "unprivileged_bpf_disabled",
- .data = &sysctl_unprivileged_bpf_disabled,
- .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
- .mode = 0644,
- .proc_handler = bpf_unpriv_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_TWO,
- },
- {
- .procname = "bpf_stats_enabled",
- .data = &bpf_stats_enabled_key.key,
- .maxlen = sizeof(bpf_stats_enabled_key),
- .mode = 0644,
- .proc_handler = bpf_stats_handler,
- },
-#endif
#if defined(CONFIG_TREE_RCU)
{
.procname = "panic_on_rcu_stall",
--
2.12.3