[RFC] Add critical process prctl

From: Daniel Colascione
Date: Wed Sep 04 2019 - 20:53:26 EST


A task with CAP_SYS_ADMIN can mark itself critical by calling
prctl(PR_SET_TASK_CRITICAL, PR_CRITICAL_CRITICAL); if a critical task
ever exits, the kernel panics. This facility is intended for low-level
core system processes that cannot be restarted gracefully without
rebooting the system. It lets these processes ensure that the system
panics (and reboots, where reboot-on-panic is configured) when they
die, regardless of whether the rest of userspace is operational.

Signed-off-by: Daniel Colascione <dancol@xxxxxxxxxx>
---
include/linux/sched.h | 5 +++++
include/uapi/linux/prctl.h | 5 +++++
kernel/exit.c | 2 ++
kernel/sys.c | 19 +++++++++++++++++++
4 files changed, 31 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..29420b9ebb63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1526,6 +1526,7 @@ static inline bool is_percpu_thread(void)
#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */
+#define PFA_CRITICAL 8 /* Panic the system if this task exits */

#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1568,6 +1569,10 @@ TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)

+TASK_PFA_TEST(CRITICAL, critical)	/* task_critical(): checked in do_exit() */
+TASK_PFA_SET(CRITICAL, critical)	/* task_set_critical(): used by the prctl */
+TASK_PFA_CLEAR(CRITICAL, critical)	/* task_clear_critical(): used by the prctl */
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 094bb03b9cc2..4964723bbd47 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -229,4 +229,9 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)

+/* Set or clear the calling task's critical flag; a critical task's exit panics the kernel */
+#define PR_SET_TASK_CRITICAL 55
+#define PR_CRITICAL_NOT_CRITICAL 0 /* arg2: clear the critical flag */
+#define PR_CRITICAL_CRITICAL 1 /* arg2: set the critical flag */
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b4a5dcce8f8..9b3d3411d935 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -788,6 +788,8 @@ void __noreturn do_exit(long code)
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
+ if (unlikely(task_critical(tsk)))
+ panic("Critical task died!");

/*
* If do_exit is called because this processes oopsed, it's possible
diff --git a/kernel/sys.c b/kernel/sys.c
index 2969304c29fe..097e05ebaf94 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2269,6 +2269,20 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
return -EINVAL;
}

+/* Set/clear PFA_CRITICAL on @t; needs CAP_SYS_ADMIN. Returns 0 or -errno. */
+static int task_do_set_critical(struct task_struct *t, unsigned long opt)
+{
+	if (opt != PR_CRITICAL_NOT_CRITICAL && opt != PR_CRITICAL_CRITICAL)
+		return -EINVAL;	/* reject unknown values before the capability check */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;	/* clearing is privileged as well as setting */
+	if (opt == PR_CRITICAL_NOT_CRITICAL)
+		task_clear_critical(t);
+	else
+		task_set_critical(t);
+	return 0;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2492,6 +2506,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EINVAL;
error = PAC_RESET_KEYS(me, arg2);
break;
+ case PR_SET_TASK_CRITICAL:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = task_do_set_critical(me, arg2);
+ break;
default:
error = -EINVAL;
break;
--
2.23.0.187.g17f5b7556c-goog