[PATCH v1 2/2] signal: add procfd_signal() syscall

From: Christian Brauner
Date: Mon Nov 19 2018 - 05:33:50 EST


The kill() syscall operates on process identifiers. After a process has
exited its pid can be reused by another process. If a caller sends a signal
to a reused pid it will end up signaling the wrong process. This issue has
often surfaced and there has been a push [1] to address this problem.

A prior patch has introduced the ability to get a file descriptor
referencing struct pid by opening /proc/<pid>. This guarantees a stable
handle on a process which can be used to send signals to the referenced
process. Discussion has shown that a dedicated syscall is preferable over
ioctl()s. Thus, the new syscall procfd_signal() is introduced to solve
this problem. It operates on a process file descriptor.
The syscall takes an additional siginfo_t and flags argument. If siginfo_t
is NULL then procfd_signal() behaves like kill() if it is not NULL it
behaves like rt_sigqueueinfo.
The flags argument is added to allow for future extensions of this syscall.
It currently needs to be passed as 0.

With this patch a process can be killed via:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
int ret;
char buf[1000];

if (argc < 2)
exit(EXIT_FAILURE);

ret = snprintf(buf, sizeof(buf), "/proc/%s", argv[1]);
if (ret < 0)
exit(EXIT_FAILURE);

int fd = open(buf, O_DIRECTORY | O_CLOEXEC);
if (fd < 0) {
printf("%s - Failed to open \"%s\"\n", strerror(errno), buf);
exit(EXIT_FAILURE);
}

ret = syscall(__NR_procfd_signal, fd, SIGKILL, NULL, 0);
if (ret < 0) {
printf("Failed to send SIGKILL \"%s\"\n", strerror(errno));
close(fd);
exit(EXIT_FAILURE);
}

close(fd);

exit(EXIT_SUCCESS);
}

[1]: https://lkml.org/lkml/2018/11/18/130

Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
Cc: Serge Hallyn <serge@xxxxxxxxxx>
Cc: Jann Horn <jannh@xxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Andy Lutomirsky <luto@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Aleksa Sarai <cyphar@xxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Christian Brauner <christian@xxxxxxxxxx>
---
Changelog:
v1:
- patch introduced
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
fs/proc/base.c | 6 ++
include/linux/proc_fs.h | 1 +
include/linux/syscalls.h | 2 +
kernel/signal.c | 76 ++++++++++++++++++++++++--
6 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b3d1..e637eab883e9 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,3 +398,4 @@
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
+387 i386 procfd_signal sys_procfd_signal __ia32_sys_procfd_signal
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..e95f6741ab42 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,7 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
+335 common procfd_signal __x64_sys_procfd_signal

#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6365a4fea314..a12c9de92bd0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3055,6 +3055,12 @@ static const struct file_operations proc_tgid_base_operations = {
.release = proc_tgid_release,
};

+bool proc_is_procfd(const struct file *file)
+{
+ return d_is_dir(file->f_path.dentry) &&
+ (file->f_op == &proc_tgid_base_operations);
+}
+
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d0e1f1522a78..2d53a47fba34 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
int (*show)(struct seq_file *, void *),
proc_write_t write,
void *data);
+extern bool proc_is_procfd(const struct file *file);

#else /* CONFIG_PROC_FS */

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ac3d13a915b..a5ca8cb84566 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -907,6 +907,8 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
unsigned mask, struct statx __user *buffer);
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);
+asmlinkage long sys_procfd_signal(int fd, int sig, siginfo_t __user *info,
+ int flags);

/*
* Architecture-specific system calls
diff --git a/kernel/signal.c b/kernel/signal.c
index 9a32bc2088c9..e8a8929de710 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -19,7 +19,9 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
+#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
@@ -3286,6 +3288,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
}
#endif

+static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
+{
+ clear_siginfo(info);
+ info->si_signo = sig;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+ info->si_pid = task_tgid_vnr(current);
+ info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
@@ -3295,16 +3307,68 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct kernel_siginfo info;

- clear_siginfo(&info);
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_USER;
- info.si_pid = task_tgid_vnr(current);
- info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ prepare_kill_siginfo(sig, &info);

return kill_something_info(sig, &info, pid);
}

+/**
+ * sys_procfd_signal - send a signal to a process through a process file
+ * descriptor
+ * @fd: the file descriptor of the process
+ * @sig: signal to be sent
+ * @info: the signal info
+ * @flags: future flags to be passed
+ */
+SYSCALL_DEFINE4(procfd_signal, int, fd, int, sig, siginfo_t __user *, info,
+ int, flags)
+{
+ int ret;
+ struct pid *pid;
+ kernel_siginfo_t kinfo;
+ struct fd f;
+
+ /* Enforce flags be set to 0 until we add an extension. */
+ if (flags)
+ return -EINVAL;
+
+ f = fdget_raw(fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = -EINVAL;
+ /* Is this a process file descriptor? */
+ if (!proc_is_procfd(f.file))
+ goto err;
+
+ pid = f.file->private_data;
+ if (!pid)
+ goto err;
+
+ if (info) {
+ ret = __copy_siginfo_from_user(sig, &kinfo, info);
+ if (unlikely(ret))
+ goto err;
+ /*
+ * Not even root can pretend to send signals from the kernel.
+ * Nor can they impersonate a kill()/tgkill(), which adds
+ * source info.
+ */
+ ret = -EPERM;
+ if ((kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL) &&
+ (task_pid(current) != pid))
+ goto err;
+ } else {
+ prepare_kill_siginfo(sig, &kinfo);
+ }
+
+ ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+ fdput(f);
+ return ret;
+}
+
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
{
--
2.19.1