Re: [PATCH] riscv: fix seccomp reject syscall code path

From: Kees Cook
Date: Sun Feb 09 2020 - 20:11:32 EST


On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
> If secure_computing() rejected a system call, we were previously setting
> the system call number to -1, to indicate to later code that the syscall
> failed. However, if something (e.g. a user notification) was sleeping and
> received a signal, we may set a0 to -ERESTARTSYS and retry the system
> call.
>
> In this case, seccomp "denies" the syscall (because of the signal), and we
> would set a7 to -1, thus losing the value of the system call we want to
> restart.
>
> Instead, let's return -1 from do_syscall_trace_enter() to indicate that the
> syscall was rejected, so we don't clobber the syscall number in a7 when the
> call has to be restarted (e.g. on -ERESTARTSYS).
>
> This commit fixes the user_notification_signal seccomp selftest on riscv to
> no longer hang. That test expects the system call to be re-issued after the
> signal, and it wasn't due to the above bug. Now that it is, everything
> works normally.
>
> Note that in the ptrace (tracer) case, the tracer can set the register
> values to whatever it wants, so we still need to keep the code that
> handles out-of-bounds syscalls. However, we can drop the comment.
>
> We can also drop syscall_set_nr(), since it is no longer used anywhere, and
> the code that re-loads the value in a7 because of it.
>
> Reported in: https://lore.kernel.org/bpf/CAEn-LTp=ss0Dfv6J00=rCAy+N78U2AmhqJNjfqjr2FDpPYjxEQ@xxxxxxxxxxxxxx/
>
> Reported-by: David Abdurachmanov <david.abdurachmanov@xxxxxxxxx>
> Signed-off-by: Tycho Andersen <tycho@xxxxxxxx>

Funky! Good catch. :)

Reviewed-by: Kees Cook <keescook@xxxxxxxxxxxx>

-Kees

> CC: Kees Cook <keescook@xxxxxxxxxxxx>
> CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> CC: Paul Walmsley <paul.walmsley@xxxxxxxxxx>
> CC: Oleg Nesterov <oleg@xxxxxxxxxx>
> ---
> arch/riscv/include/asm/syscall.h | 7 -------
> arch/riscv/kernel/entry.S | 11 +++--------
> arch/riscv/kernel/ptrace.c | 11 +++++------
> 3 files changed, 8 insertions(+), 21 deletions(-)
>
> diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
> index 42347d0981e7..49350c8bd7b0 100644
> --- a/arch/riscv/include/asm/syscall.h
> +++ b/arch/riscv/include/asm/syscall.h
> @@ -28,13 +28,6 @@ static inline int syscall_get_nr(struct task_struct *task,
> return regs->a7;
> }
>
> -static inline void syscall_set_nr(struct task_struct *task,
> - struct pt_regs *regs,
> - int sysno)
> -{
> - regs->a7 = sysno;
> -}
> -
> static inline void syscall_rollback(struct task_struct *task,
> struct pt_regs *regs)
> {
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index bad4d85b5e91..208702d8c18e 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -228,20 +228,13 @@ check_syscall_nr:
> /* Check to make sure we don't jump to a bogus syscall number. */
> li t0, __NR_syscalls
> la s0, sys_ni_syscall
> - /*
> - * The tracer can change syscall number to valid/invalid value.
> - * We use syscall_set_nr helper in syscall_trace_enter thus we
> - * cannot trust the current value in a7 and have to reload from
> - * the current task pt_regs.
> - */
> - REG_L a7, PT_A7(sp)
> /*
> * Syscall number held in a7.
> * If syscall number is above allowed value, redirect to ni_syscall.
> */
> bge a7, t0, 1f
> /*
> - * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
> + * Check if syscall is rejected by tracer, i.e., a7 == -1.
> * If yes, we pretend it was executed.
> */
> li t1, -1
> @@ -334,6 +327,7 @@ work_resched:
> handle_syscall_trace_enter:
> move a0, sp
> call do_syscall_trace_enter
> + move t0, a0
> REG_L a0, PT_A0(sp)
> REG_L a1, PT_A1(sp)
> REG_L a2, PT_A2(sp)
> @@ -342,6 +336,7 @@ handle_syscall_trace_enter:
> REG_L a5, PT_A5(sp)
> REG_L a6, PT_A6(sp)
> REG_L a7, PT_A7(sp)
> + bnez t0, ret_from_syscall_rejected
> j check_syscall_nr
> handle_syscall_trace_exit:
> move a0, sp
> diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
> index 407464201b91..444dc7b0fd78 100644
> --- a/arch/riscv/kernel/ptrace.c
> +++ b/arch/riscv/kernel/ptrace.c
> @@ -148,21 +148,19 @@ long arch_ptrace(struct task_struct *child, long request,
> * Allows PTRACE_SYSCALL to work. These are called from entry.S in
> * {handle,ret_from}_syscall.
> */
> -__visible void do_syscall_trace_enter(struct pt_regs *regs)
> +__visible int do_syscall_trace_enter(struct pt_regs *regs)
> {
> if (test_thread_flag(TIF_SYSCALL_TRACE))
> if (tracehook_report_syscall_entry(regs))
> - syscall_set_nr(current, regs, -1);
> + return -1;
>
> /*
> * Do the secure computing after ptrace; failures should be fast.
> * If this fails we might have return value in a0 from seccomp
> * (via SECCOMP_RET_ERRNO/TRACE).
> */
> - if (secure_computing() == -1) {
> - syscall_set_nr(current, regs, -1);
> - return;
> - }
> + if (secure_computing() == -1)
> + return -1;
>
> #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
> if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> @@ -170,6 +168,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
> #endif
>
> audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
> + return 0;
> }
>
> __visible void do_syscall_trace_exit(struct pt_regs *regs)
> --
> 2.20.1
>

--
Kees Cook