[PATCH] x86,seccomp,prctl: Remove PR_TSC_SIGSEGV and seccomp TSC filtering

From: Andy Lutomirski
Date: Fri Oct 03 2014 - 13:18:27 EST


PR_SET_TSC / PR_TSC_SIGSEGV is a security feature to prevent heavily
sandboxed programs from learning the time, presumably to avoid
disclosing the wall clock and to make timing attacks much harder to
exploit.

Unfortunately, this feature is very insecure, for multiple reasons,
and has probably been insecure since before it was written.

Weakness 1: Before Linux 3.16, the vvar page and the HPET (!) were
part of the kernel's fixmap, so any user process could read them.
The vvar page contains low-resolution timing information (with real
wall clock and frequency data), and the HPET can be used for high
precision timing. Even in Linux 3.16, there is no clean way to disable
access to these pages.

Weakness 2: On most configurations, most or all userspace processes
have unrestricted access to RDPMC, which is even better than RDTSC
for exploiting timing attacks.

I would like to fix both of these issues. I want to deny access to
RDPMC to processes that haven't asked for access via
perf_event_open. I also want to implement real TSC blocking, which
will require some vdso enhancements.

The problem is that both of these fixes will be per-mm, not
per-task. So PR_SET_TSC will be barely supportable.

Therefore, I'm proposing the radical solution of ripping out the old
ABI to make room for the new.

Enabling strict seccomp mode no longer disables the TSC.

PR_GET_TSC still works and returns PR_TSC_ENABLE. PR_SET_TSC now
returns -EINVAL if you try to set PR_TSC_SIGSEGV.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
---
.../prctl/disable-tsc-ctxt-sw-stress-test.c | 96 ----------------------
.../prctl/disable-tsc-on-off-stress-test.c | 95 ---------------------
Documentation/prctl/disable-tsc-test.c | 94 ---------------------
arch/x86/include/asm/processor.h | 7 --
arch/x86/include/asm/thread_info.h | 4 +-
arch/x86/include/asm/tsc.h | 2 -
arch/x86/kernel/process.c | 67 ---------------
include/uapi/linux/prctl.h | 7 +-
kernel/seccomp.c | 3 -
kernel/sys.c | 12 +--
10 files changed, 14 insertions(+), 373 deletions(-)
delete mode 100644 Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
delete mode 100644 Documentation/prctl/disable-tsc-on-off-stress-test.c
delete mode 100644 Documentation/prctl/disable-tsc-test.c

diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
deleted file mode 100644
index f8e8e95e81fd..000000000000
--- a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Tests if the control register is updated correctly
- * at context switches
- *
- * Warning: this test will cause a very high load for a few seconds
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-#include <wait.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-uint64_t rdtsc() {
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-void sigsegv_expect(int sig)
-{
- /* */
-}
-
-void segvtask(void)
-{
- if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
- {
- perror("prctl");
- exit(0);
- }
- signal(SIGSEGV, sigsegv_expect);
- alarm(10);
- rdtsc();
- fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
- exit(0);
-}
-
-
-void sigsegv_fail(int sig)
-{
- fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
- exit(0);
-}
-
-void rdtsctask(void)
-{
- if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
- {
- perror("prctl");
- exit(0);
- }
- signal(SIGSEGV, sigsegv_fail);
- alarm(10);
- for(;;) rdtsc();
-}
-
-
-int main(int argc, char **argv)
-{
- int n_tasks = 100, i;
-
- fprintf(stderr, "[No further output means we're allright]\n");
-
- for (i=0; i<n_tasks; i++)
- if (fork() == 0)
- {
- if (i & 1)
- segvtask();
- else
- rdtsctask();
- }
-
- for (i=0; i<n_tasks; i++)
- wait(NULL);
-
- exit(0);
-}
-
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c
deleted file mode 100644
index 1fcd91445375..000000000000
--- a/Documentation/prctl/disable-tsc-on-off-stress-test.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Tests if the control register is updated correctly
- * when set with prctl()
- *
- * Warning: this test will cause a very high load for a few seconds
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-#include <wait.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-/* snippet from wikipedia :-) */
-
-uint64_t rdtsc() {
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-int should_segv = 0;
-
-void sigsegv_cb(int sig)
-{
- if (!should_segv)
- {
- fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
- exit(0);
- }
- if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
- {
- perror("prctl");
- exit(0);
- }
- should_segv = 0;
-
- rdtsc();
-}
-
-void task(void)
-{
- signal(SIGSEGV, sigsegv_cb);
- alarm(10);
- for(;;)
- {
- rdtsc();
- if (should_segv)
- {
- fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
- exit(0);
- }
- if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
- {
- perror("prctl");
- exit(0);
- }
- should_segv = 1;
- }
-}
-
-
-int main(int argc, char **argv)
-{
- int n_tasks = 100, i;
-
- fprintf(stderr, "[No further output means we're allright]\n");
-
- for (i=0; i<n_tasks; i++)
- if (fork() == 0)
- task();
-
- for (i=0; i<n_tasks; i++)
- wait(NULL);
-
- exit(0);
-}
-
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c
deleted file mode 100644
index 843c81eac235..000000000000
--- a/Documentation/prctl/disable-tsc-test.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-const char *tsc_names[] =
-{
- [0] = "[not set]",
- [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
- [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
-};
-
-uint64_t rdtsc() {
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-void sigsegv_cb(int sig)
-{
- int tsc_val = 0;
-
- printf("[ SIG_SEGV ]\n");
- printf("prctl(PR_GET_TSC, &tsc_val); ");
- fflush(stdout);
-
- if ( prctl(PR_GET_TSC, &tsc_val) == -1)
- perror("prctl");
-
- printf("tsc_val == %s\n", tsc_names[tsc_val]);
- printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
- fflush(stdout);
- if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
- perror("prctl");
-
- printf("rdtsc() == ");
-}
-
-int main(int argc, char **argv)
-{
- int tsc_val = 0;
-
- signal(SIGSEGV, sigsegv_cb);
-
- printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
- printf("prctl(PR_GET_TSC, &tsc_val); ");
- fflush(stdout);
-
- if ( prctl(PR_GET_TSC, &tsc_val) == -1)
- perror("prctl");
-
- printf("tsc_val == %s\n", tsc_names[tsc_val]);
- printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
- printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
- fflush(stdout);
-
- if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
- perror("prctl");
-
- printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
- printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
- fflush(stdout);
-
- if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
- perror("prctl");
-
- printf("rdtsc() == ");
- fflush(stdout);
- printf("%llu\n", (unsigned long long)rdtsc());
- fflush(stdout);
-
- exit(EXIT_SUCCESS);
-}
-
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eb71ec794732..d9ed8489bc04 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -946,13 +946,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,

#define KSTK_EIP(task) (task_pt_regs(task)->ip)

-/* Get/set a process' ability to use the timestamp counter instruction */
-#define GET_TSC_CTL(adr) get_tsc_mode((adr))
-#define SET_TSC_CTL(val) set_tsc_mode((val))
-
-extern int get_tsc_mode(unsigned long adr);
-extern int set_tsc_mode(unsigned int val);
-
extern u16 amd_get_nb_id(int cpu);

static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..ac9ed8b13aa8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -78,7 +78,6 @@ struct thread_info {
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_FORK 18 /* ret_from_fork */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
@@ -103,7 +102,6 @@ struct thread_info {
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
#define _TIF_NOHZ (1 << TIF_NOHZ)
@@ -145,7 +143,7 @@ struct thread_info {

/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_BLOCKSTEP)

#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94605c0e9cee..dc8fefa8b672 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -17,8 +17,6 @@ typedef unsigned long long cycles_t;
extern unsigned int cpu_khz;
extern unsigned int tsc_khz;

-extern void disable_TSC(void);
-
static inline cycles_t get_cycles(void)
{
unsigned long long ret = 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f804dc935d2a..73e6a57e24d9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -137,64 +137,6 @@ void flush_thread(void)
free_thread_xstate(tsk);
}

-static void hard_disable_TSC(void)
-{
- write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
- preempt_disable();
- if (!test_and_set_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_disable_TSC();
- preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
- write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
- preempt_disable();
- if (test_and_clear_thread_flag(TIF_NOTSC))
- /*
- * Must flip the CPU state synchronously with
- * TIF_NOTSC in the current running context.
- */
- hard_enable_TSC();
- preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
- unsigned int val;
-
- if (test_thread_flag(TIF_NOTSC))
- val = PR_TSC_SIGSEGV;
- else
- val = PR_TSC_ENABLE;
-
- return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
- if (val == PR_TSC_SIGSEGV)
- disable_TSC();
- else if (val == PR_TSC_ENABLE)
- enable_TSC();
- else
- return -EINVAL;
-
- return 0;
-}
-
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
@@ -214,15 +156,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
update_debugctlmsr(debugctl);
}

- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
- }
-
if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
/*
* Copy the relevant range of the IO bitmap.
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 58afc04c107e..9a2fea65c965 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -68,7 +68,12 @@
#define PR_CAPBSET_READ 23
#define PR_CAPBSET_DROP 24

-/* Get/set the process' ability to use the timestamp counter instruction */
+/*
+ * Get/set the process' ability to use the timestamp counter instruction
+ * Deprecated: PR_GET_TSC always reports PR_TSC_ENABLE, and PR_SET_TSC can
+ * only be used to set PR_TSC_ENABLE. On non-x86 systems, neither of these
+ * prctls exists at all.
+ */
#define PR_GET_TSC 25
#define PR_SET_TSC 26
# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 44eb005c6695..bdc60dc68e56 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -678,9 +678,6 @@ static long seccomp_set_mode_strict(void)
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;

-#ifdef TIF_NOTSC
- disable_TSC();
-#endif
seccomp_assign_mode(current, seccomp_mode);
ret = 0;

diff --git a/kernel/sys.c b/kernel/sys.c
index ce8129192a26..5dd1dedb7bf9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -88,9 +88,6 @@
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a) (-EINVAL)
#endif
-#ifndef SET_TSC_CTL
-# define SET_TSC_CTL(a) (-EINVAL)
-#endif

/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -1917,12 +1914,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_SECCOMP:
error = prctl_set_seccomp(arg2, (char __user *)arg3);
break;
+#ifdef CONFIG_X86
case PR_GET_TSC:
- error = GET_TSC_CTL(arg2);
+ error = put_user(PR_TSC_ENABLE, (int __user *)arg2);
break;
case PR_SET_TSC:
- error = SET_TSC_CTL(arg2);
+ if (arg2 == PR_TSC_ENABLE)
+ error = 0;
+ else
+ error = -EINVAL;
break;
+#endif
case PR_TASK_PERF_EVENTS_DISABLE:
error = perf_event_task_disable();
break;
--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/