Re: [PATCH v4 0/6] Micro-optimize vclock_gettime

From: Andy Lutomirski
Date: Tue May 17 2011 - 14:31:44 EST


On 05/17/2011 07:36 AM, Ingo Molnar wrote:
>
> * Andrew Lutomirski<luto@xxxxxxx> wrote:
>
>>> Well, how does that differ from having the real syscall instruction there?
>>> How are we going to filter real (old-)glibc calls from exploits?
>>
>> Because there are only four vsyscalls: vgettimeofday, vtime, vgetcpu, and
>> venosys. None of them have side-effects, so they only allow an attacker to
>> write something to user memory somewhere. The implementation of
>> vgettimeofday needs a syscall instruction internally for its fallback, which
>> means that an attack could jump there instead of to the start of the vsyscall
>> implementation.
>
> So for this to work securely the emulation code would also have to filter the
> syscall numbers, to make sure that only these benign syscalls are used.
>
> It should perhaps also warn if it notices something weird going on.

It's even easier than that: there are no syscall numbers involved. There are four separate entry points, one for each vsyscall.

(It turns out that one of them has been broken and just segfaults since 2008 (a4928cff), so we only have to emulate three of them.)

On KVM on Sandy Bridge, I can emulate a vsyscall that does nothing in 400ns or so. I'll try to make this code emulate real vsyscalls over the weekend. This was much easier than I expected.

diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d0983d2..52b4b49 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -39,6 +39,14 @@ extern struct timezone sys_tz;

extern void map_vsyscall(void);

+/* Emulation */
+static inline bool is_vsyscall_addr(unsigned long addr)
+{
+	return (addr & ~(3*VSYSCALL_SIZE)) == VSYSCALL_START; /* true only at the 4 entry points */
+}
+
+void emulate_vsyscall(struct pt_regs *regs);
+
#endif /* __KERNEL__ */

#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c..83590e8 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -32,6 +32,8 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>

#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -233,6 +235,41 @@ static long __vsyscall(3) venosys_1(void)
return -ENOSYS;
}

+/*
+ * Emulate a call to one of the legacy vsyscall entry points, identified
+ * by regs->ip.
+ *
+ * Slots: 0 vgettimeofday, 1 vtime, 2 vgetcpu, 3 venosys.  This is a stub:
+ * slots 0-2 report -EINVAL and slot 3 reports -ENOSYS in regs->ax.
+ * On success the return address is popped and execution resumes there.
+ */
+void emulate_vsyscall(struct pt_regs *regs)
+{
+	/* Entry points are VSYSCALL_SIZE apart, so bits 10-11 of ip pick one. */
+	unsigned int nr = (regs->ip >> 10) & 3;
+	unsigned long ret_addr;
+	long result;
+
+	BUILD_BUG_ON(VSYSCALL_SIZE != (1<<10));
+
+	/*
+	 * Read the caller's return address from the top of the user stack.
+	 * If that read faults, kill the task and leave regs untouched (XXX).
+	 */
+	if (get_user(ret_addr, (unsigned long __user *)regs->sp)) {
+		force_sig(SIGKILL, current);
+		return;
+	}
+
+	/* Only venosys (slot 3) has its own result so far. */
+	result = (nr == 3) ? -ENOSYS : -EINVAL;
+
+	/* Emulate the return: pop the address and resume at the caller. */
+	regs->sp += 8;
+	regs->ip = ret_addr;
+	regs->ax = result;
+}
+
#ifdef CONFIG_SYSCTL
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f87..c84df6f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -16,6 +16,7 @@
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
+#include <asm/vsyscall.h> /* vsyscall emulation */

/*
* Page fault error code bits:
@@ -719,6 +720,16 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;

+ /*
+ * Calling certain addresses has historical semantics that
+ * we need to emulate.
+ */
+ if (is_vsyscall_addr(regs->ip) && regs->ip == address &&
+ (error_code & (PF_WRITE | PF_INSTR)) == PF_INSTR) {
+ emulate_vsyscall(regs);
+ return;
+ }
+
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);




I don't expect to have this ready for 2.6.40. What's the status of the RDTSC stuff -- do you want to pick it up for the 2.6.40 merge window?

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/