Re: [PATCH v2 08/10] x86-64: Emulate vsyscalls

From: Andrew Lutomirski
Date: Mon May 30 2011 - 06:43:37 EST


On Mon, May 30, 2011 at 3:35 AM, Borislav Petkov <bp@xxxxxxxxx> wrote:
> On Sun, May 29, 2011 at 11:48:45PM -0400, Andy Lutomirski wrote:
>> There's a fair amount of code in the vsyscall page, and who knows
>> what will happen if an exploit jumps into the middle of it.  Reduce
>> the risk by replacing most of it with short magic incantations that
>> are useless if entered in the middle.  This change can be disabled
>> by CONFIG_UNSAFE_VSYSCALLS (default y).
>>
>> This causes vsyscalls to be a little more expensive than real
>> syscalls.  Fortunately sensible programs don't use them.
>>
>> Some care is taken to make sure that tools like valgrind and
>> ThreadSpotter still work.
>>
>> This patch is not perfect: the vread_tsc and vread_hpet functions
>> are still at a fixed address.  Fixing that might involve making
>> alternative patching work in the vDSO.
>>
>> Signed-off-by: Andy Lutomirski <luto@xxxxxxx>
>> ---
>>  arch/x86/Kconfig                  |   17 +++++
>>  arch/x86/kernel/Makefile          |    3 +
>>  arch/x86/kernel/vsyscall_64.c     |  121 +++++++++++++++++++++++++++++++++++--
>>  arch/x86/kernel/vsyscall_emu_64.S |   40 ++++++++++++
>>  4 files changed, 176 insertions(+), 5 deletions(-)
>>  create mode 100644 arch/x86/kernel/vsyscall_emu_64.S
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index cc6c53a..186018b 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -1650,6 +1650,23 @@ config COMPAT_VDSO
>>
>>         If unsure, say Y.
>>
>> +config UNSAFE_VSYSCALLS
>> +     def_bool y
>> +     prompt "Unsafe fast legacy vsyscalls"
>> +     depends on X86_64
>> +     ---help---
>> +       Legacy user code expects to be able to issue three syscalls
>> +       by calling fixed addresses in kernel space.  If you say N,
>> +       then the kernel traps and emulates these calls.  If you say
>> +       Y, then there is actual executable code at a fixed address
>> +       to implement these calls efficiently.
>> +
>> +       On a system with recent enough glibc (probably 2.14 or
>> +       newer) and no static binaries, you can say N without a
>> +       performance penalty to improve security.
>> +
>> +       If unsure, say Y.
>> +
>>  config CMDLINE_BOOL
>>       bool "Built-in kernel command line"
>>       ---help---
>> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>> index a24521b..b901781 100644
>> --- a/arch/x86/kernel/Makefile
>> +++ b/arch/x86/kernel/Makefile
>> @@ -42,6 +42,9 @@ obj-$(CONFIG_X86_32)        += probe_roms_32.o
>>  obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
>>  obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
>>  obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
>> +ifndef CONFIG_UNSAFE_VSYSCALLS
>> +     obj-$(CONFIG_X86_64)    += vsyscall_emu_64.o
>> +endif
>>  obj-y                        += bootflag.o e820.o
>>  obj-y                        += pci-dma.o quirks.o topology.o kdebugfs.o
>>  obj-y                        += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
>> diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
>> index 71fa506..5b3d62a 100644
>> --- a/arch/x86/kernel/vsyscall_64.c
>> +++ b/arch/x86/kernel/vsyscall_64.c
>> @@ -48,9 +48,6 @@
>>  #include <asm/vgtod.h>
>>  #include <asm/traps.h>
>>
>> -#define __vsyscall(nr) \
>> -             __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
>> -
>>  DEFINE_VVAR(int, vgetcpu_mode);
>>  DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
>>  {
>> @@ -96,6 +93,7 @@ static void warn_bad_vsyscall(struct pt_regs *regs, bool is_warning,
>>               return;
>>
>>       tsk = current;
>> +
>>       printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx",
>>              is_warning ? KERN_WARNING : KERN_INFO,
>>              tsk->comm, task_pid_nr(tsk),
>> @@ -106,6 +104,12 @@ static void warn_bad_vsyscall(struct pt_regs *regs, bool is_warning,
>>       printk("\n");
>>  }
>>
>> +#ifdef CONFIG_UNSAFE_VSYSCALLS
>> +
>> +#define __vsyscall(nr)                                                       \
>> +     __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
>> +
>> +
>>  /* RED-PEN may want to readd seq locking, but then the variable should be
>>   * write-once.
>>   */
>> @@ -117,8 +121,11 @@ static __always_inline void do_get_tz(struct timezone * tz)
>>  static __always_inline int fallback_gettimeofday(struct timeval *tv)
>>  {
>>       int ret;
>> -     /* Invoke do_emulate_vsyscall. */
>> -     asm volatile("movb $0xce, %%al;\n\t"
>> +     /*
>> +      * Invoke do_emulate_vsyscall.  Intentionally incompatible with
>> +      * the CONFIG_UNSAFE_VSYSCALLS=n case.
>> +      */
>> +     asm volatile("mov $0xce, %%al;\n\t"
>>                    "int %[vec]"
>>                    : "=a" (ret)
>>                    : "D" (tv), [vec] "i" (VSYSCALL_EMU_VECTOR));
>> @@ -237,6 +244,8 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
>>       long ret;
>>
>>       /* Kernel code must never get here. */
>> +     if (!user_mode(regs))
>> +             early_printk("oh crap!\n");
>>       BUG_ON(!user_mode(regs));
>>
>>       local_irq_enable();
>> @@ -278,6 +287,106 @@ out:
>>       local_irq_disable();
>>  }
>>
>> +#else /* CONFIG_UNSAFE_VSYSCALLS=n below */
>> +
>> +static inline unsigned long vsyscall_intcc_addr(int vsyscall_nr)
>> +{
>> +     return VSYSCALL_START + 1024*vsyscall_nr + 2;
>> +}
>> +
>> +void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
>> +{
>> +     u8 vsyscall_nr, al;
>> +     long ret;
>> +
>> +     /* Kernel code must never get here. */
>> +     BUG_ON(!user_mode(regs));
>> +
>> +     local_irq_enable();
>> +
>> +     /*
>> +      * Decode the vsyscall number.
>> +      * 0xcc -> 0, 0xce -> 1, 0xf0 -> 2; see vsyscall_emu_64.S for why.
>> +      */
>> +     al = regs->ax & 0xff;
>> +     vsyscall_nr = (al - 0xcc) >> 1;
>
> Ok, but
>
>        (0xf0 - 0xcc) >> 1 == 0x12
>
> Don't you mean 0xd0 here? Although 0xd0 is opcode for all those
> rotate/shift insns. What am I missing?

Wow, -ESHOULDUSECALCULATOR.  I knew I was bad at arithmetic, but 0xcc +
4 == 0xf0 is pretty amazing.  I was so excited that I'd found three
"safe" opcodes in an arithmetic sequence.

I'll fix that.  I suspect the only reason I didn't crash anything is
that I didn't give vgetcpu enough exercise here.
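
For the record, here's the throwaway userspace check I should have run
in the first place (not part of the patch; the array of %al values and
the loop are just mine for illustration):

#include <stdio.h>

int main(void)
{
	/* %al values a stub would load before "int $VSYSCALL_EMU_VECTOR" */
	unsigned char al[] = { 0xcc, 0xce, 0xd0, 0xf0 };

	for (int i = 0; i < 4; i++)
		printf("al = 0x%02x -> vsyscall_nr = 0x%x\n",
		       al[i], (al[i] - 0xcc) >> 1);
	return 0;
}

That prints 0x0, 0x1 and 0x2 for 0xcc/0xce/0xd0 but 0x12 for 0xf0, so
assuming the (al - 0xcc) >> 1 decode stays, the third magic byte has to
be 0xd0 rather than 0xf0 (modulo your point about 0xd0 also being the
shift/rotate opcode group).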

--Andy