Re: [PATCH] x86, paravirt, xen: Remove the 64-bit irq_enable_sysexit pvop

From: Andy Lutomirski
Date: Mon Apr 06 2015 - 11:29:31 EST


On Mon, Apr 6, 2015 at 7:10 AM, Konrad Rzeszutek Wilk
<konrad.wilk@xxxxxxxxxx> wrote:
> On Fri, Apr 03, 2015 at 03:52:30PM -0700, Andy Lutomirski wrote:
>> [cc: Boris and Konrad. Whoops]
>>
>> On Fri, Apr 3, 2015 at 3:51 PM, Andy Lutomirski <luto@xxxxxxxxxx> wrote:
>> > We don't use irq_enable_sysexit on 64-bit kernels any more. Remove
>
> Is there an commit (or name of patch) that explains why 32-bit-user-space-on-64-bit
> kernels is unsavory?

sysexit never tasted very good :-p

We're (hopefully) not breaking 32-bit-user-space-on-64-bit, but we're
trying an unconventional approach to making the code faster and less
scary. As a result, 64-bit kernels won't use sysexit any more.
Hopefully Xen is okay with the slightly sneaky thing we're doing.
AFAICT Xen thinks of sysretl and sysexit as slightly funny irets, so I
don't expect there to be any problem.

See:

https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/asm&id=4214a16b02971c60960afd675d03544e109e0d75

and

https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/asm&id=47091e3c5b072daca29a15d2a3caf40359b0d140

--Andy

>
> Thank you!
>> > all the paravirt and Xen machinery to support it on 64-bit kernels.
>> >
>> > Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
>> > ---
>> >
>> > I haven't actually tested this on Xen, but it builds for me.
>> >
>> > arch/x86/ia32/ia32entry.S | 6 ------
>> > arch/x86/include/asm/paravirt_types.h | 7 ++++---
>> > arch/x86/kernel/asm-offsets.c | 2 ++
>> > arch/x86/kernel/paravirt.c | 4 +++-
>> > arch/x86/kernel/paravirt_patch_64.c | 1 -
>> > arch/x86/xen/enlighten.c | 3 ++-
>> > arch/x86/xen/xen-asm_64.S | 16 ----------------
>> > arch/x86/xen/xen-ops.h | 2 ++
>> > 8 files changed, 13 insertions(+), 28 deletions(-)
>> >
>> > diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
>> > index 5d8f987a340d..eb1eb7b70f4b 100644
>> > --- a/arch/x86/ia32/ia32entry.S
>> > +++ b/arch/x86/ia32/ia32entry.S
>> > @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32)
>> > swapgs
>> > sysretl
>> > ENDPROC(native_usergs_sysret32)
>> > -
>> > -ENTRY(native_irq_enable_sysexit)
>> > - swapgs
>> > - sti
>> > - sysexit
>> > -ENDPROC(native_irq_enable_sysexit)
>> > #endif
>> >
>> > /*
>> > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
>> > index 7549b8b369e4..38a0ff9ef06e 100644
>> > --- a/arch/x86/include/asm/paravirt_types.h
>> > +++ b/arch/x86/include/asm/paravirt_types.h
>> > @@ -160,13 +160,14 @@ struct pv_cpu_ops {
>> > u64 (*read_pmc)(int counter);
>> > unsigned long long (*read_tscp)(unsigned int *aux);
>> >
>> > +#ifdef CONFIG_X86_32
>> > /*
>> > * Atomically enable interrupts and return to userspace. This
>> > - * is only ever used to return to 32-bit processes; in a
>> > - * 64-bit kernel, it's used for 32-on-64 compat processes, but
>> > - * never native 64-bit processes. (Jump, not call.)
>> > + * is only used in 32-bit kernels. 64-bit kernels use
>> > + * usergs_sysret32 instead.
>> > */
>> > void (*irq_enable_sysexit)(void);
>> > +#endif
>> >
>> > /*
>> > * Switch to usermode gs and return to 64-bit usermode using
>> > diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
>> > index 9f6b9341950f..2d27ebf0aed8 100644
>> > --- a/arch/x86/kernel/asm-offsets.c
>> > +++ b/arch/x86/kernel/asm-offsets.c
>> > @@ -49,7 +49,9 @@ void common(void) {
>> > OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
>> > OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
>> > OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
>> > +#ifdef CONFIG_X86_32
>> > OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
>> > +#endif
>> > OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
>> > OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
>> > #endif
>> > diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
>> > index 548d25f00c90..7563114d9c3a 100644
>> > --- a/arch/x86/kernel/paravirt.c
>> > +++ b/arch/x86/kernel/paravirt.c
>> > @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
>> > ret = paravirt_patch_ident_64(insnbuf, len);
>> >
>> > else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
>> > +#ifdef CONFIG_X86_32
>> > type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
>> > +#endif
>> > type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
>> > type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
>> > /* If operation requires a jmp, then jmp */
>> > @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
>> >
>> > .load_sp0 = native_load_sp0,
>> >
>> > -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
>> > +#if defined(CONFIG_X86_32)
>> > .irq_enable_sysexit = native_irq_enable_sysexit,
>> > #endif
>> > #ifdef CONFIG_X86_64
>> > diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
>> > index a1da6737ba5b..0de21c62c348 100644
>> > --- a/arch/x86/kernel/paravirt_patch_64.c
>> > +++ b/arch/x86/kernel/paravirt_patch_64.c
>> > @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
>> > PATCH_SITE(pv_irq_ops, save_fl);
>> > PATCH_SITE(pv_irq_ops, irq_enable);
>> > PATCH_SITE(pv_irq_ops, irq_disable);
>> > - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
>> > PATCH_SITE(pv_cpu_ops, usergs_sysret32);
>> > PATCH_SITE(pv_cpu_ops, usergs_sysret64);
>> > PATCH_SITE(pv_cpu_ops, swapgs);
>> > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> > index 81665c9f2132..3797b6b31f95 100644
>> > --- a/arch/x86/xen/enlighten.c
>> > +++ b/arch/x86/xen/enlighten.c
>> > @@ -1267,10 +1267,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
>> > .read_tscp = native_read_tscp,
>> >
>> > .iret = xen_iret,
>> > - .irq_enable_sysexit = xen_sysexit,
>> > #ifdef CONFIG_X86_64
>> > .usergs_sysret32 = xen_sysret32,
>> > .usergs_sysret64 = xen_sysret64,
>> > +#else
>> > + .irq_enable_sysexit = xen_sysexit,
>> > #endif
>> >
>> > .load_tr_desc = paravirt_nop,
>> > diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
>> > index 985fc3ee0973..a2cabb8bd6bf 100644
>> > --- a/arch/x86/xen/xen-asm_64.S
>> > +++ b/arch/x86/xen/xen-asm_64.S
>> > @@ -47,22 +47,6 @@ ENTRY(xen_iret)
>> > ENDPATCH(xen_iret)
>> > RELOC(xen_iret, 1b+1)
>> >
>> > -/*
>> > - * sysexit is not used for 64-bit processes, so it's only ever used to
>> > - * return to 32-bit compat userspace.
>> > - */
>> > -ENTRY(xen_sysexit)
>> > - pushq $__USER32_DS
>> > - pushq %rcx
>> > - pushq $X86_EFLAGS_IF
>> > - pushq $__USER32_CS
>> > - pushq %rdx
>> > -
>> > - pushq $0
>> > -1: jmp hypercall_iret
>> > -ENDPATCH(xen_sysexit)
>> > -RELOC(xen_sysexit, 1b+1)
>> > -
>> > ENTRY(xen_sysret64)
>> > /*
>> > * We're already on the usermode stack at this point, but
>> > diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
>> > index 9e195c683549..c20fe29e65f4 100644
>> > --- a/arch/x86/xen/xen-ops.h
>> > +++ b/arch/x86/xen/xen-ops.h
>> > @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
>> >
>> > /* These are not functions, and cannot be called normally */
>> > __visible void xen_iret(void);
>> > +#ifdef CONFIG_X86_32
>> > __visible void xen_sysexit(void);
>> > +#endif
>> > __visible void xen_sysret32(void);
>> > __visible void xen_sysret64(void);
>> > __visible void xen_adjust_exception_frame(void);
>> > --
>> > 2.3.0
>> >
>>
>>
>>
>> --
>> Andy Lutomirski
>> AMA Capital Management, LLC



--
Andy Lutomirski
AMA Capital Management, LLC
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/