Re: [PATCH RFC 0/2] x86/fpu: avoid "xstate_fault" in xsave_user/xrestore_user

From: Borislav Petkov
Date: Tue Mar 17 2015 - 07:21:58 EST


On Tue, Mar 17, 2015 at 11:00:46AM +0100, Quentin Casasnovas wrote:
> Fair point, but AFAIUI we can't do check_insn(XSAVES) alone as of today,
> and the "..." in your "check_isns(XSAVEOPT, ...)" code above would still
> need to contain the outputs operands.

I think we can do this (see diff the end of this mail).

It still explodes my guest with:

[ 2.940379] Freeing unused kernel memory: 2860K (ffffffff81a39000 - ffffffff81d04000)
[ 2.980722] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 2.980722]
[ 2.984096] CPU: 1 PID: 1 Comm: init Not tainted 4.0.0-rc3+ #22
[ 2.984096] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[ 2.984096] ffff88007bcf0000 ffff88007bcfbc58 ffffffff81675eb9 0000000000000001
[ 2.984096] ffffffff818a9ae8 ffff88007bcfbcd8 ffffffff816745be ffff88007bcf0000
[ 2.984096] 0000000000000010 ffff88007bcfbce8 ffff88007bcfbc88 ffff88007bfcc0b0
[ 2.984096] Call Trace:
[ 2.984096] [<ffffffff81675eb9>] dump_stack+0x4f/0x7b
[ 2.984096] [<ffffffff816745be>] panic+0xc0/0x1dc
[ 2.984096] [<ffffffff81056143>] do_exit+0xc13/0xc50
[ 2.984096] [<ffffffff810574a4>] do_group_exit+0x54/0xe0
[ 2.984096] [<ffffffff81065526>] get_signal+0x266/0xab0
[ 2.984096] [<ffffffff81002523>] do_signal+0x33/0xba0
[ 2.984096] [<ffffffff8109c88e>] ? put_lock_stats.isra.19+0xe/0x30
[ 2.984096] [<ffffffff8167dd81>] ? _raw_spin_unlock_irqrestore+0x41/0x80
[ 2.984096] [<ffffffff8167dd8b>] ? _raw_spin_unlock_irqrestore+0x4b/0x80
[ 2.984096] [<ffffffff8167f9f1>] ? retint_signal+0x11/0x90
[ 2.984096] [<ffffffff810030f5>] do_notify_resume+0x65/0x80
[ 2.984096] [<ffffffff8167fa26>] retint_signal+0x46/0x90
[ 2.984096] Kernel Offset: disabled
[ 2.984096] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

because, AFAICT and from debugging so far, we call xrstor_state() without a
previous xsave_state() in that path:

[ 3.304551] Freeing unused kernel memory: 2860K (ffffffff81a39000 - ffffffff81d04000)
[ 3.346556] traps: xrstor_state
[ 3.350418] CPU: 1 PID: 1 Comm: init Not tainted 4.0.0-rc3+ #21
[ 3.350418] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[ 3.350418] ffff88007b454000 ffff88007bcfbf18 ffffffff81676079 0000000000000000
[ 3.350418] ffff88007bcf0000 ffff88007bcfbf38 ffffffff810038b6 0000000000000000
[ 3.350418] 00007f6b95ba91c8 ffff88007bcfbf48 ffffffff81004433 00007ffeecd31bb0
[ 3.350418] Call Trace:
[ 3.350418] [<ffffffff81676079>] dump_stack+0x4f/0x7b
[ 3.350418] [<ffffffff810038b6>] math_state_restore+0xa6/0x220
[ 3.350418] [<ffffffff81004433>] do_device_not_available+0x23/0x30
[ 3.350418] [<ffffffff81680865>] device_not_available+0x15/0x20

so I need to sort that one out first.

But including the fault exception table in the macro is already an
improvement IMO.

Thanks.

---
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c9a6d68b8d62..0d0cc053c7cc 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -67,6 +67,51 @@ extern int init_fpu(struct task_struct *child);
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err)

+#define XSTATE_OP(op, st, lmask, hmask, err) \
+ asm volatile("1:" op "\n\t" \
+ "2:\n\t" \
+ ".pushsection .fixup,\"ax\"\n\t" \
+ "3: movl $-1,%[err]\n\t" \
+ "jmp 2b\n\t" \
+ ".popsection\n\t" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * 661 and alt_end_marker labels below are defined in ALTERNATIVE*
+ * and we're reusing them here so as not to clutter this macro
+ * unnecessarily.
+ */
+#define XSTATE_XSAVE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE_2(XSAVE, \
+ XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVES, X86_FEATURE_XSAVES) \
+ "\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "3: movl $-1, %[err]\n" \
+ "jmp " alt_end_marker "b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE(XRSTOR, \
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "3: movl $-1, %[err]\n" \
+ "jmp 663b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
@@ -77,20 +122,11 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
u32 hmask = mask >> 32;
int err = 0;

- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XSAVES, fx, lmask, hmask, err);
else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ XSTATE_OP(XSAVE, fx, lmask, hmask, err);
+
return err;
}

@@ -104,20 +140,12 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
u32 hmask = mask >> 32;
int err = 0;

- WARN_ON(system_state != SYSTEM_BOOTING);
+ WARN_ON(system_state != SYSTEM_BOOTING);

- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, fx, lmask, hmask, err);
else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ XSTATE_OP(XRSTOR, fx, lmask, hmask, err);
return err;
}

@@ -141,18 +169,7 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
*
* If none of xsaves and xsaveopt is enabled, use xsave.
*/
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [fx] "D" (fx), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault
- : "0" (0)
- : "memory");
+ XSTATE_XSAVE(fx, lmask, hmask, err);

return err;
}
@@ -170,17 +187,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
* Use xrstors to restore context if it is enabled. xrstors supports
* compacted format of xsave area which is not supported by xrstor.
*/
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault
- : "0" (0)
- : "memory");
+ XSTATE_XRESTORE(fx, lmask, hmask, err);

return err;
}

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/