Re: [RFC][PATCH] tracing/x86: Save CR2 before tracing irqsoff on error_entry

From: Peter Zijlstra
Date: Thu Mar 21 2019 - 06:45:33 EST


On Thu, Mar 21, 2019 at 10:02:41AM +0100, Peter Zijlstra wrote:
> On Thu, Mar 21, 2019 at 09:33:17AM +0100, Peter Zijlstra wrote:
>
> I'm thinking this problem wasn't new.
>
> > idtentry page_fault do_page_fault has_error_code=1
> > call error_entry
> > TRACE_IRQS_OFF
> > call trace_hardirqs_off*
> > <tracer stuff>
> > <fault> # modifies CR2
>
> CALL_enter_from_user_mode
> __context_tracking_exit()
> trace_user_exit(0)
> #PF
>
> > call do_page_fault
> > address = read_cr2(); /* whoopsie */
>
> And that also isn't fixed by your patch.
>
> I'm trying to make idtentry put cr2 in rdx, such that do_page_fault()
> takes address as a 3rd argument, but I'm still fighting that context
> tracking nonsense.
>

Something a little like the below; it is completely untested and obviously
needs the corresponding 32-bit changes too.

---
arch/x86/entry/entry_64.S | 28 ++++++++++++++++------------
arch/x86/mm/fault.c | 3 +--
2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..776dbe7ba72e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 read_cr2=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8

@@ -901,18 +901,28 @@ ENTRY(\sym)

.if \paranoid
call paranoid_entry
+ /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
.else
call error_entry
+ /* returned flag: ebx=1: CALL_enter_from_user_mode, ebx=0: don't need it */
.endif
UNWIND_HINT_REGS
- /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */

- .if \paranoid
+ .if \read_cr2
+ mov %cr2, %rdx /* XXX paravirt crap */
+ .endif
+
.if \shift_ist != -1
TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
.else
TRACE_IRQS_OFF
.endif
+
+ .if \paranoid == 0
+ testl %ebx, %ebx
+ jz .Lno_context_tracking_\@
+ CALL_enter_from_user_mode
+.Lno_context_tracking_\@:
.endif

movq %rsp, %rdi /* pt_regs pointer */
@@ -1140,7 +1150,7 @@ idtentry xenint3 do_int3 has_error_code=0
#endif

idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1 read_cr2=1

#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault do_async_page_fault has_error_code=1
@@ -1243,17 +1253,11 @@ ENTRY(error_entry)
ENCODE_FRAME_POINTER
pushq %r12

- /*
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks).
- */
- TRACE_IRQS_OFF
- CALL_enter_from_user_mode
+ mov $1, %ebx /* CALL_enter_from_user_mode */
ret

.Lerror_entry_done:
- TRACE_IRQS_OFF
+ xorl %ebx, %ebx
ret

/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 667f1da36208..aac7a74869a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1560,9 +1560,8 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
* exception_{enter,exit}() contains all sorts of tracepoints.
*/
dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;

prev_state = exception_enter();