Re: [PATCH v5 3/6] x86/sev-es: Split up runtime #VC handler for correct state tracking

From: Peter Zijlstra
Date: Wed Jun 16 2021 - 12:23:07 EST


On Mon, Jun 14, 2021 at 03:53:24PM +0200, Joerg Roedel wrote:

> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
>
> movq %rsp, %rdi /* pt_regs pointer */
>
> - call \cfunc
> + call kernel_\cfunc
>
> /*
> * No need to switch back to the IST stack. The current stack is either
> @@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)
>
> /* Switch to the regular task stack */
> .Lfrom_usermode_switch_stack_\@:
> - idtentry_body safe_stack_\cfunc, has_error_code=1
> + idtentry_body user_\cfunc, has_error_code=1
>
> _ASM_NOKPROBE(\asmsym)
> SYM_CODE_END(\asmsym)

Consistency with idtentry_mce_db would seem to suggest using \cfunc and
noist_\cfunc.

amluto, tglx: do we have strong feelings on consistency?


> +static bool noinstr vc_check_and_handle_db(struct pt_regs *regs, unsigned long error_code)
> +{
> + if (likely(error_code != SVM_EXIT_EXCP_BASE + X86_TRAP_DB))
> + return false;
>
> + vc_handle_trap_db(regs);

It's a bit sad this does user_mode(regs) again.

> +
> + return true;
> +}

Maybe something like:

static __always_inline bool vc_is_db(unsigned long error_code)
{
return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

> +
> +/*
> + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
> + * and will panic when an error happens.
> + */
> +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
> +{
> + irqentry_state_t irq_state;
>
> + /*
> + * With the current implementation it is always possible to switch to a
> + * safe stack because #VC exceptions only happen at known places, like
> + * intercepted instructions or accesses to MMIO areas/IO ports. They can
> + * also happen with code instrumentation when the hypervisor intercepts
> + * #DB, but the critical paths are forbidden to be instrumented, so #DB
> + * exceptions currently also only happen in safe places.
> + *
> + * But keep this here in case the noinstr annotations are violated due
> + * to bug elsewhere.
> + */
> + if (unlikely(on_vc_fallback_stack(regs))) {
> + instrumentation_begin();
> + panic("Can't handle #VC exception from unsupported context\n");
> + instrumentation_end();
> + }
> +
> + /*
> + * Handle #DB before calling into !noinstr code to avoid recursive #DB.
> + */
> + if (vc_check_and_handle_db(regs, error_code))
> + return;

if (vc_is_db(error_code)) {
exc_debug(regs);
return;
}

> +
> + irq_state = irqentry_nmi_enter(regs);
> +
> + instrumentation_begin();
> +
> + if (!vc_raw_handle_exception(regs, error_code)) {
> /* Show some debug info */
> show_regs(regs);
>
> @@ -1443,23 +1448,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
> panic("Returned from Terminate-Request to Hypervisor\n");
> }
>
> + instrumentation_end();
> + irqentry_nmi_exit(regs, irq_state);
> }
>
> +/*
> + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
> + * and will kill the current task with SIGBUS when an error happens.
> + */
> +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
> {
> + irqentry_state_t irq_state;
> +
> + /*
> + * Handle #DB before calling into !noinstr code to avoid recursive #DB.
> + */
> + if (vc_check_and_handle_db(regs, error_code))
> + return;

if (vc_is_db(error_code)) {
noist_exc_debug(regs);
return;
}

> +
> + irq_state = irqentry_enter(regs);
> instrumentation_begin();
>
> + if (!vc_raw_handle_exception(regs, error_code)) {
> + /*
> + * Do not kill the machine if user-space triggered the
> + * exception. Send SIGBUS instead and let user-space deal with
> + * it.
> + */
> + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
> + }
> +
> + instrumentation_end();
> + irqentry_exit(regs, irq_state);
> }

Other than that, this seems *much* nicer. Thanks!