Re: [PATCH 03/10] arm64: KVM: Save/restore the host SPE state when entering/leaving a VM

From: Marc Zyngier
Date: Wed Feb 01 2017 - 11:29:59 EST


On 27/01/17 18:07, Will Deacon wrote:
> The SPE buffer is virtually addressed, using the page tables of the CPU
> MMU. Unusually, this means that the EL0/1 page table may be live whilst
> we're executing at EL2 on non-VHE configurations. When VHE is in use,
> we can use the same property to profile the guest behind its back.
>
> This patch adds the relevant disabling and flushing code to KVM so that
> the host can make use of SPE without corrupting guest memory, and any
> attempts by a guest to use SPE will result in a trap.
>
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: Alex BennÃe <alex.bennee@xxxxxxxxxx>
> Cc: Christoffer Dall <christoffer.dall@xxxxxxxxxx>
> Signed-off-by: Will Deacon <will.deacon@xxxxxxx>
> ---
> arch/arm64/include/asm/kvm_arm.h | 3 ++
> arch/arm64/include/asm/kvm_host.h | 7 ++++-
> arch/arm64/kvm/debug.c | 6 ++++
> arch/arm64/kvm/hyp/debug-sr.c | 66 +++++++++++++++++++++++++++++++++++++--
> arch/arm64/kvm/hyp/switch.c | 17 +++++++++-
> 5 files changed, 95 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 2a2752b5b6aa..6e99978e83bd 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -188,6 +188,9 @@
> #define CPTR_EL2_DEFAULT 0x000033ff
>
> /* Hyp Debug Configuration Register bits */
> +#define MDCR_EL2_TPMS (1 << 14)
> +#define MDCR_EL2_E2PB_MASK (UL(0x3))
> +#define MDCR_EL2_E2PB_SHIFT (UL(12))
> #define MDCR_EL2_TDRA (1 << 11)
> #define MDCR_EL2_TDOSA (1 << 10)
> #define MDCR_EL2_TDA (1 << 9)
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index e5050388e062..443b387021f2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -229,7 +229,12 @@ struct kvm_vcpu_arch {
>
> /* Pointer to host CPU context */
> kvm_cpu_context_t *host_cpu_context;
> - struct kvm_guest_debug_arch host_debug_state;
> + struct {
> + /* {Break,watch}point registers */
> + struct kvm_guest_debug_arch regs;
> + /* Statistical profiling extension */
> + u64 pmscr_el1;
> + } host_debug_state;
>
> /* VGIC state */
> struct vgic_cpu vgic_cpu;
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 47e5f0feaee8..dbadfaf850a7 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -95,6 +95,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
> * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
> * - Debug ROM Address (MDCR_EL2_TDRA)
> * - OS related registers (MDCR_EL2_TDOSA)
> + * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
> *
> * Additionally, KVM only traps guest accesses to the debug registers if
> * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
> @@ -110,8 +111,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>
> trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
>
> + /*
> + * This also clears MDCR_EL2_E2PB_MASK to disable guest access
> + * to the profiling buffer.
> + */
> vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
> vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
> + MDCR_EL2_TPMS |

I'm still worried about setting bits that are RES0 on the base version
of the architecture. I guess that we'll have to monitor this and change
it if we find an implementation that doesn't ignore the bit.

> MDCR_EL2_TPMCR |
> MDCR_EL2_TDRA |
> MDCR_EL2_TDOSA);
> diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
> index 4ba5c9095d03..f5154ed3da6c 100644
> --- a/arch/arm64/kvm/hyp/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/debug-sr.c
> @@ -65,6 +65,66 @@
> default: write_debug(ptr[0], reg, 0); \
> }
>
> +#define PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
> +
> +#define PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
> +#define PMBLIMITR_EL1_E BIT(0)
> +
> +#define PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
> +#define PMBIDR_EL1_P BIT(4)
> +
> +#define psb_csync() asm volatile("hint #17")
> +
> +static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
> +{
> + /* The vcpu can run. but it can't hide. */
> +}
> +
> +static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
> +{
> + u64 reg;
> +
> + /* SPE present on this CPU? */
> + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
> + ID_AA64DFR0_PMSVER_SHIFT))
> + return;
> +
> + /* Yes; is it owned by EL3? */
> + reg = read_sysreg_s(PMBIDR_EL1);
> + if (reg & PMBIDR_EL1_P)
> + return;
> +
> + /* No; is the host actually using the thing? */
> + reg = read_sysreg_s(PMBLIMITR_EL1);
> + if (!(reg & PMBLIMITR_EL1_E))
> + return;
> +
> + /* Yes; save the control register and disable data generation */
> + *pmscr_el1 = read_sysreg_s(PMSCR_EL1);
> + write_sysreg_s(0, PMSCR_EL1);
> + isb();
> +
> + /* Now drain all buffered data to memory */
> + psb_csync();
> + dsb(nsh);
> +}
> +
> +static hyp_alternate_select(__debug_save_spe,
> + __debug_save_spe_nvhe, __debug_save_spe_vhe,
> + ARM64_HAS_VIRT_HOST_EXTN);
> +
> +static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
> +{
> + if (!pmscr_el1)
> + return;
> +
> + /* The host page table is installed, but not yet synchronised */
> + isb();
> +
> + /* Re-enable data generation */
> + write_sysreg_s(pmscr_el1, PMSCR_EL1);
> +}
> +
> void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
> struct kvm_guest_debug_arch *dbg,
> struct kvm_cpu_context *ctxt)
> @@ -118,13 +178,15 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
> (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
> vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
>
> - __debug_save_state(vcpu, &vcpu->arch.host_debug_state,
> + __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
> kern_hyp_va(vcpu->arch.host_cpu_context));
> + __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
> }
>
> void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
> {
> - __debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
> + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
> + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
> kern_hyp_va(vcpu->arch.host_cpu_context));
>
> if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 75e83dd40d43..aede1658aeda 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -103,7 +103,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
> static void __hyp_text __deactivate_traps_vhe(void)
> {
> extern char vectors[]; /* kernel exception vectors */
> + u64 mdcr_el2 = read_sysreg(mdcr_el2);
>
> + mdcr_el2 &= MDCR_EL2_HPMN_MASK |
> + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
> + MDCR_EL2_TPMS;
> +
> + write_sysreg(mdcr_el2, mdcr_el2);
> write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
> write_sysreg(vectors, vbar_el1);
> @@ -111,6 +117,12 @@ static void __hyp_text __deactivate_traps_vhe(void)
>
> static void __hyp_text __deactivate_traps_nvhe(void)
> {
> + u64 mdcr_el2 = read_sysreg(mdcr_el2);
> +
> + mdcr_el2 &= MDCR_EL2_HPMN_MASK;
> + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
> +
> + write_sysreg(mdcr_el2, mdcr_el2);
> write_sysreg(HCR_RW, hcr_el2);
> write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
> }
> @@ -132,7 +144,6 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>
> __deactivate_traps_arch()();
> write_sysreg(0, hstr_el2);
> - write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
> write_sysreg(0, pmuserenr_el0);
> }
>
> @@ -357,6 +368,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> }
>
> __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
> + /*
> + * This must come after restoring the host sysregs, since a non-VHE
> + * system may enable SPE here and make use of the TTBRs.
> + */
> __debug_cond_restore_host_state(vcpu);
>
> return exit_code;
>

Acked-by: Marc Zyngier <marc.zyngier@xxxxxxx>

M.
--
Jazz is not dead. It just smells funny...