[RFC PATCH 14/28] arm64: RME: Handle realm enter/exit

From: Steven Price
Date: Fri Jan 27 2023 - 06:42:16 EST


Entering a realm is done using a SMC call to the RMM. On exit the
exit-codes need to be handled slightly differently to the normal KVM
path so define our own functions for realm enter/exit and hook them
in if the guest is a realm guest.

Signed-off-by: Steven Price <steven.price@xxxxxxx>
---
arch/arm64/include/asm/kvm_rme.h | 11 ++
arch/arm64/kvm/Makefile | 2 +-
arch/arm64/kvm/arm.c | 19 +++-
arch/arm64/kvm/rme-exit.c | 168 +++++++++++++++++++++++++++++++
arch/arm64/kvm/rme.c | 11 ++
5 files changed, 205 insertions(+), 6 deletions(-)
create mode 100644 arch/arm64/kvm/rme-exit.c

diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
index 3e75cedaad18..9d1583c44a99 100644
--- a/arch/arm64/include/asm/kvm_rme.h
+++ b/arch/arm64/include/asm/kvm_rme.h
@@ -47,6 +47,9 @@ void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level);
int kvm_create_rec(struct kvm_vcpu *vcpu);
void kvm_destroy_rec(struct kvm_vcpu *vcpu);

+int kvm_rec_enter(struct kvm_vcpu *vcpu);
+int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
+
int realm_set_ipa_state(struct kvm_vcpu *vcpu,
unsigned long addr, unsigned long end,
unsigned long ripas);
@@ -69,4 +72,12 @@ static inline unsigned long rme_rtt_level_mapsize(int level)
return (1UL << RME_RTT_LEVEL_SHIFT(level));
}

+static inline bool realm_is_addr_protected(struct realm *realm,
+ unsigned long addr)
+{
+ unsigned int ia_bits = realm->ia_bits;
+
+ return !(addr & ~(BIT(ia_bits - 1) - 1));
+}
+
#endif
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d2f0400c50da..884c7c44439f 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
vgic/vgic-its.o vgic/vgic-debug.o \
- rme.o
+ rme.o rme-exit.o

kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1b2547516f62..fd9e28f48903 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -985,7 +985,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_entry(*vcpu_pc(vcpu));
guest_timing_enter_irqoff();

- ret = kvm_arm_vcpu_enter_exit(vcpu);
+ if (vcpu_is_rec(vcpu))
+ ret = kvm_rec_enter(vcpu);
+ else
+ ret = kvm_arm_vcpu_enter_exit(vcpu);

vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -1039,10 +1042,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)

local_irq_enable();

- trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
-
/* Exit types that need handling before we can be preempted */
- handle_exit_early(vcpu, ret);
+ if (!vcpu_is_rec(vcpu)) {
+ trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu),
+ *vcpu_pc(vcpu));
+
+ handle_exit_early(vcpu, ret);
+ }

preempt_enable();

@@ -1065,7 +1071,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = ARM_EXCEPTION_IL;
}

- ret = handle_exit(vcpu, ret);
+ if (vcpu_is_rec(vcpu))
+ ret = handle_rme_exit(vcpu, ret);
+ else
+ ret = handle_exit(vcpu, ret);
}

/* Tell userspace about in-kernel device output levels */
diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
new file mode 100644
index 000000000000..15a4ff3517db
--- /dev/null
+++ b/arch/arm64/kvm/rme-exit.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/kvm_host.h>
+#include <kvm/arm_psci.h>
+
+#include <asm/rmi_smc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_rme.h>
+#include <asm/kvm_mmu.h>
+
+typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu);
+
+static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+
+ pr_err("[vcpu %d] Unhandled exit reason from realm (ESR: %#llx)\n",
+ vcpu->vcpu_id, rec->run->exit.esr);
+ return -ENXIO;
+}
+
+static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+
+ if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu))
+ vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu),
+ rec->run->exit.gprs[0]);
+
+ return kvm_handle_guest_abort(vcpu);
+}
+
+static int rec_exit_sync_iabt(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+
+ pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n",
+ vcpu->vcpu_id, rec->run->exit.esr);
+ return -ENXIO;
+}
+
+static int rec_exit_sys_reg(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ bool is_write = !(esr & 1);
+ int ret;
+
+ if (is_write)
+ vcpu_set_reg(vcpu, rt, rec->run->exit.gprs[0]);
+
+ ret = kvm_handle_sys_reg(vcpu);
+
+ if (ret >= 0 && !is_write)
+ rec->run->entry.gprs[0] = vcpu_get_reg(vcpu, rt);
+
+ return ret;
+}
+
+static exit_handler_fn rec_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl,
+ [ESR_ELx_EC_SYS64] = rec_exit_sys_reg,
+ [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt,
+ [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt
+};
+
+static int rec_exit_psci(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+ int i;
+
+ for (i = 0; i < REC_RUN_GPRS; i++)
+ vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]);
+
+ return kvm_psci_call(vcpu);
+}
+
+static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
+{
+ struct realm *realm = &vcpu->kvm->arch.realm;
+ struct rec *rec = &vcpu->arch.rec;
+ unsigned long base = rec->run->exit.ripas_base;
+ unsigned long size = rec->run->exit.ripas_size;
+ unsigned long ripas = rec->run->exit.ripas_value & 1;
+ int ret = -EINVAL;
+
+ if (realm_is_addr_protected(realm, base) &&
+ realm_is_addr_protected(realm, base + size))
+ ret = realm_set_ipa_state(vcpu, base, base + size, ripas);
+
+ WARN(ret, "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n",
+ base, base + size, ripas);
+
+ return 1;
+}
+
+static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+
+ __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = rec->run->exit.cntv_ctl;
+ __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = rec->run->exit.cntv_cval;
+ __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = rec->run->exit.cntp_ctl;
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = rec->run->exit.cntp_cval;
+
+ kvm_realm_timers_update(vcpu);
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+{
+ struct rec *rec = &vcpu->arch.rec;
+ u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr);
+ unsigned long status, index;
+
+ status = RMI_RETURN_STATUS(rec_run_ret);
+ index = RMI_RETURN_INDEX(rec_run_ret);
+
+ /*
+ * If a PSCI_SYSTEM_OFF request raced with a vcpu executing, we might
+ * see the following status code and index indicating an attempt to run
+ * a REC when the RD state is SYSTEM_OFF. In this case, we just need to
+ * return to user space which can deal with the system event or will try
+ * to run the KVM VCPU again, at which point we will no longer attempt
+ * to enter the Realm because we will have a sleep request pending on
+ * the VCPU as a result of KVM's PSCI handling.
+ */
+ if (status == RMI_ERROR_REALM && index == 1) {
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ return 0;
+ }
+
+ if (rec_run_ret)
+ return -ENXIO;
+
+ vcpu->arch.fault.esr_el2 = rec->run->exit.esr;
+ vcpu->arch.fault.far_el2 = rec->run->exit.far;
+ vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar;
+
+ update_arch_timer_irq_lines(vcpu);
+
+ /* Reset the emulation flags for the next run of the REC */
+ rec->run->entry.flags = 0;
+
+ switch (rec->run->exit.exit_reason) {
+ case RMI_EXIT_SYNC:
+ return rec_exit_handlers[esr_ec](vcpu);
+ case RMI_EXIT_IRQ:
+ case RMI_EXIT_FIQ:
+ return 1;
+ case RMI_EXIT_PSCI:
+ return rec_exit_psci(vcpu);
+ case RMI_EXIT_RIPAS_CHANGE:
+ return rec_exit_ripas_change(vcpu);
+ }
+
+ kvm_pr_unimpl("Unsupported exit reason: %u\n",
+ rec->run->exit.exit_reason);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return 0;
+}
diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
index b3ea79189839..16e0bfea98b1 100644
--- a/arch/arm64/kvm/rme.c
+++ b/arch/arm64/kvm/rme.c
@@ -802,6 +802,17 @@ void kvm_destroy_realm(struct kvm *kvm)
kvm_free_stage2_pgd(&kvm->arch.mmu);
}

+int kvm_rec_enter(struct kvm_vcpu *vcpu)
+{
+ struct rec *rec = &vcpu->arch.rec;
+
+ if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE)
+ return -EINVAL;
+
+ return rmi_rec_enter(virt_to_phys(rec->rec_page),
+ virt_to_phys(rec->run));
+}
+
static void free_rec_aux(struct page **aux_pages,
unsigned int num_aux)
{
--
2.34.1