Re: [PATCH v11 2/3] KVM: s390: guest support for topology function

From: Janis Schoetterl-Glausch
Date: Mon Jul 04 2022 - 05:08:13 EST


On 7/1/22 18:25, Pierre Morel wrote:
> We report a topology change to the guest for any CPU hotplug.
>
> The reporting to the guest is done using the Multiprocessor
> Topology-Change-Report (MTCR) bit of the utility entry in the guest's
> SCA which will be cleared during the interpretation of PTF.
>
> On every vCPU creation we set the MTCR bit to let the guest know the
> next time he uses the PTF with command 2 instruction that the
> topology changed and that he should use the STSI(15.1.x) instruction
s/he/it (twice)
> to get the topology details.
>
> STSI(15.1.x) gives information on the CPU configuration topology.
> Let's accept the interception of STSI with the function code 15 and
> let the userland part of the hypervisor handle it when userland
> support the CPU Topology facility.
And the user STSI capability.
Also: supportS.
>
> Signed-off-by: Pierre Morel <pmorel@xxxxxxxxxxxxx>
> Reviewed-by: Nico Boehr <nrb@xxxxxxxxxxxxx>
> ---
> arch/s390/include/asm/kvm_host.h | 18 +++++++++++++---
> arch/s390/kvm/kvm-s390.c | 36 ++++++++++++++++++++++++++++++++
> arch/s390/kvm/priv.c | 16 ++++++++++----
> arch/s390/kvm/vsie.c | 8 +++++++
> 4 files changed, 71 insertions(+), 7 deletions(-)
>
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 766028d54a3e..ae6bd3d607de 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -93,19 +93,30 @@ union ipte_control {
> };
> };
>
[...]

> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 8fcb56141689..ee59b03f2e45 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -1691,6 +1691,31 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
> return ret;
> }
>
> +/**
> + * kvm_s390_update_topology_change_report - update CPU topology change report
> + * @kvm: guest KVM description
> + * @val: set or clear the MTCR bit
> + *
> + * Updates the Multiprocessor Topology-Change-Report bit to signal
> + * the guest with a topology change.
> + * This is only relevant if the topology facility is present.
> + *
> + * The SCA version, bsca or esca, doesn't matter as offset is the same.
> + */
> +static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
> +{
> + struct bsca_block *sca = kvm->arch.sca;
> + union sca_utility new, old;
> +
> + read_lock(&kvm->arch.sca_lock);

You forgot to put the assignment of sca under the lock.

> + do {
> + old = READ_ONCE(sca->utility);
> + new = old;
> + new.mtcr = val;
> + } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
> + read_unlock(&kvm->arch.sca_lock);
> +}
> +
> static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
> {
> int ret;
> @@ -2877,6 +2902,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> kvm_clear_async_pf_completion_queue(vcpu);
> if (!kvm_is_ucontrol(vcpu->kvm))
> sca_del_vcpu(vcpu);
> + kvm_s390_update_topology_change_report(vcpu->kvm, 1);
>
> if (kvm_is_ucontrol(vcpu->kvm))
> gmap_remove(vcpu->arch.gmap);
> @@ -3272,6 +3298,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
> vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
> if (test_kvm_facility(vcpu->kvm, 9))
> vcpu->arch.sie_block->ecb |= ECB_SRSI;
> + /*
> + * CPU Topology
> + * This facility only uses the utility field of the SCA and none
> + * of the cpu entries that are problematic with the other
> + * interpretation facilities so we can pass it through.
> + */

This is the comment for vsie.c
> + if (test_kvm_facility(vcpu->kvm, 11))
> + vcpu->arch.sie_block->ecb |= ECB_PTF;
> if (test_kvm_facility(vcpu->kvm, 73))
> vcpu->arch.sie_block->ecb |= ECB_TE;
> if (!kvm_is_ucontrol(vcpu->kvm))
> @@ -3403,6 +3437,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> rc = kvm_s390_vcpu_setup(vcpu);
> if (rc)
> goto out_ucontrol_uninit;
> +
> + kvm_s390_update_topology_change_report(vcpu->kvm, 1);
> return 0;
>
> out_ucontrol_uninit:
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 12c464c7cddf..046afee1be94 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -873,10 +873,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
> if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
>
> - if (fc > 3) {
> - kvm_s390_set_psw_cc(vcpu, 3);
> - return 0;
> - }
> + /* Bailout forbidden function codes */
> + if (fc > 3 && (fc != 15 || kvm_s390_pv_cpu_is_protected(vcpu)))
> + goto out_no_data;
> +
> + /* fc 15 is provided with PTF/CPU topology support */
> + if (fc == 15 && !test_kvm_facility(vcpu->kvm, 11))
> + goto out_no_data;
>
> if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
> || vcpu->run->s.regs.gprs[1] & 0xffff0000)
> @@ -910,6 +913,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
> goto out_no_data;
> handle_stsi_3_2_2(vcpu, (void *) mem);
> break;
> + case 15: /* fc 15 is fully handled in userspace */
> + if (vcpu->kvm->arch.user_stsi)
> + insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
> + trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
> + return -EREMOTE;

This doesn't look right to me, you still return -EREMOTE if user_stsi is false.
The way I read the PoP here is that it is ok to set condition code 3 for the else case.

> }
> if (kvm_s390_pv_cpu_is_protected(vcpu)) {
> memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index dada78b92691..94138f8f0c1c 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> /* Host-protection-interruption introduced with ESOP */
> if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
> scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
> + /*
> + * CPU Topology
> + * This facility only uses the utility field of the SCA and none of
> + * the cpu entries that are problematic with the other interpretation
> + * facilities so we can pass it through
> + */
> + if (test_kvm_facility(vcpu->kvm, 11))
> + scb_s->ecb |= scb_o->ecb & ECB_PTF;
> /* transactional execution */
> if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
> /* remap the prefix is tx is toggled on */