Re: [PATCH v3 26/34] KVM: selftests: Hyper-V PV TLB flush selftest

From: Vitaly Kuznetsov
Date: Tue May 24 2022 - 10:51:50 EST


Maxim Levitsky <mlevitsk@xxxxxxxxxx> writes:

> On Thu, 2022-04-14 at 15:20 +0200, Vitaly Kuznetsov wrote:
>> Introduce a selftest for Hyper-V PV TLB flush hypercalls
>> (HvFlushVirtualAddressSpace/HvFlushVirtualAddressSpaceEx,
>> HvFlushVirtualAddressList/HvFlushVirtualAddressListEx).
>>
>> The test creates one 'sender' vCPU and two 'worker' vCPU which do busy
>> loop reading from a certain GVA checking the observed value. Sender
>> vCPU drops to the host to swap the data page with another page filled
>> with a different value. The expectation for workers is also
>> altered. Without TLB flush on worker vCPUs, they may continue to
>> observe old value. To guard against accidental TLB flushes for worker
>> vCPUs the test is repeated 100 times.
>>
>> Hyper-V TLB flush hypercalls are tested in both 'normal' and 'XMM
>> fast' modes.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
>> ---
>> tools/testing/selftests/kvm/.gitignore | 1 +
>> tools/testing/selftests/kvm/Makefile | 1 +
>> .../selftests/kvm/include/x86_64/hyperv.h | 1 +
>> .../selftests/kvm/x86_64/hyperv_tlb_flush.c | 647 ++++++++++++++++++
>> 4 files changed, 650 insertions(+)
>> create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>>
>> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
>> index 5d5fbb161d56..1a1d09e414d5 100644
>> --- a/tools/testing/selftests/kvm/.gitignore
>> +++ b/tools/testing/selftests/kvm/.gitignore
>> @@ -25,6 +25,7 @@
>> /x86_64/hyperv_features
>> /x86_64/hyperv_ipi
>> /x86_64/hyperv_svm_test
>> +/x86_64/hyperv_tlb_flush
>> /x86_64/mmio_warning_test
>> /x86_64/mmu_role_test
>> /x86_64/platform_info_test
>> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
>> index 44889f897fe7..8b83abc09a1a 100644
>> --- a/tools/testing/selftests/kvm/Makefile
>> +++ b/tools/testing/selftests/kvm/Makefile
>> @@ -54,6 +54,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
>> TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
>> TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
>> TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
>> +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
>> TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
>> TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
>> TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
>> diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> index f51d6fab8e93..1e34dd7c5075 100644
>> --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> @@ -185,6 +185,7 @@
>> /* hypercall options */
>> #define HV_HYPERCALL_FAST_BIT BIT(16)
>> #define HV_HYPERCALL_VARHEAD_OFFSET 17
>> +#define HV_HYPERCALL_REP_COMP_OFFSET 32
>>
>> #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
>>
>> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>> new file mode 100644
>> index 000000000000..00bcae45ddd2
>> --- /dev/null
>> +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>> @@ -0,0 +1,647 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
>> + *
>> + * Copyright (C) 2022, Red Hat, Inc.
>> + *
>> + */
>> +
>> +#define _GNU_SOURCE /* for program_invocation_short_name */
>> +#include <pthread.h>
>> +#include <inttypes.h>
>> +
>> +#include "kvm_util.h"
>> +#include "hyperv.h"
>> +#include "processor.h"
>> +#include "test_util.h"
>> +#include "vmx.h"
>> +
>> +#define SENDER_VCPU_ID 1
>> +#define WORKER_VCPU_ID_1 2
>> +#define WORKER_VCPU_ID_2 65
>> +
>> +#define NTRY 100
>> +
>> +struct thread_params {
>> + struct kvm_vm *vm;
>> + uint32_t vcpu_id;
>> +};
>> +
>> +struct hv_vpset {
>> + u64 format;
>> + u64 valid_bank_mask;
>> + u64 bank_contents[];
>> +};
>> +
>> +enum HV_GENERIC_SET_FORMAT {
>> + HV_GENERIC_SET_SPARSE_4K,
>> + HV_GENERIC_SET_ALL,
>> +};
>> +
>> +#define HV_FLUSH_ALL_PROCESSORS BIT(0)
>> +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
>> +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
>> +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
>> +
>> +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
>> +struct hv_tlb_flush {
>> + u64 address_space;
>> + u64 flags;
>> + u64 processor_mask;
>> + u64 gva_list[];
>> +} __packed;
>> +
>> +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
>> +struct hv_tlb_flush_ex {
>> + u64 address_space;
>> + u64 flags;
>> + struct hv_vpset hv_vp_set;
>> + u64 gva_list[];
>> +} __packed;
>> +
>> +static inline void hv_init(vm_vaddr_t pgs_gpa)
>> +{
>> + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
>> + wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
>> +}
>> +
>> +static void worker_code(void *test_pages, vm_vaddr_t pgs_gpa)
>> +{
>> + u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
>> + unsigned char chr;
>> +
>> + x2apic_enable();
>> + hv_init(pgs_gpa);
>> +
>> + for (;;) {
>> + chr = READ_ONCE(*(unsigned char *)(test_pages + 4096 * 2 + vcpu_id));
> It would be nice to wrap this into a function, like set_expected_char does for ease
> of code understanding.
>
>> + if (chr)
>> + GUEST_ASSERT(*(unsigned char *)test_pages == chr);
>> + asm volatile("nop");
>> + }
>> +}
>> +
>> +static inline u64 hypercall(u64 control, vm_vaddr_t arg1, vm_vaddr_t arg2)
>> +{
>> + u64 hv_status;
>> +
>> + asm volatile("mov %3, %%r8\n"
>> + "vmcall"
>> + : "=a" (hv_status),
>> + "+c" (control), "+d" (arg1)
>> + : "r" (arg2)
>> + : "cc", "memory", "r8", "r9", "r10", "r11");
>> +
>> + return hv_status;
>> +}
>> +
>> +static inline void nop_loop(void)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < 10000000; i++)
>> + asm volatile("nop");
>> +}
>> +
>> +static inline void sync_to_xmm(void *data)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < 8; i++)
>> + write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
>> +}
>
> Nitpick: I see duplicated code, I complain ;-) - maybe put the above to some common file?
>

Gone now.

>> +
>> +static void set_expected_char(void *addr, unsigned char chr, int vcpu_id)
>> +{
>> + asm volatile("mfence");
>
> I remember that Paolo once told me (I might not remember that correctly though),
> that on x86 the actual hardware barriers like mfence are not really
> needed, because hardware already does memory accesses in order,
> unless fancy (e.g non WB) memory types are used.

Even if it can be dropped we still need a compile barrier so I prefer to
keep explicit 'mfence'/'lfence'/... -- especially in tests where
performance doesn't matter much.

>
>> + *(unsigned char *)(addr + 2 * 4096 + vcpu_id) = chr;
>> +}
>> +
>> +static void sender_guest_code(void *hcall_page, void *test_pages, vm_vaddr_t pgs_gpa)
>> +{
>> + struct hv_tlb_flush *flush = (struct hv_tlb_flush *)hcall_page;
>> + struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)hcall_page;
>> + int stage = 1, i;
>> + u64 res;
>> +
>> + hv_init(pgs_gpa);
>> +
>> + /* "Slow" hypercalls */
>
> I hopefully understand it correctly, see my comments below,
> but it might be worthy to add something similar to my comments
> to the code to make it easier for someone reading the code to understand it.
>
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>
> Here we set expected char to 0, meaning that now workers will not assert
> if there is mismatch.
>
>> + GUEST_SYNC(stage++);
> Now there is a mismatch, the host swapped pages for us.
>
>> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>
> Now we flushed the TLB, the guest should see correct value.
>
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>
> Now we force the workers to check it.
>
> Btw, an idea: it might be nice to use more that two test pages,
> like say 100 test pages each filled with different value,
> memory is cheap, and this way there will be no way for something
> to cause 'double error' which could hide the bug by a chance.
>
>
> Another thing, it might be nice to wrap this into a macro/function
> to avoid *that* much duplication.

In the next version I still keep two pages and two workers for
simpliciy, but I wrap all these pre- and post- guts into wrapper
functions.

>
>
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> + flush->gva_list[0] = (u64)test_pages;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS;
>> + flush->processor_mask = 0;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS;
>> + flush->gva_list[0] = (u64)test_pages;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
>> + (1 << HV_HYPERCALL_VARHEAD_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + /* bank_contents and gva_list occupy the same space, thus [1] */
>> + flush_ex->gva_list[1] = (u64)test_pages;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> + (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
>> + BIT_ULL(WORKER_VCPU_ID_1 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
>> + (2 << HV_HYPERCALL_VARHEAD_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
>> + BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + /* bank_contents and gva_list occupy the same space, thus [2] */
>> + flush_ex->gva_list[2] = (u64)test_pages;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> + (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> + flush_ex->gva_list[0] = (u64)test_pages;
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + pgs_gpa, pgs_gpa + 4096);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* "Fast" hypercalls */
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> + sync_to_xmm(&flush->processor_mask);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
>> + HV_HYPERCALL_FAST_BIT, 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> + flush->gva_list[0] = (u64)test_pages;
>> + sync_to_xmm(&flush->processor_mask);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | HV_HYPERCALL_FAST_BIT |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + sync_to_xmm(&flush->processor_mask);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
>> + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush->gva_list[0] = (u64)test_pages;
>> + sync_to_xmm(&flush->processor_mask);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | HV_HYPERCALL_FAST_BIT |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
>> + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT |
>> + (1 << HV_HYPERCALL_VARHEAD_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + /* bank_contents and gva_list occupy the same space, thus [1] */
>> + flush_ex->gva_list[1] = (u64)test_pages;
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> + (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
>> + BIT_ULL(WORKER_VCPU_ID_1 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT |
>> + (2 << HV_HYPERCALL_VARHEAD_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> + flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
>> + BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> + flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> + flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> + /* bank_contents and gva_list occupy the same space, thus [2] */
>> + flush_ex->gva_list[2] = (u64)test_pages;
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> + (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT,
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
>> + for (i = 0; i < NTRY; i++) {
>> + memset(hcall_page, 0, 4096);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> + GUEST_SYNC(stage++);
>> + flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> + flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> + flush_ex->gva_list[0] = (u64)test_pages;
>> + sync_to_xmm(&flush_ex->hv_vp_set);
>> + res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> + (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> + 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> + GUEST_ASSERT((res & 0xffff) == 0);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> + set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> + nop_loop();
>> + }
>> +
>> + GUEST_DONE();
>> +}
>> +
>> +static void *vcpu_thread(void *arg)
>> +{
>> + struct thread_params *params = (struct thread_params *)arg;
>> + struct ucall uc;
>> + int old;
>> + int r;
>> + unsigned int exit_reason;
>> +
>> + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
>> + TEST_ASSERT(r == 0,
>> + "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
>> + params->vcpu_id, r);
>> +
>> + vcpu_run(params->vm, params->vcpu_id);
>> + exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
>> +
>> + TEST_ASSERT(exit_reason == KVM_EXIT_IO,
>> + "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
>> + params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
>> +
>> + if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
>> + TEST_ASSERT(false,
>> + "vCPU %u exited with error: %s.\n",
>> + params->vcpu_id, (const char *)uc.args[0]);
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
>> +{
>> + void *retval;
>> + int r;
>> +
>> + r = pthread_cancel(thread);
>> + TEST_ASSERT(r == 0,
>> + "pthread_cancel on vcpu_id=%d failed with errno=%d",
>> + vcpu_id, r);
>> +
>> + r = pthread_join(thread, &retval);
>> + TEST_ASSERT(r == 0,
>> + "pthread_join on vcpu_id=%d failed with errno=%d",
>> + vcpu_id, r);
>> + TEST_ASSERT(retval == PTHREAD_CANCELED,
>> + "expected retval=%p, got %p", PTHREAD_CANCELED,
>> + retval);
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> + int r;
>> + pthread_t threads[2];
>> + struct thread_params params[2];
>> + struct kvm_vm *vm;
>> + struct kvm_run *run;
>> + vm_vaddr_t hcall_page, test_pages;
>> + struct ucall uc;
>> + int stage = 1;
>> +
>> + vm = vm_create_default(SENDER_VCPU_ID, 0, sender_guest_code);
>> + params[0].vm = vm;
>> + params[1].vm = vm;
>> +
>> + /* Hypercall input/output */
>> + hcall_page = vm_vaddr_alloc_pages(vm, 2);
>> + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
>> +
>> + /*
>> + * Test pages: the first one is filled with '0x1's, the second with '0x2's
>> + * and the test will swap their mappings. The third page keeps the indication
>> + * about the current state of mappings.
>> + */
>> + test_pages = vm_vaddr_alloc_pages(vm, 3);
>> + memset(addr_gva2hva(vm, test_pages), 0x1, 4096);
>> + memset(addr_gva2hva(vm, test_pages) + 4096, 0x2, 4096);
>> + set_expected_char(addr_gva2hva(vm, test_pages), 0x0, WORKER_VCPU_ID_1);
>> + set_expected_char(addr_gva2hva(vm, test_pages), 0x0, WORKER_VCPU_ID_2);
>> +
>> + vm_vcpu_add_default(vm, WORKER_VCPU_ID_1, worker_code);
>> + vcpu_args_set(vm, WORKER_VCPU_ID_1, 2, test_pages, addr_gva2gpa(vm, hcall_page));
>> + vcpu_set_msr(vm, WORKER_VCPU_ID_1, HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
>> + vcpu_set_hv_cpuid(vm, WORKER_VCPU_ID_1);
>> +
>> + vm_vcpu_add_default(vm, WORKER_VCPU_ID_2, worker_code);
>> + vcpu_args_set(vm, WORKER_VCPU_ID_2, 2, test_pages, addr_gva2gpa(vm, hcall_page));
>> + vcpu_set_msr(vm, WORKER_VCPU_ID_2, HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
>> + vcpu_set_hv_cpuid(vm, WORKER_VCPU_ID_2);
>> +
>> + vcpu_args_set(vm, SENDER_VCPU_ID, 3, hcall_page, test_pages,
>> + addr_gva2gpa(vm, hcall_page));
>
> It seems that all worker vCPUs get pointer to the hypercall page,
> which they don't need and if used will create a race.
>

Dropped (actually, I've created a new 'test_data' structure which is
shared by workers and sender).

>
>> + vcpu_set_hv_cpuid(vm, SENDER_VCPU_ID);
>> +
>> + params[0].vcpu_id = WORKER_VCPU_ID_1;
>> + r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
>> + TEST_ASSERT(r == 0,
>> + "pthread_create halter failed errno=%d", errno);
>> +
>> + params[1].vcpu_id = WORKER_VCPU_ID_2;
>> + r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
>> + TEST_ASSERT(r == 0,
>> + "pthread_create halter failed errno=%d", errno);
>
> Also here worker threads don't halt, the message was not updated I think.
>

Fixed!

>
>> +
>> + run = vcpu_state(vm, SENDER_VCPU_ID);
>> +
>> + while (true) {
>> + r = _vcpu_run(vm, SENDER_VCPU_ID);
>> + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
>> + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
>> + "unexpected exit reason: %u (%s)",
>> + run->exit_reason, exit_reason_str(run->exit_reason));
>> +
>> + switch (get_ucall(vm, SENDER_VCPU_ID, &uc)) {
>> + case UCALL_SYNC:
>> + TEST_ASSERT(uc.args[1] == stage,
>> + "Unexpected stage: %ld (%d expected)\n",
>> + uc.args[1], stage);
>> + break;
>> + case UCALL_ABORT:
>> + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
>> + __FILE__, uc.args[1]);
>> + return 1;
>> + case UCALL_DONE:
>> + return 0;
>> + }
>> +
>> + /* Swap test pages */
>> + if (stage % 2) {
>> + __virt_pg_map(vm, test_pages, addr_gva2gpa(vm, test_pages) + 4096,
>> + X86_PAGE_SIZE_4K, true);
>> + __virt_pg_map(vm, test_pages + 4096, addr_gva2gpa(vm, test_pages) - 4096,
>> + X86_PAGE_SIZE_4K, true);
>> + } else {
>> + __virt_pg_map(vm, test_pages, addr_gva2gpa(vm, test_pages) - 4096,
>> + X86_PAGE_SIZE_4K, true);
>> + __virt_pg_map(vm, test_pages + 4096, addr_gva2gpa(vm, test_pages) + 4096,
>> + X86_PAGE_SIZE_4K, true);
>> + }
>
> Another question: why the host doing the swapping of the pages? Since
> !EPT/!NPT is not the goal of this test,
>
> no doubt, why not let the guest vCPU (the sender) do the swapping, which should eliminate the VM exits
> to the host (which can interfere with TLB flush even) and make it
> closer to the real world usage.

This is actually a good idea. It required some APIs to be exported and
some trickery so the guest can actually reach its PTEs but I think it's
worth it so the next version will be doing all updates from the guest
itself.

>
>
>> +
>> + stage++;
>> + }
>> +
>> + cancel_join_vcpu_thread(threads[0], WORKER_VCPU_ID_1);
>> + cancel_join_vcpu_thread(threads[1], WORKER_VCPU_ID_2);
>> + kvm_vm_free(vm);
>> +
>> + return 0;
>> +}
>
>

--
Vitaly