Re: [V2 PATCH 2/6] KVM: Selftests: Add support for private memory

From: Sean Christopherson
Date: Tue Jan 17 2023 - 17:47:30 EST


On Mon, Dec 05, 2022, Vishal Annapurve wrote:
> Add support for registering private memory with kvm using
> KVM_SET_USER_MEMORY_REGION ioctl.
>
> Helper function to query extended userspace mem region is introduced to
> allow memory conversion.
>
> vm_mem_backing_src types is extended to contain additional guest memory
> source types to cover the cases where guest memory can be backed by both
> anonymous memory and restricted memfd.
>
> Signed-off-by: Vishal Annapurve <vannapurve@xxxxxxxxxx>
> ---
> .../selftests/kvm/include/kvm_util_base.h | 12 +++-
> .../testing/selftests/kvm/include/test_util.h | 4 ++
> tools/testing/selftests/kvm/lib/kvm_util.c | 58 +++++++++++++++++--
> tools/testing/selftests/kvm/lib/test_util.c | 11 ++++
> 4 files changed, 78 insertions(+), 7 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> index c7685c7038ff..4ad99f295f2a 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> @@ -31,7 +31,10 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
> typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
>
> struct userspace_mem_region {
> - struct kvm_userspace_memory_region region;
> + union {
> + struct kvm_userspace_memory_region region;
> + struct kvm_userspace_memory_region_ext region_ext;

As discussed in the UPM series, we're trending towards adding an entirely new
struct+ioctl(), kvm_userspace_memory_region2, instead of extending the existing
struct. The == -> >= hack you had to add in kvm_do_ioctl() below is one of the
reasons for that change.

> + };
> struct sparsebit *unused_phy_pages;
> int fd;
> off_t offset;
> @@ -196,7 +199,7 @@ static inline bool kvm_has_cap(long cap)
>
> #define kvm_do_ioctl(fd, cmd, arg) \
> ({ \
> - static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \
> + static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) >= _IOC_SIZE(cmd), ""); \
> ioctl(fd, cmd, arg); \
> })
>
> @@ -384,6 +387,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
> void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
> void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
> +
> struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
> vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
> vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
> @@ -715,6 +719,10 @@ struct kvm_userspace_memory_region *
> kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
> uint64_t end);
>
> +struct kvm_userspace_memory_region_ext *
> +kvm_userspace_memory_region_ext_find(struct kvm_vm *vm, uint64_t start,
> + uint64_t end);
> +
> #define sync_global_to_guest(vm, g) ({ \
> typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
> memcpy(_p, &(g), sizeof(g)); \
> diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
> index 80d6416f3012..aea80071f2b8 100644
> --- a/tools/testing/selftests/kvm/include/test_util.h
> +++ b/tools/testing/selftests/kvm/include/test_util.h
> @@ -103,6 +103,8 @@ enum vm_mem_backing_src_type {
> VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
> VM_MEM_SRC_SHMEM,
> VM_MEM_SRC_SHARED_HUGETLB,
> + VM_MEM_SRC_ANONYMOUS_AND_RESTRICTED_MEMFD,
> + VM_MEM_SRC_ANON_HTLB2M_AND_RESTRICTED_MEMFD,

There's no need for a dedicated flag in the backing type; vm_userspace_mem_region_add()
already takes the memslot's flags and can simply key off KVM_MEM_PRIVATE.

> @@ -881,6 +915,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> struct userspace_mem_region *region;
> size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
> size_t alignment;
> + int restricted_memfd = -1;

No need to initialize to -1; KVM is supposed to ignore the restrictedmem fd if
!KVM_MEM_PRIVATE, and if KVM_MEM_PRIVATE is set, selftests must provide a valid fd.

> TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
> "Number of guest pages is not compatible with the host. "

This is what I ended up with after splitting out the conversion to
KVM_SET_USER_MEMORY_REGION2 to a separate patch.

--
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 7c1f81f93ba3..26c6830c1aa1 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -32,6 +32,11 @@ int open_path_or_exit(const char *path, int flags)
return fd;
}

+static int memfd_restricted(unsigned int flags)
+{
+ return syscall(__NR_memfd_restricted, flags);
+}
+
/*
* Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
@@ -980,6 +985,15 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
}

region->backing_src_type = src_type;
+
+ if (flags & KVM_MEM_PRIVATE) {
+ region->region.restricted_fd = memfd_restricted(0);
+ region->region.restricted_offset = 0;
+
+ TEST_ASSERT(region->region.restricted_fd >= 0,
+ "Failed to create restricted memfd");
+ }
+
region->unused_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
guest_paddr >> vm->page_shift, npages);
@@ -992,9 +1006,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%lx size: 0x%lx restricted fd: %d\n",
ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ guest_paddr, (uint64_t) region->region.memory_size,
+ region->region.restricted_fd);

/* Add to quick lookup data structures */
vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);