[PATCH v2] KVM: selftests: Run dirty_log_perf_test on specific cpus

From: Vipin Sharma
Date: Fri Aug 19 2022 - 17:07:49 EST


Add a command line option, -c, to pin the vcpus and the main application
thread to specific logical cpus on the host machine. This is useful for
analyzing performance based on where the vcpus and the dirty log worker
run, e.g. on the same numa node versus on different numa nodes.

Link: https://lore.kernel.org/lkml/20220801151928.270380-1-vipinsh@xxxxxxxxxx
Signed-off-by: Vipin Sharma <vipinsh@xxxxxxxxxx>
Suggested-by: David Matlack <dmatlack@xxxxxxxxxx>
Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Suggested-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---

v2:
- Removed -d option.
- A single cpu list is passed as the option: the cpus for the vcpus,
followed by the cpu for the application thread.
- Added paranoid cousin of atoi().

v1: https://lore.kernel.org/lkml/20220817152956.4056410-1-vipinsh@xxxxxxxxxx

.../selftests/kvm/access_tracking_perf_test.c | 2 +-
.../selftests/kvm/demand_paging_test.c | 2 +-
.../selftests/kvm/dirty_log_perf_test.c | 89 +++++++++++++++++--
.../selftests/kvm/include/perf_test_util.h | 3 +-
.../selftests/kvm/lib/perf_test_util.c | 32 ++++++-
.../kvm/memslot_modification_stress_test.c | 2 +-
6 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 1c2749b1481a..9659462f4747 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -299,7 +299,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);

- perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
+ perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_thread_main);

pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 779ae54f89c4..b9848174d6e7 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -336,7 +336,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Finished creating vCPUs and starting uffd threads\n");

clock_gettime(CLOCK_MONOTONIC, &start);
- perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+ perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_worker);
pr_info("Started all vCPUs\n");

perf_test_join_vcpu_threads(nr_vcpus);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index f99e39a672d3..ace4ed954628 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -8,10 +8,12 @@
* Copyright (C) 2020, Google, Inc.
*/

+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
+#include <sched.h>
#include <linux/bitmap.h>

#include "kvm_util.h"
@@ -132,6 +134,7 @@ struct test_params {
bool partition_vcpu_memory_access;
enum vm_mem_backing_src_type backing_src;
int slots;
+ int *vcpu_to_lcpu;
};

static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -248,7 +251,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
for (i = 0; i < nr_vcpus; i++)
vcpu_last_completed_iteration[i] = -1;

- perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+ perf_test_start_vcpu_threads(nr_vcpus, p->vcpu_to_lcpu, vcpu_worker);

/* Allow the vCPUs to populate memory */
pr_debug("Starting iteration %d - Populating\n", iteration);
@@ -348,12 +351,61 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_destroy_vm(vm);
}

+/*
+ * Strict replacement for atoi(): convert a base-10 string to an int.
+ *
+ * Fails the test via TEST_ASSERT if strtol() sets errno, if no digits were
+ * consumed, if characters remain after the number, or if the parsed value
+ * does not fit in an int.
+ */
+static int atoi_paranoid(const char *num_str)
+{
+	char *end_ptr;
+	long num;
+
+	errno = 0;
+	num = strtol(num_str, &end_ptr, 10);
+	TEST_ASSERT(errno == 0, "Conversion error: %d\n", errno);
+	TEST_ASSERT(num_str != end_ptr && *end_ptr == '\0',
+		    "Invalid number string.\n");
+	/*
+	 * strtol() only flags overflow of long; reject values that would
+	 * otherwise be silently altered by the narrowing conversion to int.
+	 */
+	TEST_ASSERT(num == (long)(int)num,
+		    "Number out of range for int: %ld\n", num);
+
+	return (int)num;
+}
+
+/*
+ * Parse a comma separated list of logical cpu numbers.
+ *
+ * @arg:       string of the form "N[,N...]"; it is not modified.
+ * @lcpu_list: output array that receives the parsed cpu numbers in order.
+ * @list_size: number of entries available in @lcpu_list.
+ *
+ * Returns the number of entries written to @lcpu_list. Fails the test via
+ * TEST_ASSERT if there are more than @list_size entries, if an entry is not
+ * a valid number, or if a cpu number is negative or not below CPU_SETSIZE.
+ */
+static int parse_cpu_list(const char *arg, int *lcpu_list, int list_size)
+{
+	const char *delim = ",";
+	char *cpu, *cpu_list, *save_ptr;
+	int i = 0, cpu_num;
+
+	/* strtok_r() writes into its input, so tokenize a private copy. */
+	cpu_list = strdup(arg);
+	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+
+	for (cpu = strtok_r(cpu_list, delim, &save_ptr); cpu;
+	     cpu = strtok_r(NULL, delim, &save_ptr)) {
+		TEST_ASSERT(i != list_size,
+			    "Too many cpus, max supported: %d\n", list_size);
+
+		cpu_num = atoi_paranoid(cpu);
+		/*
+		 * The parsed values are later passed to CPU_SET() on a
+		 * fixed-size cpu_set_t, which can only represent cpus below
+		 * CPU_SETSIZE; reject anything larger up front.
+		 */
+		TEST_ASSERT(cpu_num >= 0 && cpu_num < CPU_SETSIZE,
+			    "Invalid cpu number: %d\n", cpu_num);
+		lcpu_list[i++] = cpu_num;
+	}
+	free(cpu_list);
+
+	return i;
+}
+
+/*
+ * Restrict the calling thread to a single logical cpu. The test is failed
+ * via TEST_ASSERT if sched_setaffinity() returns an error.
+ */
+static void assign_dirty_log_perf_test_cpu(int cpu)
+{
+	int err;
+	cpu_set_t mask;
+
+	/* Build an affinity mask that contains only @cpu. */
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+
+	/* A pid of 0 makes sched_setaffinity() act on the calling thread. */
+	err = sched_setaffinity(0, sizeof(mask), &mask);
+	TEST_ASSERT(err == 0, "Error in setting dirty log perf test cpu\n");
+}
+
static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
"[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
- "[-x memslots]\n", name);
+ "[-x memslots] [-c logical cpus to run test on]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
TEST_HOST_LOOP_N);
@@ -383,6 +435,14 @@ static void help(char *name)
backing_src_help("-s");
printf(" -x: Split the memory region into this number of memslots.\n"
" (default: 1)\n");
+ printf(" -c: Comma separated values of the logical CPUs, which will run\n"
+ " the vCPUs, followed by the main application thread cpu.\n"
+ " Number of values must be equal to the number of vCPUs + 1.\n\n"
+ " Example: ./dirty_log_perf_test -v 3 -c 22,23,24,50\n"
+ " This means that the vcpu 0 will run on the logical cpu 22,\n"
+ " vcpu 1 on the logical cpu 23, vcpu 2 on the logical cpu 24\n"
+ " and the main thread will run on cpu 50.\n"
+ " (default: No cpu mapping)\n");
puts("");
exit(0);
}
@@ -390,14 +450,18 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ int lcpu_list[KVM_MAX_VCPUS + 1];
+
struct test_params p = {
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
.partition_vcpu_memory_access = true,
.backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
+ .vcpu_to_lcpu = NULL,
};
int opt;
+ int nr_lcpus = -1;

dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -406,8 +470,11 @@ int main(int argc, char *argv[])

guest_modes_append_default();

- while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "c:eghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
+ case 'c':
+ nr_lcpus = parse_cpu_list(optarg, lcpu_list, KVM_MAX_VCPUS + 1);
+ break;
case 'e':
/* 'e' is for evil. */
run_vcpus_while_disabling_dirty_logging = true;
@@ -415,7 +482,7 @@ int main(int argc, char *argv[])
dirty_log_manual_caps = 0;
break;
case 'i':
- p.iterations = atoi(optarg);
+ p.iterations = atoi_paranoid(optarg);
break;
case 'p':
p.phys_offset = strtoull(optarg, NULL, 0);
@@ -430,12 +497,12 @@ int main(int argc, char *argv[])
guest_percpu_mem_size = parse_size(optarg);
break;
case 'f':
- p.wr_fract = atoi(optarg);
+ p.wr_fract = atoi_paranoid(optarg);
TEST_ASSERT(p.wr_fract >= 1,
"Write fraction cannot be less than one");
break;
case 'v':
- nr_vcpus = atoi(optarg);
+ nr_vcpus = atoi_paranoid(optarg);
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
@@ -446,7 +513,7 @@ int main(int argc, char *argv[])
p.backing_src = parse_backing_src_type(optarg);
break;
case 'x':
- p.slots = atoi(optarg);
+ p.slots = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -455,6 +522,14 @@ int main(int argc, char *argv[])
}
}

+ if (nr_lcpus != -1) {
+ TEST_ASSERT(nr_lcpus == nr_vcpus + 1,
+ "Number of logical cpus (%d) is not equal to the number of vcpus + 1 (%d).",
+ nr_lcpus, nr_vcpus);
+ assign_dirty_log_perf_test_cpu(lcpu_list[nr_vcpus]);
+ p.vcpu_to_lcpu = lcpu_list;
+ }
+
TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");

pr_info("Test iterations: %"PRIu64"\n", p.iterations);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index eaa88df0555a..bd6c566cfc92 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -53,7 +53,8 @@ void perf_test_destroy_vm(struct kvm_vm *vm);

void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);

-void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
+void perf_test_start_vcpu_threads(int vcpus, int *vcpus_to_lcpu,
+ void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
void perf_test_guest_code(uint32_t vcpu_id);

diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9618b37c66f7..771fbdf3d2c2 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,11 +2,14 @@
/*
* Copyright (C) 2020, Google LLC.
*/
+#define _GNU_SOURCE
#include <inttypes.h>

#include "kvm_util.h"
#include "perf_test_util.h"
#include "processor.h"
+#include <pthread.h>
+#include <sched.h>

struct perf_test_args perf_test_args;

@@ -260,10 +263,15 @@ static void *vcpu_thread_main(void *data)
return NULL;
}

-void perf_test_start_vcpu_threads(int nr_vcpus,
+void perf_test_start_vcpu_threads(int nr_vcpus, int *vcpu_to_lcpu,
void (*vcpu_fn)(struct perf_test_vcpu_args *))
{
- int i;
+ int i, err = 0;
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ pthread_attr_init(&attr);
+ CPU_ZERO(&cpuset);

vcpu_thread_fn = vcpu_fn;
WRITE_ONCE(all_vcpu_threads_running, false);
@@ -274,7 +282,24 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
vcpu->vcpu_idx = i;
WRITE_ONCE(vcpu->running, false);

- pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+ if (vcpu_to_lcpu) {
+ CPU_SET(vcpu_to_lcpu[i], &cpuset);
+
+ err = pthread_attr_setaffinity_np(&attr,
+ sizeof(cpu_set_t),
+ &cpuset);
+ TEST_ASSERT(err == 0,
+ "vCPU %d could not be mapped to logical cpu %d, error returned: %d\n",
+ i, vcpu_to_lcpu[i], err);
+
+ CPU_CLR(vcpu_to_lcpu[i], &cpuset);
+ }
+
+ err = pthread_create(&vcpu->thread, &attr, vcpu_thread_main,
+ vcpu);
+ TEST_ASSERT(err == 0,
+ "error in creating vcpu %d thread, error returned: %d\n",
+ i, err);
}

for (i = 0; i < nr_vcpus; i++) {
@@ -283,6 +308,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
}

WRITE_ONCE(all_vcpu_threads_running, true);
+ pthread_attr_destroy(&attr);
}

void perf_test_join_vcpu_threads(int nr_vcpus)
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 6ee7e1dde404..246f8cc7bb2b 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -103,7 +103,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)

pr_info("Finished creating vCPUs\n");

- perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+ perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_worker);

pr_info("Started all vCPUs\n");

--
2.37.1.595.g718a3a8f04-goog