[PATCH 4/4] kvm: x86: export TSC offset to user-space

From: Luiz Capitulino
Date: Wed Aug 31 2016 - 13:06:08 EST


We need to retrieve a VM's TSC offset in order to use
the host's TSC to merge host and guest traces. This is
explained in detail in this thread:

[Qemu-devel] [RFC] host and guest kernel trace merging
https://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg00887.html

Today, the only way to retrieve a VM's TSC offset is
by using the kvm_write_tsc_offset tracepoint. This has
a few problems. First, the tracepoint is only emitted
when the VM boots, so a VM that is already running has
to be rebooted before its offset can be obtained. Second,
tracepoints are not supposed to be ABIs, which makes them
a poor fit for data that user-space tools need to consume.

This commit exports a VM's TSC offset to user-space via
debugfs. A new file called "tsc-offset" is created in
the VM's debugfs directory. For example:

/sys/kernel/debug/kvm/51696-10/tsc-offset

This file contains one line per vCPU, each showing that
vCPU's TSC offset printed as an unsigned 64-bit decimal
value. For example:

vcpu0: 18446742405270834952
vcpu1: 18446742405270834952
vcpu2: 18446742405270834952
vcpu3: 18446742405270834952

There are some important observations about this
solution:

- While all vCPUs' TSC offsets should be equal for the
cases we care about (i.e. stable TSC and no write to
the TSC MSR), I chose to follow the spec and export
each vCPU's TSC offset (this might also be helpful for
debugging).

- The TSC offset is only useful after the VM has booted

- We'll probably need to export the TSC multiplier too.
However, I've been using only the TSC offset for now.
So, let's get this merged first and do the TSC multiplier
as a second step

Signed-off-by: Luiz Capitulino <lcapitulino@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm.c | 1 +
arch/x86/kvm/vmx.c | 8 ++++++++
arch/x86/kvm/x86.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33ae3a4..5714bbd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -952,6 +952,7 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);

u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
+ u64 (*read_cached_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);

u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d8..c851477 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5065,6 +5065,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit,

.read_tsc_offset = svm_read_tsc_offset,
+ .read_cached_tsc_offset = svm_read_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
.read_l1_tsc = svm_read_l1_tsc,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40..82dfe42 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -616,6 +616,7 @@ struct vcpu_vmx {
u64 hv_deadline_tsc;

u64 current_tsc_ratio;
+ u64 cached_tsc_offset;

bool guest_pkru_valid;
u32 guest_pkru;
@@ -2608,6 +2609,11 @@ static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
return vmcs_read64(TSC_OFFSET);
}

+static u64 vmx_read_cached_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->cached_tsc_offset;
+}
+
/*
* writes 'offset' into guest's timestamp counter offset register
*/
@@ -2632,6 +2638,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
vmcs_read64(TSC_OFFSET), offset);
vmcs_write64(TSC_OFFSET, offset);
}
+ to_vmx(vcpu)->cached_tsc_offset = offset;
}

static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -11275,6 +11282,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,

.read_tsc_offset = vmx_read_tsc_offset,
+ .read_cached_tsc_offset = vmx_read_cached_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
.read_l1_tsc = vmx_read_l1_tsc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18dfbac..75a8e23 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -54,6 +54,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
+#include <linux/debugfs.h>
#include <trace/events/kvm.h>

#include <asm/debugreg.h>
@@ -7779,8 +7780,37 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
}

+static int tsc_offset_show(struct seq_file *m, void *data)
+{
+ struct kvm *kvm = m->private;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ seq_printf(m, "vcpu%d: %llu\n",
+ vcpu->vcpu_id, kvm_x86_ops->read_cached_tsc_offset(vcpu));
+
+ return 0;
+}
+
+static int tsc_offset_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, tsc_offset_show, inode->i_private);
+}
+
+static const struct file_operations tsc_offset_fops = {
+ .owner = THIS_MODULE,
+ .open = tsc_offset_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
int kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
+ if (!debugfs_create_file("tsc-offset", 0444,
+ kvm->debugfs_dentry, kvm, &tsc_offset_fops))
+ return -ENOMEM;
return 0;
}

--
2.5.5