Re: [PATCH] KVM: x86: fix missed hardware breakpoints

From: Xiao Guangrong
Date: Fri Feb 26 2016 - 05:42:29 EST




On 02/19/2016 06:56 PM, Paolo Bonzini wrote:
Sometimes when setting a breakpoint a process doesn't stop on it.
This is because the debug registers are not loaded correctly on
VCPU load.

The following simple reproducer from Oleg Nesterov tries using debug
registers in two threads. To see the bug, run a 2-VCPU guest under
"taskset -c 0", then run "./bp 0 1" inside the guest.

#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <asm/debugreg.h>
#include <assert.h>

#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
{
unsigned long dr7;

dr7 = ((len | type) & 0xf)
<< (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
if (enable)
dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

return dr7;
}

int write_dr(int pid, int dr, unsigned long val)
{
return ptrace(PTRACE_POKEUSER, pid,
offsetof (struct user, u_debugreg[dr]),
val);
}

void set_bp(pid_t pid, void *addr)
{
unsigned long dr7;
assert(write_dr(pid, 0, (long)addr) == 0);
dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
assert(write_dr(pid, 7, dr7) == 0);
}

void *get_rip(int pid)
{
return (void*)ptrace(PTRACE_PEEKUSER, pid,
offsetof(struct user, regs.rip), 0);
}

void test(int nr)
{
void *bp_addr = &&label + nr, *bp_hit;
int pid;

printf("test bp %d\n", nr);
assert(nr < 16); // see 16 asm nops below

pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
kill(getpid(), SIGSTOP);
for (;;) {
label: asm (
"nop; nop; nop; nop;"
"nop; nop; nop; nop;"
"nop; nop; nop; nop;"
"nop; nop; nop; nop;"
);
}
}

assert(pid == wait(NULL));
set_bp(pid, bp_addr);

for (;;) {
assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0);
assert(pid == wait(NULL));

bp_hit = get_rip(pid);
if (bp_hit != bp_addr)
fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n",
bp_hit - &&label, nr);
}
}

int main(int argc, const char *argv[])
{
while (--argc) {
int nr = atoi(*++argv);
if (!fork())
test(nr);
}

while (wait(NULL) > 0)
;
return 0;
}

Cc: stable@xxxxxxxxxxxxxxx
Suggested-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
Reported-by: Andrey Wagin <avagin@xxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
Andrey, sorry for the time it took to get to this one.

arch/x86/kvm/x86.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2baf57d..f4891f2ece23 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2752,6 +2752,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}

kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;

Er, i do not understand how it works. The BP is enabled in this test case so
the debug registers are always reloaded before entering guest as
KVM_DEBUGREG_BP_ENABLED bit is always set on switch_db_regs. What did i miss?

Another impact of this fix is when vcpu is rescheduled we need to always reload
debug registers even if guest does not enable it, it is really needed?