Re: [PATCH v3] kvm: make vcpu life cycle separated from kvm instance

From: Liu ping fan
Date: Tue Dec 13 2011 - 04:30:00 EST


On Mon, Dec 12, 2011 at 8:54 PM, Gleb Natapov <gleb@xxxxxxxxxx> wrote:
> On Mon, Dec 12, 2011 at 10:41:23AM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
>>
>> Currently, a vcpu can be destroyed only when the kvm instance is destroyed.
>> Change this so that a vcpu is destroyed when its refcnt drops to zero;
>> a vcpu then MUST and CAN be destroyed before the kvm instance is destroyed.
>>
> Please drop all printks that you add. You do not use rcu_assign_pointer()
> during vcpu creation and BTW the code there is incorrect now. It assumed
> that online_vcpus is never decremented so it is OK to put newly created
> vcpu into kvm->vcpus[kvm->online_vcpus], but now it is no longer true.
> We even have BUG_ON() to catch that which I believe you can trigger with
> this patch by creating 3 vcpus, removing second one and then adding one
> more. Moving to rculist would solve this of course, and will simplify
> code that iterates over all vcpus too.
>
OK, it seems unavoidable to use rculist now :-). Just one more question: does
the "case HV_X64_MSR_VP_INDEX" handling become useless after adopting rculist?

Thanks and regards,
ping fan
> Also see below.
>
>> Signed-off-by: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
>> ---
>>  arch/x86/kvm/i8254.c     |   10 ++++--
>>  arch/x86/kvm/i8259.c     |   12 +++++--
>>  arch/x86/kvm/mmu.c       |    7 ++--
>>  arch/x86/kvm/x86.c       |   54 +++++++++++++++++++----------------
>>  include/linux/kvm_host.h |   71 ++++++++++++++++++++++++++++++++++++++++++----
>>  virt/kvm/irq_comm.c      |    7 +++-
>>  virt/kvm/kvm_main.c      |   62 +++++++++++++++++++++++++++++++++------
>>  7 files changed, 170 insertions(+), 53 deletions(-)
>>
>> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
>> index 76e3f1c..ac79598 100644
>> --- a/arch/x86/kvm/i8254.c
>> +++ b/arch/x86/kvm/i8254.c
>> @@ -289,7 +289,7 @@ static void pit_do_work(struct work_struct *work)
>> Â Â Â struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
>> Â Â Â struct kvm *kvm = pit->kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> + Â Â struct kvm_iter it;
>> Â Â Â struct kvm_kpit_state *ps = &pit->pit_state;
>> Â Â Â int inject = 0;
>>
>> @@ -315,9 +315,13 @@ static void pit_do_work(struct work_struct *work)
>> Â Â Â Â Â Â Â Â* LVT0 to NMI delivery. Other PIC interrupts are just sent to
>> Â Â Â Â Â Â Â Â* VCPU0, and only if its LVT0 is in EXTINT mode.
>> Â Â Â Â Â Â Â Â*/
>> - Â Â Â Â Â Â if (kvm->arch.vapics_in_nmi_mode > 0)
>> - Â Â Â Â Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â Â Â Â Â if (kvm->arch.vapics_in_nmi_mode > 0) {
>> + Â Â Â Â Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm_apic_nmi_wd_deliver(vcpu);
>> + Â Â Â Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â Â Â Â Â rcu_read_unlock();
>> + Â Â Â Â Â Â }
>> Â Â Â }
>> Â}
>>
>> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
>> index cac4746..2186b30 100644
>> --- a/arch/x86/kvm/i8259.c
>> +++ b/arch/x86/kvm/i8259.c
>> @@ -50,25 +50,29 @@ static void pic_unlock(struct kvm_pic *s)
>> Â{
>> Â Â Â bool wakeup = s->wakeup_needed;
>> Â Â Â struct kvm_vcpu *vcpu, *found = NULL;
>> - Â Â int i;
>> + Â Â struct kvm *kvm = s->kvm;
>> + Â Â struct kvm_iter it;
>>
>> Â Â Â s->wakeup_needed = false;
>>
>> Â Â Â spin_unlock(&s->lock);
>>
>> Â Â Â if (wakeup) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, s->kvm) {
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm)
>> Â Â Â Â Â Â Â Â Â Â Â if (kvm_apic_accept_pic_intr(vcpu)) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â found = vcpu;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â Â Â Â }
>> - Â Â Â Â Â Â }
>>
>> - Â Â Â Â Â Â if (!found)
>> + Â Â Â Â Â Â if (!found) {
>> + Â Â Â Â Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â Â Â Â Â Â Â Â Â return;
>> + Â Â Â Â Â Â }
>>
>> Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_EVENT, found);
>> Â Â Â Â Â Â Â kvm_vcpu_kick(found);
>> + Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â }
>> Â}
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index f1b36cf..c16887e 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1833,11 +1833,12 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
>>
>> Âstatic void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
>> Â{
>> - Â Â int i;
>> + Â Â struct kvm_iter it;
>> Â Â Â struct kvm_vcpu *vcpu;
>> -
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â rcu_read_lock();
>> + Â Â kvm_for_each_vcpu(it, vcpu, kvm)
>> Â Â Â Â Â Â Â vcpu->arch.last_pte_updated = NULL;
>> + Â Â rcu_read_unlock();
>> Â}
>>
>> Âstatic void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index c38efd7..a302470 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1831,10 +1831,15 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>> Â Â Â switch (msr) {
>> Â Â Â case HV_X64_MSR_VP_INDEX: {
>> Â Â Â Â Â Â Â int r;
>> + Â Â Â Â Â Â struct kvm_iter it;
>> Â Â Â Â Â Â Â struct kvm_vcpu *v;
>> - Â Â Â Â Â Â kvm_for_each_vcpu(r, v, vcpu->kvm)
>> + Â Â Â Â Â Â struct kvm *kvm = Âvcpu->kvm;
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, v, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (v == vcpu)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â data = r;
>> + Â Â Â Â Â Â }
>> + Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â Â Â Â Â break;
>> Â Â Â }
>> Â Â Â case HV_X64_MSR_EOI:
>> @@ -4966,7 +4971,8 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
>> Â Â Â struct cpufreq_freqs *freq = data;
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i, send_ipi = 0;
>> + Â Â int send_ipi = 0;
>> + Â Â struct kvm_iter it;
>>
>> Â Â Â /*
>> Â Â Â Â* We allow guests to temporarily run on slowing clocks,
>> @@ -5016,13 +5022,16 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
>>
>> Â Â Â raw_spin_lock(&kvm_lock);
>> Â Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> +
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu != freq->cpu)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> Â Â Â Â Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu != smp_processor_id())
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â send_ipi = 1;
>> Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â }
>> Â Â Â raw_spin_unlock(&kvm_lock);
>>
>> @@ -6433,13 +6442,17 @@ int kvm_arch_hardware_enable(void *garbage)
>> Â{
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> + Â Â struct kvm_iter it;
>>
>> Â Â Â kvm_shared_msr_cpu_online();
>> - Â Â list_for_each_entry(kvm, &vm_list, vm_list)
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu == smp_processor_id())
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>> + Â Â Â Â Â Â }
>> + Â Â Â Â Â Â rcu_read_unlock();
>> + Â Â }
>> Â Â Â return kvm_x86_ops->hardware_enable(garbage);
>> Â}
>>
>> @@ -6560,27 +6573,19 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
>> Â Â Â vcpu_put(vcpu);
>> Â}
>>
>> -static void kvm_free_vcpus(struct kvm *kvm)
>> -{
>> - Â Â unsigned int i;
>> - Â Â struct kvm_vcpu *vcpu;
>>
>> - Â Â /*
>> - Â Â Â* Unpin any mmu pages first.
>> - Â Â Â*/
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> - Â Â Â Â Â Â kvm_clear_async_pf_completion_queue(vcpu);
>> - Â Â Â Â Â Â kvm_unload_vcpu_mmu(vcpu);
>> - Â Â }
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> - Â Â Â Â Â Â kvm_arch_vcpu_free(vcpu);
>>
>> - Â Â mutex_lock(&kvm->lock);
>> - Â Â for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
>> - Â Â Â Â Â Â kvm->vcpus[i] = NULL;
>> +void kvm_arch_vcpu_zap(struct work_struct *work)
>> +{
>> + Â Â struct kvm_vcpu *vcpu = container_of(work, struct kvm_vcpu,
>> + Â Â Â Â Â Â Â Â Â Â zap_work);
>> + Â Â struct kvm *kvm = vcpu->kvm;
>>
>> - Â Â atomic_set(&kvm->online_vcpus, 0);
>> - Â Â mutex_unlock(&kvm->lock);
>> + Â Â printk(KERN_INFO "%s, zap vcpu:0x%x\n", __func__, vcpu->vcpu_id);
>> + Â Â kvm_clear_async_pf_completion_queue(vcpu);
>> + Â Â kvm_unload_vcpu_mmu(vcpu);
>> + Â Â kvm_arch_vcpu_free(vcpu);
>> + Â Â kvm_put_kvm(kvm);
>> Â}
>>
>> Âvoid kvm_arch_sync_events(struct kvm *kvm)
>> @@ -6594,7 +6599,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>> Â Â Â kvm_iommu_unmap_guest(kvm);
>> Â Â Â kfree(kvm->arch.vpic);
>> Â Â Â kfree(kvm->arch.vioapic);
>> - Â Â kvm_free_vcpus(kvm);
>> Â Â Â if (kvm->arch.apic_access_page)
>> Â Â Â Â Â Â Â put_page(kvm->arch.apic_access_page);
>> Â Â Â if (kvm->arch.ept_identity_pagetable)
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index d526231..2faafcb 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -19,6 +19,7 @@
>> Â#include <linux/slab.h>
>> Â#include <linux/rcupdate.h>
>> Â#include <linux/ratelimit.h>
>> +#include <linux/atomic.h>
>> Â#include <asm/signal.h>
>>
>> Â#include <linux/kvm.h>
>> @@ -113,6 +114,8 @@ enum {
>>
>> Âstruct kvm_vcpu {
>> Â Â Â struct kvm *kvm;
>> + Â Â struct rcu_head head;
>> + Â Â struct work_struct zap_work;
>> Â#ifdef CONFIG_PREEMPT_NOTIFIERS
>> Â Â Â struct preempt_notifier preempt_notifier;
>> Â#endif
>> @@ -290,17 +293,73 @@ struct kvm {
>> Â#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
>> Â#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
>>
>> +void kvm_arch_vcpu_zap(struct work_struct *work);
>> +
>> +/*search vcpu, must be protected by rcu_read_lock*/
>> Âstatic inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
>> Â{
>> + Â Â struct kvm_vcpu *vcpu;
>> Â Â Â smp_rmb();
>> - Â Â return kvm->vcpus[i];
>> + Â Â vcpu = rcu_dereference(kvm->vcpus[i]);
>> + Â Â return vcpu;
>> +}
>> +
>> +/*Must be protected by RCU*/
>> +struct kvm_iter {
>> + Â Â struct kvm *kvm;
>> + Â Â int idx;
>> + Â Â int cnt;
>> +};
>> +
>> +static inline
>> +struct kvm_vcpu *kvm_fev_init(struct kvm *kvm, struct kvm_iter *it)
>> +{
>> + Â Â int idx, cnt;
>> + Â Â struct kvm_vcpu *vcpup;
>> + Â Â vcpup = NULL;
>> + Â Â for (idx = 0, cnt = 0;
>> + Â Â Â Â Â Â cnt < atomic_read(&kvm->online_vcpus) && idx < KVM_MAX_VCPUS;
>> + Â Â Â Â Â Â idx++) {
>> + Â Â Â Â Â Â Â Â Â Â vcpup = kvm_get_vcpu(kvm, idx);
>> + Â Â Â Â Â Â Â Â Â Â if (unlikely(vcpup == NULL))
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> + Â Â Â Â Â Â Â Â Â Â cnt++;
>> + Â Â Â Â Â Â Â Â Â Â break;
>> + Â Â }
>> +
>> + Â Â it->kvm = kvm;
>> + Â Â it->idx = idx;
>> + Â Â it->cnt = cnt;
>> + Â Â return vcpup;
>> +}
>> +
>> +static inline
>> +struct kvm_vcpu *kvm_fev_next(struct kvm_iter *it)
>> +{
>> + Â Â int idx, cnt;
>> + Â Â struct kvm_vcpu *vcpup;
>> + Â Â struct kvm *kvm = it->kvm;
>> +
>> + Â Â vcpup = NULL;
>> + Â Â for (idx = it->idx+1, cnt = it->cnt;
>> + Â Â Â Â Â Â cnt < atomic_read(&kvm->online_vcpus) && idx < KVM_MAX_VCPUS;
>> + Â Â Â Â Â Â idx++) {
>> + Â Â Â Â Â Â Â Â Â Â vcpup = kvm_get_vcpu(kvm, idx);
>> + Â Â Â Â Â Â Â Â Â Â if (unlikely(vcpup == NULL))
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> + Â Â Â Â Â Â Â Â Â Â Âcnt++;
>> + Â Â Â Â Â Â Â Â Â Â Âbreak;
>> + Â Â }
>> +
>> + Â Â it->idx = idx;
>> + Â Â it->cnt = cnt;
>> + Â Â return vcpup;
>> Â}
>>
>> -#define kvm_for_each_vcpu(idx, vcpup, kvm) \
>> - Â Â for (idx = 0; \
>> - Â Â Â Â Âidx < atomic_read(&kvm->online_vcpus) && \
>> - Â Â Â Â Â(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
>> - Â Â Â Â Âidx++)
>> +#define kvm_for_each_vcpu(it, vcpu, kvm) \
>> + Â Â for (vcpu = kvm_fev_init(kvm, &it); \
>> + Â Â Â Â Â Â vcpu; \
>> + Â Â Â Â Â Â vcpu = kvm_fev_next(&it))
>>
>> Âint kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
>> Âvoid kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>> index 9f614b4..87eae96 100644
>> --- a/virt/kvm/irq_comm.c
>> +++ b/virt/kvm/irq_comm.c
>> @@ -81,14 +81,16 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
>> Âint kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>> Â Â Â Â Â Â Â struct kvm_lapic_irq *irq)
>> Â{
>> - Â Â int i, r = -1;
>> + Â Â int r = -1;
>> + Â Â struct kvm_iter it;
>> Â Â Â struct kvm_vcpu *vcpu, *lowest = NULL;
>>
>> Â Â Â if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>> Â Â Â Â Â Â Â Â Â Â Â kvm_is_dm_lowest_prio(irq))
>> Â Â Â Â Â Â Â printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
>>
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> + Â Â rcu_read_lock();
>> + Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â if (!kvm_apic_present(vcpu))
>> Â Â Â Â Â Â Â Â Â Â Â continue;
>>
>> @@ -111,6 +113,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>> Â Â Â if (lowest)
>> Â Â Â Â Â Â Â r = kvm_apic_set_irq(lowest, irq);
>>
>> + Â Â rcu_read_unlock();
>> Â Â Â return r;
>> Â}
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index d9cfb78..d28356a 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -171,7 +171,8 @@ static void ack_flush(void *_completed)
>>
>> Âstatic bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â{
>> - Â Â int i, cpu, me;
>> + Â Â int cpu, me;
>> + Â Â struct kvm_iter it;
>> Â Â Â cpumask_var_t cpus;
>> Â Â Â bool called = true;
>> Â Â Â struct kvm_vcpu *vcpu;
>> @@ -179,7 +180,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â Â Â zalloc_cpumask_var(&cpus, GFP_ATOMIC);
>>
>> Â Â Â me = get_cpu();
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> +
>> + Â Â rcu_read_lock();
>> + Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â kvm_make_request(req, vcpu);
>> Â Â Â Â Â Â Â cpu = vcpu->cpu;
>>
>> @@ -190,12 +193,15 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â Â Â Â Â Â Â Â Â Â kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
>> Â Â Â Â Â Â Â Â Â Â Â cpumask_set_cpu(cpu, cpus);
>> Â Â Â }
>> +
>> Â Â Â if (unlikely(cpus == NULL))
>> Â Â Â Â Â Â Â smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
>> Â Â Â else if (!cpumask_empty(cpus))
>> Â Â Â Â Â Â Â smp_call_function_many(cpus, ack_flush, NULL, 1);
>> Â Â Â else
>> Â Â Â Â Â Â Â called = false;
>> + Â Â rcu_read_unlock();
>> +
>> Â Â Â put_cpu();
>> Â Â Â free_cpumask_var(cpus);
>> Â Â Â return called;
>> @@ -580,6 +586,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>> Â Â Â kvm_arch_free_vm(kvm);
>> Â Â Â hardware_disable_all();
>> Â Â Â mmdrop(mm);
>> + Â Â printk(KERN_INFO "%s finished\n", __func__);
>> Â}
>>
>> Âvoid kvm_get_kvm(struct kvm *kvm)
>> @@ -1543,6 +1550,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â Â Â int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
>> Â Â Â int yielded = 0;
>> Â Â Â int pass;
>> + Â Â struct kvm_iter it;
>> Â Â Â int i;
>>
>> Â Â Â /*
>> @@ -1553,9 +1561,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â Â Â Â* We approximate round-robin by starting at the last boosted VCPU.
>> Â Â Â Â*/
>> Â Â Â for (pass = 0; pass < 2 && !yielded; pass++) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â struct task_struct *task = NULL;
>> Â Â Â Â Â Â Â Â Â Â Â struct pid *pid;
>> + Â Â Â Â Â Â Â Â Â Â i = it.idx;
>> Â Â Â Â Â Â Â Â Â Â Â if (!pass && i < last_boosted_vcpu) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â i = last_boosted_vcpu;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> @@ -1584,6 +1594,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â Â Â Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â Â Â Â put_task_struct(task);
>> Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â }
>> Â}
>> ÂEXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
>> @@ -1620,11 +1631,23 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
>> Â Â Â return 0;
>> Â}
>>
>> +/*Can not block*/
>> +static void kvm_vcpu_zap(struct rcu_head *rcu)
>> +{
>> + Â Â struct kvm_vcpu *vcpu = container_of(rcu, struct kvm_vcpu, head);
>> + Â Â schedule_work(&vcpu->zap_work);
>> +}
>> +
>> Âstatic int kvm_vcpu_release(struct inode *inode, struct file *filp)
>> Â{
>> Â Â Â struct kvm_vcpu *vcpu = filp->private_data;
>> -
>> - Â Â kvm_put_kvm(vcpu->kvm);
>> + Â Â struct kvm *kvm = vcpu->kvm;
>> + Â Â filp->private_data = NULL;
>> + Â Â mutex_lock(&kvm->lock);
>> + Â Â rcu_assign_pointer(kvm->vcpus[vcpu->vcpu_id], NULL);
> vcpu->vcpu_id is not an index into vcpus array.
>
>> + Â Â atomic_dec(&kvm->online_vcpus);
>> + Â Â mutex_unlock(&kvm->lock);
>> + Â Â call_rcu(&vcpu->head, kvm_vcpu_zap);
>> Â Â Â return 0;
>> Â}
>>
>> @@ -1646,6 +1669,16 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
>> Â Â Â return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
>> Â}
>>
>> +static struct kvm_vcpu *kvm_vcpu_create(struct kvm *kvm, u32 id)
>> +{
>> + Â Â struct kvm_vcpu *vcpu;
>> + Â Â vcpu = kvm_arch_vcpu_create(kvm, id);
>> + Â Â if (IS_ERR(vcpu))
>> + Â Â Â Â Â Â return vcpu;
>> + Â Â INIT_WORK(&vcpu->zap_work, kvm_arch_vcpu_zap);
>> + Â Â return vcpu;
>> +}
>> +
>>  /*
>>   * Creates some virtual cpus.  Good luck creating more than one.
>>   */
>> @@ -1653,8 +1686,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>> Â{
>> Â Â Â int r;
>> Â Â Â struct kvm_vcpu *vcpu, *v;
>> + Â Â struct kvm_iter it;
>>
>> - Â Â vcpu = kvm_arch_vcpu_create(kvm, id);
>> + Â Â vcpu = kvm_vcpu_create(kvm, id);
>> Â Â Â if (IS_ERR(vcpu))
>> Â Â Â Â Â Â Â return PTR_ERR(vcpu);
>>
>> @@ -1670,11 +1704,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>> Â Â Â Â Â Â Â goto unlock_vcpu_destroy;
>> Â Â Â }
>>
>> - Â Â kvm_for_each_vcpu(r, v, kvm)
>> + Â Â rcu_read_lock();
>> + Â Â kvm_for_each_vcpu(it, v, kvm) {
>> Â Â Â Â Â Â Â if (v->vcpu_id == id) {
>> + Â Â Â Â Â Â Â Â Â Â rcu_read_unlock();
>> Â Â Â Â Â Â Â Â Â Â Â r = -EEXIST;
>> Â Â Â Â Â Â Â Â Â Â Â goto unlock_vcpu_destroy;
>> Â Â Â Â Â Â Â }
>> + Â Â }
>> + Â Â rcu_read_unlock();
>>
>> Â Â Â BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
>>
>> @@ -2593,13 +2631,17 @@ static int vcpu_stat_get(void *_offset, u64 *val)
>> Â Â Â unsigned offset = (long)_offset;
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> + Â Â struct kvm_iter it;
>>
>> Â Â Â *val = 0;
>> Â Â Â raw_spin_lock(&kvm_lock);
>> - Â Â list_for_each_entry(kvm, &vm_list, vm_list)
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> + Â Â Â Â Â Â rcu_read_lock();
>> + Â Â Â Â Â Â kvm_for_each_vcpu(it, vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â *val += *(u32 *)((void *)vcpu + offset);
>> + Â Â Â Â Â Â }
>> + Â Â Â Â Â Â rcu_read_unlock();
>> + Â Â }
>>
>> Â Â Â raw_spin_unlock(&kvm_lock);
>> Â Â Â return 0;
>> --
>> 1.7.4.4
>
> --
>                         Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/