[PATCH 1/3] KVM: Support sharing gpc locks

From: David Stevens
Date: Thu Jan 26 2023 - 23:45:29 EST


From: David Stevens <stevensd@xxxxxxxxxxxx>

Support initializing a gfn_to_pfn_cache with an external lock instead of
its embedded lock. This allows groups of gpcs that are accessed together
to share a lock, which can greatly simplify locking.

Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx>
---
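Note for reviewers (not part of the commit message): a minimal sketch of how a
caller might use the new initializer. Only kvm_gpc_init(), the new
kvm_gpc_init_with_lock() and the gpc->lock pointer come from this patch; the
wrapper function below and its use of KVM_HOST_USES_PFN are illustrative
assumptions, not taken from the series.

static void example_share_gpc_lock(struct kvm_vcpu *vcpu,
				   struct gfn_to_pfn_cache *gpc1,
				   struct gfn_to_pfn_cache *gpc2)
{
	/* gpc1 keeps using its embedded lock (gpc->_lock). */
	kvm_gpc_init(gpc1, vcpu->kvm, vcpu, KVM_HOST_USES_PFN);

	/*
	 * gpc2 shares gpc1's lock, so a single
	 * read_lock_irqsave(gpc1->lock, flags) covers kvm_gpc_check()
	 * on both caches.
	 */
	kvm_gpc_init_with_lock(gpc2, vcpu->kvm, vcpu, KVM_HOST_USES_PFN,
			       gpc1->lock);
}

One benefit of sharing a lock this way is that code which needs both caches at
once takes only one lock, so there is no nested locking and no need for a
lock_set_subclass() annotation to quiet lockdep.
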
arch/x86/kvm/x86.c | 8 +++---
arch/x86/kvm/xen.c | 58 +++++++++++++++++++--------------------
include/linux/kvm_host.h | 12 ++++++++
include/linux/kvm_types.h | 3 +-
virt/kvm/pfncache.c | 37 +++++++++++++++----------
5 files changed, 70 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 508074e47bc0..ec0de9bc2eae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3047,14 +3047,14 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
struct pvclock_vcpu_time_info *guest_hv_clock;
unsigned long flags;

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);

if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock)))
return;

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
}

guest_hv_clock = (void *)(gpc->khva + offset);
@@ -3083,7 +3083,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
guest_hv_clock->version = ++vcpu->hv_clock.version;

mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);

trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
}
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2681e2007e39..fa8ab23271d3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -59,12 +59,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);

/* It could be invalid again already, so we need to check */
- read_lock_irq(&gpc->lock);
+ read_lock_irq(gpc->lock);

if (gpc->valid)
break;

- read_unlock_irq(&gpc->lock);
+ read_unlock_irq(gpc->lock);
} while (1);

/* Paranoia checks on the 32-bit struct layout */
@@ -101,7 +101,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
smp_wmb();

wc->version = wc_version + 1;
- read_unlock_irq(&gpc->lock);
+ read_unlock_irq(gpc->lock);

kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

@@ -274,15 +274,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
*/
if (atomic) {
local_irq_save(flags);
- if (!read_trylock(&gpc1->lock)) {
+ if (!read_trylock(gpc1->lock)) {
local_irq_restore(flags);
return;
}
} else {
- read_lock_irqsave(&gpc1->lock, flags);
+ read_lock_irqsave(gpc1->lock, flags);
}
while (!kvm_gpc_check(gpc1, user_len1)) {
- read_unlock_irqrestore(&gpc1->lock, flags);
+ read_unlock_irqrestore(gpc1->lock, flags);

/* When invoked from kvm_sched_out() we cannot sleep */
if (atomic)
@@ -291,7 +291,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
if (kvm_gpc_refresh(gpc1, user_len1))
return;

- read_lock_irqsave(&gpc1->lock, flags);
+ read_lock_irqsave(gpc1->lock, flags);
}

if (likely(!user_len2)) {
@@ -316,19 +316,19 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* takes them more than one at a time. Set a subclass on the
* gpc1 lock to make lockdep shut up about it.
*/
- lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
+ lock_set_subclass(&gpc1->lock->dep_map, 1, _THIS_IP_);
if (atomic) {
- if (!read_trylock(&gpc2->lock)) {
- read_unlock_irqrestore(&gpc1->lock, flags);
+ if (!read_trylock(gpc2->lock)) {
+ read_unlock_irqrestore(gpc1->lock, flags);
return;
}
} else {
- read_lock(&gpc2->lock);
+ read_lock(gpc2->lock);
}

if (!kvm_gpc_check(gpc2, user_len2)) {
- read_unlock(&gpc2->lock);
- read_unlock_irqrestore(&gpc1->lock, flags);
+ read_unlock(gpc2->lock);
+ read_unlock_irqrestore(gpc1->lock, flags);

/* When invoked from kvm_sched_out() we cannot sleep */
if (atomic)
@@ -428,9 +428,9 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
}

if (user_len2)
- read_unlock(&gpc2->lock);
+ read_unlock(gpc2->lock);

- read_unlock_irqrestore(&gpc1->lock, flags);
+ read_unlock_irqrestore(gpc1->lock, flags);

mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
if (user_len2)
@@ -505,14 +505,14 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
* does anyway. Page it in and retry the instruction. We're just a
* little more honest about it.
*/
- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);

if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
return;

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
}

/* Now gpc->khva is a valid kernel address for the vcpu_info */
@@ -540,7 +540,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
: "0" (evtchn_pending_sel32));
WRITE_ONCE(vi->evtchn_upcall_pending, 1);
}
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);

/* For the per-vCPU lapic vector, deliver it as MSI. */
if (v->arch.xen.upcall_vector)
@@ -568,9 +568,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
BUILD_BUG_ON(sizeof(rc) !=
sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);

/*
* This function gets called from kvm_vcpu_block() after setting the
@@ -590,11 +590,11 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
*/
return 0;
}
- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
}

rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);
return rc;
}

@@ -1172,7 +1172,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
int idx, i;

idx = srcu_read_lock(&kvm->srcu);
- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
if (!kvm_gpc_check(gpc, PAGE_SIZE))
goto out_rcu;

@@ -1193,7 +1193,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
}

out_rcu:
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);
srcu_read_unlock(&kvm->srcu, idx);

return ret;
@@ -1576,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)

idx = srcu_read_lock(&kvm->srcu);

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
if (!kvm_gpc_check(gpc, PAGE_SIZE))
goto out_rcu;

@@ -1607,10 +1607,10 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
} else {
rc = 1; /* Delivered to the bitmap in shared_info. */
/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);
gpc = &vcpu->arch.xen.vcpu_info_cache;

- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(gpc->lock, flags);
if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
/*
* Could not access the vcpu_info. Set the bit in-kernel
@@ -1644,7 +1644,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
}

out_rcu:
- read_unlock_irqrestore(&gpc->lock, flags);
+ read_unlock_irqrestore(gpc->lock, flags);
srcu_read_unlock(&kvm->srcu, idx);

if (kick_vcpu) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 109b18e2789c..7d1f9c6561e3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1279,6 +1279,18 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);

+/**
+ * kvm_gpc_init_with_lock - initialize gfn_to_pfn_cache with an external lock.
+ *
+ * @lock: an initialized rwlock
+ *
+ * See kvm_gpc_init. Allows multiple gfn_to_pfn_cache structs to share the
+ * same lock.
+ */
+void kvm_gpc_init_with_lock(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+ rwlock_t *lock);
+
/**
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
* physical address.
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 76de36e56cdf..b6432c8cc19c 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -70,7 +70,8 @@ struct gfn_to_pfn_cache {
struct kvm *kvm;
struct kvm_vcpu *vcpu;
struct list_head list;
- rwlock_t lock;
+ rwlock_t *lock;
+ rwlock_t _lock;
struct mutex refresh_lock;
void *khva;
kvm_pfn_t pfn;
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 2d6aba677830..2c6a2edaca9f 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -31,7 +31,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,

spin_lock(&kvm->gpc_lock);
list_for_each_entry(gpc, &kvm->gpc_list, list) {
- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);

/* Only a single page so no need to care about length */
if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
@@ -50,7 +50,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
}
}
- write_unlock_irq(&gpc->lock);
+ write_unlock_irq(gpc->lock);
}
spin_unlock(&kvm->gpc_lock);

@@ -147,7 +147,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)

lockdep_assert_held(&gpc->refresh_lock);

- lockdep_assert_held_write(&gpc->lock);
+ lockdep_assert_held_write(gpc->lock);

/*
* Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
@@ -160,7 +160,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
mmu_seq = gpc->kvm->mmu_invalidate_seq;
smp_rmb();

- write_unlock_irq(&gpc->lock);
+ write_unlock_irq(gpc->lock);

/*
* If the previous iteration "failed" due to an mmu_notifier
@@ -208,7 +208,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
}
}

- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);

/*
* Other tasks must wait for _this_ refresh to complete before
@@ -231,7 +231,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
return 0;

out_error:
- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);

return -EFAULT;
}
@@ -261,7 +261,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
*/
mutex_lock(&gpc->refresh_lock);

- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);

if (!gpc->active) {
ret = -EINVAL;
@@ -321,7 +321,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
unmap_old = (old_pfn != gpc->pfn);

out_unlock:
- write_unlock_irq(&gpc->lock);
+ write_unlock_irq(gpc->lock);

mutex_unlock(&gpc->refresh_lock);

@@ -339,20 +339,29 @@ EXPORT_SYMBOL_GPL(kvm_gpc_refresh);

void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
+{
+ rwlock_init(&gpc->_lock);
+ kvm_gpc_init_with_lock(gpc, kvm, vcpu, usage, &gpc->_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_gpc_init);
+
+void kvm_gpc_init_with_lock(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+ rwlock_t *lock)
{
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);

- rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);

gpc->kvm = kvm;
gpc->vcpu = vcpu;
+ gpc->lock = lock;
gpc->usage = usage;
gpc->pfn = KVM_PFN_ERR_FAULT;
gpc->uhva = KVM_HVA_ERR_BAD;
}
-EXPORT_SYMBOL_GPL(kvm_gpc_init);
+EXPORT_SYMBOL_GPL(kvm_gpc_init_with_lock);

int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
@@ -371,9 +380,9 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
* refresh must not establish a mapping until the cache is
* reachable by mmu_notifier events.
*/
- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);
gpc->active = true;
- write_unlock_irq(&gpc->lock);
+ write_unlock_irq(gpc->lock);
}
return __kvm_gpc_refresh(gpc, gpa, len);
}
@@ -391,7 +400,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
* must stall mmu_notifier events until all users go away, i.e.
* until gpc->lock is dropped and refresh is guaranteed to fail.
*/
- write_lock_irq(&gpc->lock);
+ write_lock_irq(gpc->lock);
gpc->active = false;
gpc->valid = false;

@@ -406,7 +415,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)

old_pfn = gpc->pfn;
gpc->pfn = KVM_PFN_ERR_FAULT;
- write_unlock_irq(&gpc->lock);
+ write_unlock_irq(gpc->lock);

spin_lock(&kvm->gpc_lock);
list_del(&gpc->list);
--
2.39.1.456.gfc5497dd1b-goog