[PATCH v10 05/25] crypto: iaa - iaa_wq uses percpu_refs for get/put reference counting.
From: Kanchana P Sridhar
Date: Fri Jul 04 2025 - 00:25:03 EST
This patch modifies the reference counting on "struct iaa_wq" to be a
percpu_ref in atomic mode, instead of an "int refcount" combined with
the "idxd->dev_lock" spin_lock currently used as a synchronization
mechanism to achieve get/put semantics.
This enables a more light-weight, cleaner and effective refcount
implementation for the iaa_wq, significantly reducing latency per
compress/decompress job submitted to the IAA accelerator:
p50: -136 ns
p99: -880 ns
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@xxxxxxxxx>
---
drivers/crypto/intel/iaa/iaa_crypto.h | 4 +-
drivers/crypto/intel/iaa/iaa_crypto_main.c | 119 +++++++--------------
2 files changed, 41 insertions(+), 82 deletions(-)
diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h
index cc76a047b54ad..9611f2518f42c 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto.h
@@ -47,8 +47,8 @@ struct iaa_wq {
struct list_head list;
struct idxd_wq *wq;
- int ref;
- bool remove;
+ struct percpu_ref ref;
+ bool free;
bool mapped;
struct iaa_device *iaa_device;
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 1de8b93185ca4..d32b345ed8097 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -701,7 +701,7 @@ static void del_iaa_device(struct iaa_device *iaa_device)
static void free_iaa_device(struct iaa_device *iaa_device)
{
- if (!iaa_device)
+ if (!iaa_device || iaa_device->n_wq)
return;
remove_device_compression_modes(iaa_device);
@@ -731,6 +731,13 @@ static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
return false;
}
+static void __iaa_wq_release(struct percpu_ref *ref)
+{
+ struct iaa_wq *iaa_wq = container_of(ref, typeof(*iaa_wq), ref);
+
+ iaa_wq->free = true;
+}
+
static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
struct iaa_wq **new_wq)
{
@@ -738,11 +745,20 @@ static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
struct iaa_wq *iaa_wq;
+ int ret;
iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
if (!iaa_wq)
return -ENOMEM;
+ ret = percpu_ref_init(&iaa_wq->ref, __iaa_wq_release,
+ PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
+
+ if (ret) {
+ kfree(iaa_wq);
+ return -ENOMEM;
+ }
+
iaa_wq->wq = wq;
iaa_wq->iaa_device = iaa_device;
idxd_wq_set_private(wq, iaa_wq);
@@ -818,6 +834,9 @@ static void __free_iaa_wq(struct iaa_wq *iaa_wq)
if (!iaa_wq)
return;
+ WARN_ON(!percpu_ref_is_zero(&iaa_wq->ref));
+ percpu_ref_exit(&iaa_wq->ref);
+
iaa_device = iaa_wq->iaa_device;
if (iaa_device->n_wq == 0)
free_iaa_device(iaa_wq->iaa_device);
@@ -912,53 +931,6 @@ static int save_iaa_wq(struct idxd_wq *wq)
return 0;
}
-static int iaa_wq_get(struct idxd_wq *wq)
-{
- struct idxd_device *idxd = wq->idxd;
- struct iaa_wq *iaa_wq;
- int ret = 0;
-
- spin_lock(&idxd->dev_lock);
- iaa_wq = idxd_wq_get_private(wq);
- if (iaa_wq && !iaa_wq->remove) {
- iaa_wq->ref++;
- idxd_wq_get(wq);
- } else {
- ret = -ENODEV;
- }
- spin_unlock(&idxd->dev_lock);
-
- return ret;
-}
-
-static int iaa_wq_put(struct idxd_wq *wq)
-{
- struct idxd_device *idxd = wq->idxd;
- struct iaa_wq *iaa_wq;
- bool free = false;
- int ret = 0;
-
- spin_lock(&idxd->dev_lock);
- iaa_wq = idxd_wq_get_private(wq);
- if (iaa_wq) {
- iaa_wq->ref--;
- if (iaa_wq->ref == 0 && iaa_wq->remove) {
- idxd_wq_set_private(wq, NULL);
- free = true;
- }
- idxd_wq_put(wq);
- } else {
- ret = -ENODEV;
- }
- spin_unlock(&idxd->dev_lock);
- if (free) {
- __free_iaa_wq(iaa_wq);
- kfree(iaa_wq);
- }
-
- return ret;
-}
-
/***************************************************************
* Mapping IAA devices and wqs to cores with per-cpu wq_tables.
***************************************************************/
@@ -1765,7 +1737,7 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc,
if (free_desc)
idxd_free_desc(idxd_desc->wq, idxd_desc);
- iaa_wq_put(idxd_desc->wq);
+ percpu_ref_put(&iaa_wq->ref);
}
static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
@@ -1996,19 +1968,13 @@ static int iaa_comp_acompress(struct acomp_req *req)
cpu = get_cpu();
wq = comp_wq_table_next_wq(cpu);
put_cpu();
- if (!wq) {
- pr_debug("no wq configured for cpu=%d\n", cpu);
- return -ENODEV;
- }
- ret = iaa_wq_get(wq);
- if (ret) {
+ iaa_wq = wq ? idxd_wq_get_private(wq) : NULL;
+ if (!iaa_wq || !percpu_ref_tryget(&iaa_wq->ref)) {
pr_debug("no wq available for cpu=%d\n", cpu);
return -ENODEV;
}
- iaa_wq = idxd_wq_get_private(wq);
-
dev = &wq->idxd->pdev->dev;
nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
@@ -2061,7 +2027,7 @@ static int iaa_comp_acompress(struct acomp_req *req)
err_map_dst:
dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
- iaa_wq_put(wq);
+ percpu_ref_put(&iaa_wq->ref);
return ret;
}
@@ -2083,19 +2049,13 @@ static int iaa_comp_adecompress(struct acomp_req *req)
cpu = get_cpu();
wq = decomp_wq_table_next_wq(cpu);
put_cpu();
- if (!wq) {
- pr_debug("no wq configured for cpu=%d\n", cpu);
- return -ENODEV;
- }
- ret = iaa_wq_get(wq);
- if (ret) {
+ iaa_wq = wq ? idxd_wq_get_private(wq) : NULL;
+ if (!iaa_wq || !percpu_ref_tryget(&iaa_wq->ref)) {
pr_debug("no wq available for cpu=%d\n", cpu);
- return -ENODEV;
+ return deflate_generic_decompress(req);
}
- iaa_wq = idxd_wq_get_private(wq);
-
dev = &wq->idxd->pdev->dev;
nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
@@ -2130,7 +2090,7 @@ static int iaa_comp_adecompress(struct acomp_req *req)
err_map_dst:
dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
- iaa_wq_put(wq);
+ percpu_ref_put(&iaa_wq->ref);
return ret;
}
@@ -2303,7 +2263,6 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
struct idxd_device *idxd = wq->idxd;
struct iaa_wq *iaa_wq;
- bool free = false;
atomic_set(&iaa_crypto_enabled, 0);
idxd_wq_quiesce(wq);
@@ -2324,18 +2283,18 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
goto out;
}
- if (iaa_wq->ref) {
- iaa_wq->remove = true;
- } else {
- wq = iaa_wq->wq;
- idxd_wq_set_private(wq, NULL);
- free = true;
- }
+ /* Drop the initial reference. */
+ percpu_ref_kill(&iaa_wq->ref);
+
+ while (!iaa_wq->free)
+ cpu_relax();
+
+ __free_iaa_wq(iaa_wq);
+
+ idxd_wq_set_private(wq, NULL);
spin_unlock(&idxd->dev_lock);
- if (free) {
- __free_iaa_wq(iaa_wq);
- kfree(iaa_wq);
- }
+
+ kfree(iaa_wq);
idxd_drv_disable_wq(wq);
--
2.27.0