Re: [PATCH v7 3/3] crypto: hisilicon - register zip engine to uacce

From: Jonathan Cameron
Date: Mon Nov 11 2019 - 06:31:16 EST


On Tue, 5 Nov 2019 16:34:48 +0800
zhangfei <zhangfei.gao@xxxxxxxxxx> wrote:

> Hi, Jonathan
>
> On 2019/11/1 äå1:53, Jonathan Cameron wrote:
> > On Tue, 29 Oct 2019 14:40:16 +0800
> > Zhangfei Gao <zhangfei.gao@xxxxxxxxxx> wrote:
> >
> >> Register qm to uacce framework for user crypto driver
> >>
> >> Signed-off-by: Zhangfei Gao <zhangfei.gao@xxxxxxxxxx>
> >> Signed-off-by: Zhou Wang <wangzhou1@xxxxxxxxxxxxx>
> > Hi.
> >
> > This shows there is probably a race during setup that you should close.
> > Userspace interface is exposed before the driver is ready to handle it.
> >
> > Few other bits inline.
> >
> > Thanks,
> >
> > Jonathan
> >
> >> ---
> >> drivers/crypto/hisilicon/qm.c | 253 ++++++++++++++++++++++++++++++--
> >> drivers/crypto/hisilicon/qm.h | 13 +-
> >> drivers/crypto/hisilicon/zip/zip_main.c | 39 ++---
> >> include/uapi/misc/uacce/qm.h | 23 +++
> >> 4 files changed, 292 insertions(+), 36 deletions(-)
> >> create mode 100644 include/uapi/misc/uacce/qm.h
> >>
> >> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
> >> index a8ed6990..4b9cced 100644
> >> --- a/drivers/crypto/hisilicon/qm.c
> >> +++ b/drivers/crypto/hisilicon/qm.c
> >> @@ -9,6 +9,9 @@
> >> #include <linux/log2.h>
> >> #include <linux/seq_file.h>
> >> #include <linux/slab.h>
> >> +#include <linux/uacce.h>
> >> +#include <linux/uaccess.h>
> >> +#include <uapi/misc/uacce/qm.h>
> >> #include "qm.h"
> >>
> >> /* eq/aeq irq enable */
> >> @@ -465,17 +468,22 @@ static void qm_cq_head_update(struct hisi_qp *qp)
> >>
> >> static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
> >> {
> >> - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
> >> -
> >> - if (qp->req_cb) {
> >> - while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> >> - dma_rmb();
> >> - qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head);
> >> - qm_cq_head_update(qp);
> >> - cqe = qp->cqe + qp->qp_status.cq_head;
> >> - qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
> >> - qp->qp_status.cq_head, 0);
> >> - atomic_dec(&qp->qp_status.used);
> >> + struct qm_cqe *cqe;
> >> +
> >> + if (qp->event_cb) {
> >> + qp->event_cb(qp);
> >> + } else {
> >> + cqe = qp->cqe + qp->qp_status.cq_head;
> >> +
> >> + if (qp->req_cb) {
> >> + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
> >> + dma_rmb();
> >> + qp->req_cb(qp, qp->sqe + qm->sqe_size *
> >> + cqe->sq_head);
> >> + qm_cq_head_update(qp);
> >> + cqe = qp->cqe + qp->qp_status.cq_head;
> >> + atomic_dec(&qp->qp_status.used);
> >> + }
> >> }
> >>
> >> /* set c_flag */
> >> @@ -1397,6 +1405,220 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
> >> }
> >> }
> >>
> >> +static void qm_qp_event_notifier(struct hisi_qp *qp)
> >> +{
> >> + wake_up_interruptible(&qp->uacce_q->wait);
> >> +}
> >> +
> >> +static int hisi_qm_get_available_instances(struct uacce_device *uacce)
> >> +{
> >> + int i, ret;
> >> + struct hisi_qm *qm = uacce->priv;
> >> +
> >> + read_lock(&qm->qps_lock);
> >> + for (i = 0, ret = 0; i < qm->qp_num; i++)
> >> + if (!qm->qp_array[i])
> >> + ret++;
> >> + read_unlock(&qm->qps_lock);
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
> >> + unsigned long arg,
> >> + struct uacce_queue *q)
> >> +{
> >> + struct hisi_qm *qm = uacce->priv;
> >> + struct hisi_qp *qp;
> >> + u8 alg_type = 0;
> >> +
> >> + qp = hisi_qm_create_qp(qm, alg_type);
> >> + if (IS_ERR(qp))
> >> + return PTR_ERR(qp);
> >> +
> >> + q->priv = qp;
> >> + q->uacce = uacce;
> >> + qp->uacce_q = q;
> >> + qp->event_cb = qm_qp_event_notifier;
> >> + qp->pasid = arg;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void hisi_qm_uacce_put_queue(struct uacce_queue *q)
> >> +{
> >> + struct hisi_qp *qp = q->priv;
> >> +
> >> + /*
> >> + * As put_queue is only called in uacce_mode=1, and only one queue can
> > We got rid of the modes I think so comment needs an update.
> Yes
> >
> >> + * be used in this mode. we flush all sqc cache back in put queue.
> >> + */
> >> + hisi_qm_cache_wb(qp->qm);
> >> +
> >> + /* need to stop hardware, but can not support in v1 */
> >> + hisi_qm_release_qp(qp);
> > Should we just drop support for the v1 hardware if we can't do this?
> >
> >> +}
> >> +
> >> +/* map sq/cq/doorbell to user space */
> >> +static int hisi_qm_uacce_mmap(struct uacce_queue *q,
> >> + struct vm_area_struct *vma,
> >> + struct uacce_qfile_region *qfr)
> >> +{
> >> + struct hisi_qp *qp = q->priv;
> >> + struct hisi_qm *qm = qp->qm;
> >> + size_t sz = vma->vm_end - vma->vm_start;
> >> + struct pci_dev *pdev = qm->pdev;
> >> + struct device *dev = &pdev->dev;
> >> + unsigned long vm_pgoff;
> >> + int ret;
> >> +
> >> + switch (qfr->type) {
> >> + case UACCE_QFRT_MMIO:
> >> + if (qm->ver == QM_HW_V2) {
> >> + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR +
> >> + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE))
> >> + return -EINVAL;
> >> + } else {
> >> + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR)
> >> + return -EINVAL;
> >> + }
> >> +
> >> + vma->vm_flags |= VM_IO;
> >> +
> >> + return remap_pfn_range(vma, vma->vm_start,
> >> + qm->phys_base >> PAGE_SHIFT,
> >> + sz, pgprot_noncached(vma->vm_page_prot));
> >> + case UACCE_QFRT_DUS:
> >> + if (sz != qp->qdma.size)
> >> + return -EINVAL;
> >> +
> > Comment style in here is inconsistent. Match the existing code.
> OK
> >> + /* dma_mmap_coherent() requires vm_pgoff as 0
> >> + * restore vm_pfoff to initial value for mmap()
> >> + */
> >> + vm_pgoff = vma->vm_pgoff;
> >> + vma->vm_pgoff = 0;
> >> + ret = dma_mmap_coherent(dev, vma, qp->qdma.va,
> >> + qp->qdma.dma, sz);
> >> + vma->vm_pgoff = vm_pgoff;
> >> + return ret;
> >> +
> >> + default:
> >> + return -EINVAL;
> >> + }
> >> +}
> >> +
> >> +static int hisi_qm_uacce_start_queue(struct uacce_queue *q)
> >> +{
> >> + struct hisi_qp *qp = q->priv;
> >> +
> >> + return hisi_qm_start_qp(qp, qp->pasid);
> >> +}
> >> +
> >> +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
> >> +{
> >> + struct hisi_qp *qp = q->priv;
> >> +
> >> + hisi_qm_stop_qp(qp);
> > I'm a great fan of minimalism on these
> > hisi_qm_stop_qp(q->priv); doesn't really loose any clarity.
> OK
> >> +}
> >> +
> >> +static int qm_set_sqctype(struct uacce_queue *q, u16 type)
> >> +{
> >> + struct hisi_qm *qm = q->uacce->priv;
> >> + struct hisi_qp *qp = q->priv;
> >> +
> >> + write_lock(&qm->qps_lock);
> >> + qp->alg_type = type;
> >> + write_unlock(&qm->qps_lock);
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd,
> >> + unsigned long arg)
> >> +{
> >> + struct hisi_qp *qp = q->priv;
> >> + struct hisi_qp_ctx qp_ctx;
> >> +
> >> + if (cmd == UACCE_CMD_QM_SET_QP_CTX) {
> >> + if (copy_from_user(&qp_ctx, (void __user *)arg,
> >> + sizeof(struct hisi_qp_ctx)))
> >> + return -EFAULT;
> >> +
> >> + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1)
> >> + return -EINVAL;
> >> +
> >> + qm_set_sqctype(q, qp_ctx.qc_type);
> >> + qp_ctx.id = qp->qp_id;
> >> +
> >> + if (copy_to_user((void __user *)arg, &qp_ctx,
> >> + sizeof(struct hisi_qp_ctx)))
> >> + return -EFAULT;
> >> + } else {
> >> + return -EINVAL;
> >> + }
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static struct uacce_ops uacce_qm_ops = {
> >> + .get_available_instances = hisi_qm_get_available_instances,
> >> + .get_queue = hisi_qm_uacce_get_queue,
> >> + .put_queue = hisi_qm_uacce_put_queue,
> >> + .start_queue = hisi_qm_uacce_start_queue,
> >> + .stop_queue = hisi_qm_uacce_stop_queue,
> >> + .mmap = hisi_qm_uacce_mmap,
> >> + .ioctl = hisi_qm_uacce_ioctl,
> >> +};
> >> +
> >> +static int qm_register_uacce(struct hisi_qm *qm)
> >> +{
> >> + struct pci_dev *pdev = qm->pdev;
> >> + struct uacce_device *uacce;
> >> + unsigned long mmio_page_nr;
> >> + unsigned long dus_page_nr;
> >> + struct uacce_interface interface = {
> >> + .flags = UACCE_DEV_SVA,
> >> + .ops = &uacce_qm_ops,
> >> + };
> >> +
> >> + strncpy(interface.name, pdev->driver->name, sizeof(interface.name));
> >> +
> >> + uacce = uacce_register(&pdev->dev, &interface);
> >> + if (IS_ERR(uacce))
> >> + return PTR_ERR(uacce);
> > Is there a potential race here as we have exposed the character device before
> > the driver is ready for it to be used? Probably need to split the code that
> > allocates a uacce interface from the bit that actually exposes it to userspace.
> I don't think it is a race condition.
> Since no requirement of get sysfs ready then register character device.
> Also sysfs does not always refect constant members, like available_instance.
> Currently we set the sysfs members after uacce_register, which alloc
> uacce device.
> So no problem if they are ready before character device open.

The ordering of sysfs and chrdev inside the uacce_register wasn't what
I was referring to. It is the remaining setup done on the uacce below
after both chrdev and sysfs interface are ready.

>
> If we split the code, allocate an uacce interface first then expose to
> usersapce, an additional api maybe required.

I'd pay the price of the extra api. It's still far from clear to me
why we don't have a race. For example, if we stop right here for
a long time, the entire UACCE interface is exposed but we haven't
set the various priv pointers etc below. Seems unlikely to be safe.

>
> >
> >> +
> >> + if (uacce->flags & UACCE_DEV_SVA) {
> >> + qm->use_sva = true;
> >> + } else {
> >> + /* only consider sva case */
> >> + uacce_unregister(uacce);
> >> + return -EINVAL;
> >> + }
> >> +
> >> + uacce->is_vf = pdev->is_virtfn;
> >> + uacce->priv = qm;
> >> + uacce->algs = qm->algs;
> >> +
> >> + if (qm->ver == QM_HW_V1) {
> >> + mmio_page_nr = QM_DOORBELL_PAGE_NR;
> >> + uacce->api_ver = HISI_QM_API_VER_BASE;
> >> + } else {
> >> + mmio_page_nr = QM_DOORBELL_PAGE_NR +
> >> + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE;
> >> + uacce->api_ver = HISI_QM_API_VER2_BASE;
> >> + }
> >> +
> >> + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH +
> >> + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT;
> >> +
> >> + uacce->qf_pg_size[UACCE_QFRT_MMIO] = mmio_page_nr;
> >> + uacce->qf_pg_size[UACCE_QFRT_DUS] = dus_page_nr;
> >> +
> >> + qm->uacce = uacce;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> /**
> >> * hisi_qm_init() - Initialize configures about qm.
> >> * @qm: The qm needing init.
> >> @@ -1421,6 +1643,10 @@ int hisi_qm_init(struct hisi_qm *qm)
> >> return -EINVAL;
> >> }
> >>
> >> + ret = qm_register_uacce(qm);
> >> + if (ret < 0)
> >> + dev_warn(&pdev->dev, "fail to register uacce (%d)\n", ret);
> >> +
> > looks like there are error paths in qm_init in which we should call
> > the uacce_unregister?
> OK
> >
> >> ret = pci_enable_device_mem(pdev);
> >> if (ret < 0) {
> >> dev_err(&pdev->dev, "Failed to enable device mem!\n");
> >> @@ -1433,6 +1659,8 @@ int hisi_qm_init(struct hisi_qm *qm)
> >> goto err_disable_pcidev;
> >> }
> >>
> >> + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2);
> >> + qm->size = pci_resource_len(qm->pdev, PCI_BAR_2);
> >> qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2),
> >> pci_resource_len(qm->pdev, PCI_BAR_2));
> > Use qm->phys_base/size in the ioremap here to avoid repeating the code.
> OK
> >
> >> if (!qm->io_base) {
> >> @@ -1504,6 +1732,9 @@ void hisi_qm_uninit(struct hisi_qm *qm)
> >> iounmap(qm->io_base);
> >> pci_release_mem_regions(pdev);
> >> pci_disable_device(pdev);
> >> +
> >> + if (qm->uacce)
> >> + uacce_unregister(qm->uacce);
> > Can we make uacce_unregister check the input?
> > Might make for cleaner users.
> OK,
> >
> >> }
> >> EXPORT_SYMBOL_GPL(hisi_qm_uninit);
> >>
> >> diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
> >> index 103e2fd..84a3be9 100644
> >> --- a/drivers/crypto/hisilicon/qm.h
> >> +++ b/drivers/crypto/hisilicon/qm.h
> >> @@ -77,6 +77,10 @@
> >>
> >> #define HISI_ACC_SGL_SGE_NR_MAX 255
> >>
> >> +/* page number for queue file region */
> >> +#define QM_DOORBELL_PAGE_NR 1
> >> +
> > 1 blank line only is almost always enough.
> >
> >> +
> >> enum qp_state {
> >> QP_STOP,
> >> };
> >> @@ -161,7 +165,12 @@ struct hisi_qm {
> >> u32 error_mask;
> >> u32 msi_mask;
> >>
> >> + const char *algs;
> >> bool use_dma_api;
> >> + bool use_sva;
> >> + resource_size_t phys_base;
> >> + resource_size_t size;
> >> + struct uacce_device *uacce;
> >> };
> >>
> >> struct hisi_qp_status {
> >> @@ -191,10 +200,12 @@ struct hisi_qp {
> >> struct hisi_qp_ops *hw_ops;
> >> void *qp_ctx;
> >> void (*req_cb)(struct hisi_qp *qp, void *data);
> >> + void (*event_cb)(struct hisi_qp *qp);
> >> struct work_struct work;
> >> struct workqueue_struct *wq;
> >> -
> > unrelated change.
> >
> >> struct hisi_qm *qm;
> >> + u16 pasid;
> >> + struct uacce_queue *uacce_q;
> >> };
> >>
> >> int hisi_qm_init(struct hisi_qm *qm);
> >> diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
> >> index 1b2ee96..48860d2 100644
> >> --- a/drivers/crypto/hisilicon/zip/zip_main.c
> >> +++ b/drivers/crypto/hisilicon/zip/zip_main.c
> >> @@ -316,8 +316,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
> >> writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63);
> >> writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63);
> >> writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63);
> >> - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63);
> >> - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63);
> >> +
> >> + if (hisi_zip->qm.use_sva) {
> >> + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63);
> >> + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63);
> >> + } else {
> >> + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63);
> >> + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63);
> >> + }
> >>
> >> /* let's open all compression/decompression cores */
> >> writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN,
> >> @@ -671,24 +677,12 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >> qm = &hisi_zip->qm;
> >> qm->pdev = pdev;
> >> qm->ver = rev_id;
> >> -
> > Try to avoid noise from white space changes. No huge help to delete the blank line here.
> >
> >> + qm->use_dma_api = true;
> >> + qm->algs = "zlib\ngzip\n";
> >> qm->sqe_size = HZIP_SQE_SIZE;
> >> qm->dev_name = hisi_zip_name;
> >> qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
> >> QM_HW_VF;
> > Unrelated changes I think. Can we clean out the old left overs
> > of uacce from the driver in a precursor patch? Also if it's no longer
> > used can we drop the module param?
> This patch is an example, so just in one patch to make it work.
> >
> >> - switch (uacce_mode) {
> >> - case 0:
> >> - qm->use_dma_api = true;
> >> - break;
> >> - case 1:
> >> - qm->use_dma_api = false;
> >> - break;
> >> - case 2:
> >> - qm->use_dma_api = true;
> >> - break;
> >> - default:
> >> - return -EINVAL;
> >> - }
> >>
> >> ret = hisi_qm_init(qm);
> >> if (ret) {
> >> @@ -976,12 +970,10 @@ static int __init hisi_zip_init(void)
> >> goto err_pci;
> >> }
> >>
> >> - if (uacce_mode == 0 || uacce_mode == 2) {
> >> - ret = hisi_zip_register_to_crypto();
> >> - if (ret < 0) {
> >> - pr_err("Failed to register driver to crypto.\n");
> >> - goto err_crypto;
> >> - }
> >> + ret = hisi_zip_register_to_crypto();
> >> + if (ret < 0) {
> >> + pr_err("Failed to register driver to crypto.\n");
> >> + goto err_crypto;
> >> }
> >>
> >> return 0;
> >> @@ -996,8 +988,7 @@ static int __init hisi_zip_init(void)
> >>
> >> static void __exit hisi_zip_exit(void)
> >> {
> >> - if (uacce_mode == 0 || uacce_mode == 2)
> >> - hisi_zip_unregister_from_crypto();
> >> + hisi_zip_unregister_from_crypto();
> >
> >
> >> pci_unregister_driver(&hisi_zip_pci_driver);
> >> hisi_zip_unregister_debugfs();
> >> }
> >> diff --git a/include/uapi/misc/uacce/qm.h b/include/uapi/misc/uacce/qm.h
> >> new file mode 100644
> >> index 0000000..d79a8f2
> >> --- /dev/null
> >> +++ b/include/uapi/misc/uacce/qm.h
> > Given generic directory (assuming uacce becomes heavily used) probably
> > want to prefix that if it is unique to hisilicon.
> >
> > hisi_qm.h?
> OK, good idea.
> >
> >> @@ -0,0 +1,23 @@
> >> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> >> +#ifndef HISI_QM_USR_IF_H
> >> +#define HISI_QM_USR_IF_H
> >> +
> >> +#include <linux/types.h>
> >> +
> >> +/**
> >> + * struct hisi_qp_ctx - User data for hisi qp.
> >> + * @id: Specifies which Turbo decode algorithm to use
> > What's a Turbo algorithm? I don't know and I have the manuals ;)
> Sorry, will change that
>
> Thanks
>