On Tue, 29 Oct 2019 14:40:16 +0800 Zhangfei Gao <zhangfei.gao@xxxxxxxxxx> wrote: > Register qm to uacce framework for user crypto driver > > Signed-off-by: Zhangfei Gao <zhangfei.gao@xxxxxxxxxx> > Signed-off-by: Zhou Wang <wangzhou1@xxxxxxxxxxxxx> Hi. This shows there is probably a race during setup that you should close. Userspace interface is exposed before the driver is ready to handle it. Few other bits inline. Thanks, Jonathan > --- > drivers/crypto/hisilicon/qm.c | 253 ++++++++++++++++++++++++++++++-- > drivers/crypto/hisilicon/qm.h | 13 +- > drivers/crypto/hisilicon/zip/zip_main.c | 39 ++--- > include/uapi/misc/uacce/qm.h | 23 +++ > 4 files changed, 292 insertions(+), 36 deletions(-) > create mode 100644 include/uapi/misc/uacce/qm.h > > diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c > index a8ed6990..4b9cced 100644 > --- a/drivers/crypto/hisilicon/qm.c > +++ b/drivers/crypto/hisilicon/qm.c > @@ -9,6 +9,9 @@ > #include <linux/log2.h> > #include <linux/seq_file.h> > #include <linux/slab.h> > +#include <linux/uacce.h> > +#include <linux/uaccess.h> > +#include <uapi/misc/uacce/qm.h> > #include "qm.h" > > /* eq/aeq irq enable */ > @@ -465,17 +468,22 @@ static void qm_cq_head_update(struct hisi_qp *qp) > > static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) > { > - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; > - > - if (qp->req_cb) { > - while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { > - dma_rmb(); > - qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head); > - qm_cq_head_update(qp); > - cqe = qp->cqe + qp->qp_status.cq_head; > - qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, > - qp->qp_status.cq_head, 0); > - atomic_dec(&qp->qp_status.used); > + struct qm_cqe *cqe; > + > + if (qp->event_cb) { > + qp->event_cb(qp); > + } else { > + cqe = qp->cqe + qp->qp_status.cq_head; > + > + if (qp->req_cb) { > + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { > + dma_rmb(); > + qp->req_cb(qp, qp->sqe + qm->sqe_size * > + cqe->sq_head); > + qm_cq_head_update(qp); > + cqe = qp->cqe + qp->qp_status.cq_head; > + atomic_dec(&qp->qp_status.used); > + } > } > > /* set c_flag */ > @@ -1397,6 +1405,220 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) > } > } > > +static void qm_qp_event_notifier(struct hisi_qp *qp) > +{ > + wake_up_interruptible(&qp->uacce_q->wait); > +} > + > +static int hisi_qm_get_available_instances(struct uacce_device *uacce) > +{ > + int i, ret; > + struct hisi_qm *qm = uacce->priv; > + > + read_lock(&qm->qps_lock); > + for (i = 0, ret = 0; i < qm->qp_num; i++) > + if (!qm->qp_array[i]) > + ret++; > + read_unlock(&qm->qps_lock); > + > + return ret; > +} > + > +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, > + unsigned long arg, > + struct uacce_queue *q) > +{ > + struct hisi_qm *qm = uacce->priv; > + struct hisi_qp *qp; > + u8 alg_type = 0; > + > + qp = hisi_qm_create_qp(qm, alg_type); > + if (IS_ERR(qp)) > + return PTR_ERR(qp); > + > + q->priv = qp; > + q->uacce = uacce; > + qp->uacce_q = q; > + qp->event_cb = qm_qp_event_notifier; > + qp->pasid = arg; > + > + return 0; > +} > + > +static void hisi_qm_uacce_put_queue(struct uacce_queue *q) > +{ > + struct hisi_qp *qp = q->priv; > + > + /* > + * As put_queue is only called in uacce_mode=1, and only one queue can We got rid of the modes I think so comment needs an update. > + * be used in this mode. we flush all sqc cache back in put queue. > + */ > + hisi_qm_cache_wb(qp->qm); > + > + /* need to stop hardware, but can not support in v1 */ > + hisi_qm_release_qp(qp); Should we just drop support for the v1 hardware if we can't do this? > +} > + > +/* map sq/cq/doorbell to user space */ > +static int hisi_qm_uacce_mmap(struct uacce_queue *q, > + struct vm_area_struct *vma, > + struct uacce_qfile_region *qfr) > +{ > + struct hisi_qp *qp = q->priv; > + struct hisi_qm *qm = qp->qm; > + size_t sz = vma->vm_end - vma->vm_start; > + struct pci_dev *pdev = qm->pdev; > + struct device *dev = &pdev->dev; > + unsigned long vm_pgoff; > + int ret; > + > + switch (qfr->type) { > + case UACCE_QFRT_MMIO: > + if (qm->ver == QM_HW_V2) { > + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR + > + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE)) > + return -EINVAL; > + } else { > + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR) > + return -EINVAL; > + } > + > + vma->vm_flags |= VM_IO; > + > + return remap_pfn_range(vma, vma->vm_start, > + qm->phys_base >> PAGE_SHIFT, > + sz, pgprot_noncached(vma->vm_page_prot)); > + case UACCE_QFRT_DUS: > + if (sz != qp->qdma.size) > + return -EINVAL; > + Comment style in here is inconsistent. Match the existing code. > + /* dma_mmap_coherent() requires vm_pgoff as 0 > + * restore vm_pfoff to initial value for mmap() > + */ > + vm_pgoff = vma->vm_pgoff; > + vma->vm_pgoff = 0; > + ret = dma_mmap_coherent(dev, vma, qp->qdma.va, > + qp->qdma.dma, sz); > + vma->vm_pgoff = vm_pgoff; > + return ret; > + > + default: > + return -EINVAL; > + } > +} > + > +static int hisi_qm_uacce_start_queue(struct uacce_queue *q) > +{ > + struct hisi_qp *qp = q->priv; > + > + return hisi_qm_start_qp(qp, qp->pasid); > +} > + > +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) > +{ > + struct hisi_qp *qp = q->priv; > + > + hisi_qm_stop_qp(qp); I'm a great fan of minimalism on these hisi_qm_stop_qp(q->priv); doesn't really loose any clarity. > +} > + > +static int qm_set_sqctype(struct uacce_queue *q, u16 type) > +{ > + struct hisi_qm *qm = q->uacce->priv; > + struct hisi_qp *qp = q->priv; > + > + write_lock(&qm->qps_lock); > + qp->alg_type = type; > + write_unlock(&qm->qps_lock); > + > + return 0; > +} > + > +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, > + unsigned long arg) > +{ > + struct hisi_qp *qp = q->priv; > + struct hisi_qp_ctx qp_ctx; > + > + if (cmd == UACCE_CMD_QM_SET_QP_CTX) { > + if (copy_from_user(&qp_ctx, (void __user *)arg, > + sizeof(struct hisi_qp_ctx))) > + return -EFAULT; > + > + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) > + return -EINVAL; > + > + qm_set_sqctype(q, qp_ctx.qc_type); > + qp_ctx.id = qp->qp_id; > + > + if (copy_to_user((void __user *)arg, &qp_ctx, > + sizeof(struct hisi_qp_ctx))) > + return -EFAULT; > + } else { > + return -EINVAL; > + } > + > + return 0; > +} > + > +static struct uacce_ops uacce_qm_ops = { > + .get_available_instances = hisi_qm_get_available_instances, > + .get_queue = hisi_qm_uacce_get_queue, > + .put_queue = hisi_qm_uacce_put_queue, > + .start_queue = hisi_qm_uacce_start_queue, > + .stop_queue = hisi_qm_uacce_stop_queue, > + .mmap = hisi_qm_uacce_mmap, > + .ioctl = hisi_qm_uacce_ioctl, > +}; > + > +static int qm_register_uacce(struct hisi_qm *qm) > +{ > + struct pci_dev *pdev = qm->pdev; > + struct uacce_device *uacce; > + unsigned long mmio_page_nr; > + unsigned long dus_page_nr; > + struct uacce_interface interface = { > + .flags = UACCE_DEV_SVA, > + .ops = &uacce_qm_ops, > + }; > + > + strncpy(interface.name, pdev->driver->name, sizeof(interface.name)); > + > + uacce = uacce_register(&pdev->dev, &interface); > + if (IS_ERR(uacce)) > + return PTR_ERR(uacce); Is there a potential race here as we have exposed the character device before the driver is ready for it to be used? Probably need to split the code that allocates a uacce interface from the bit that actually exposes it to userspace. > + > + if (uacce->flags & UACCE_DEV_SVA) { > + qm->use_sva = true; > + } else { > + /* only consider sva case */ > + uacce_unregister(uacce); > + return -EINVAL; > + } > + > + uacce->is_vf = pdev->is_virtfn; > + uacce->priv = qm; > + uacce->algs = qm->algs; > + > + if (qm->ver == QM_HW_V1) { > + mmio_page_nr = QM_DOORBELL_PAGE_NR; > + uacce->api_ver = HISI_QM_API_VER_BASE; > + } else { > + mmio_page_nr = QM_DOORBELL_PAGE_NR + > + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE; > + uacce->api_ver = HISI_QM_API_VER2_BASE; > + } > + > + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH + > + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT; > + > + uacce->qf_pg_size[UACCE_QFRT_MMIO] = mmio_page_nr; > + uacce->qf_pg_size[UACCE_QFRT_DUS] = dus_page_nr; > + > + qm->uacce = uacce; > + > + return 0; > +} > + > /** > * hisi_qm_init() - Initialize configures about qm. > * @qm: The qm needing init. > @@ -1421,6 +1643,10 @@ int hisi_qm_init(struct hisi_qm *qm) > return -EINVAL; > } > > + ret = qm_register_uacce(qm); > + if (ret < 0) > + dev_warn(&pdev->dev, "fail to register uacce (%d)\n", ret); > + looks like there are error paths in qm_init in which we should call the uacce_unregister? > ret = pci_enable_device_mem(pdev); > if (ret < 0) { > dev_err(&pdev->dev, "Failed to enable device mem!\n"); > @@ -1433,6 +1659,8 @@ int hisi_qm_init(struct hisi_qm *qm) > goto err_disable_pcidev; > } > > + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2); > + qm->size = pci_resource_len(qm->pdev, PCI_BAR_2); > qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2), > pci_resource_len(qm->pdev, PCI_BAR_2)); Use qm->phys_base/size in the ioremap here to avoid repeating the code. > if (!qm->io_base) { > @@ -1504,6 +1732,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) > iounmap(qm->io_base); > pci_release_mem_regions(pdev); > pci_disable_device(pdev); > + > + if (qm->uacce) > + uacce_unregister(qm->uacce); Can we make uacce_unregister check the input? Might make for cleaner users. > } > EXPORT_SYMBOL_GPL(hisi_qm_uninit); > > diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h > index 103e2fd..84a3be9 100644 > --- a/drivers/crypto/hisilicon/qm.h > +++ b/drivers/crypto/hisilicon/qm.h > @@ -77,6 +77,10 @@ > > #define HISI_ACC_SGL_SGE_NR_MAX 255 > > +/* page number for queue file region */ > +#define QM_DOORBELL_PAGE_NR 1 > + 1 blank line only is almost always enough. > + > enum qp_state { > QP_STOP, > }; > @@ -161,7 +165,12 @@ struct hisi_qm { > u32 error_mask; > u32 msi_mask; > > + const char *algs; > bool use_dma_api; > + bool use_sva; > + resource_size_t phys_base; > + resource_size_t size; > + struct uacce_device *uacce; > }; > > struct hisi_qp_status { > @@ -191,10 +200,12 @@ struct hisi_qp { > struct hisi_qp_ops *hw_ops; > void *qp_ctx; > void (*req_cb)(struct hisi_qp *qp, void *data); > + void (*event_cb)(struct hisi_qp *qp); > struct work_struct work; > struct workqueue_struct *wq; > - unrelated change. > struct hisi_qm *qm; > + u16 pasid; > + struct uacce_queue *uacce_q; > }; > > int hisi_qm_init(struct hisi_qm *qm); > diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c > index 1b2ee96..48860d2 100644 > --- a/drivers/crypto/hisilicon/zip/zip_main.c > +++ b/drivers/crypto/hisilicon/zip/zip_main.c > @@ -316,8 +316,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) > writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63); > writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); > writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); > - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); > - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); > + > + if (hisi_zip->qm.use_sva) { > + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); > + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); > + } else { > + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); > + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); > + } > > /* let's open all compression/decompression cores */ > writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN, > @@ -671,24 +677,12 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) > qm = &hisi_zip->qm; > qm->pdev = pdev; > qm->ver = rev_id; > - Try to avoid noise from white space changes. No huge help to delete the blank line here. > + qm->use_dma_api = true; > + qm->algs = "zlib\ngzip\n"; > qm->sqe_size = HZIP_SQE_SIZE; > qm->dev_name = hisi_zip_name; > qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF : > QM_HW_VF; Unrelated changes I think. Can we clean out the old left overs of uacce from the driver in a precursor patch? Also if it's no longer used can we drop the module param? > - switch (uacce_mode) { > - case 0: > - qm->use_dma_api = true; > - break; > - case 1: > - qm->use_dma_api = false; > - break; > - case 2: > - qm->use_dma_api = true; > - break; > - default: > - return -EINVAL; > - } > > ret = hisi_qm_init(qm); > if (ret) { > @@ -976,12 +970,10 @@ static int __init hisi_zip_init(void) > goto err_pci; > } > > - if (uacce_mode == 0 || uacce_mode == 2) { > - ret = hisi_zip_register_to_crypto(); > - if (ret < 0) { > - pr_err("Failed to register driver to crypto.\n"); > - goto err_crypto; > - } > + ret = hisi_zip_register_to_crypto(); > + if (ret < 0) { > + pr_err("Failed to register driver to crypto.\n"); > + goto err_crypto; > } > > return 0; > @@ -996,8 +988,7 @@ static int __init hisi_zip_init(void) > > static void __exit hisi_zip_exit(void) > { > - if (uacce_mode == 0 || uacce_mode == 2) > - hisi_zip_unregister_from_crypto(); > + hisi_zip_unregister_from_crypto(); > pci_unregister_driver(&hisi_zip_pci_driver); > hisi_zip_unregister_debugfs(); > } > diff --git a/include/uapi/misc/uacce/qm.h b/include/uapi/misc/uacce/qm.h > new file mode 100644 > index 0000000..d79a8f2 > --- /dev/null > +++ b/include/uapi/misc/uacce/qm.h Given generic directory (assuming uacce becomes heavily used) probably want to prefix that if it is unique to hisilicon. hisi_qm.h? > @@ -0,0 +1,23 @@ > +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ > +#ifndef HISI_QM_USR_IF_H > +#define HISI_QM_USR_IF_H > + > +#include <linux/types.h> > + > +/** > + * struct hisi_qp_ctx - User data for hisi qp. > + * @id: Specifies which Turbo decode algorithm to use What's a Turbo algorithm? I don't know and I have the manuals ;) > + * @qc_type: Accelerator algorithm type > + */ > +struct hisi_qp_ctx { > + __u16 id; > + __u16 qc_type; > +}; > + > +#define HISI_QM_API_VER_BASE "hisi_qm_v1" > +#define HISI_QM_API_VER2_BASE "hisi_qm_v2" > + > +/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */ > +#define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) > + > +#endif