Allocate a CMB region to user-space as a DMA-BUF object. Signed-off-by: Haggai Eran <haggaie@xxxxxxxxxxxx> --- drivers/nvme/host/Makefile | 2 +- drivers/nvme/host/core.c | 29 ++++ drivers/nvme/host/dmabuf.c | 308 ++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme-pci.h | 2 + drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 6 + include/uapi/linux/nvme_ioctl.h | 11 ++ 7 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 drivers/nvme/host/dmabuf.c diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 9a3ca892b4a7..f8d4f5d33398 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -5,4 +5,4 @@ nvme-core-y := core.o nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o nvme-core-$(CONFIG_NVM) += lightnvm.o -nvme-y += pci.o +nvme-y += pci.o dmabuf.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d5fb55c0a9d9..5860b468ab39 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1209,6 +1209,33 @@ out_unlock: return ret; } +/* + * NVME_IOCTL_ALLOC_USER_CMB handler: validate the request copied from @ucmd, + * ask the transport to allocate cmd.size bytes, and report the value it + * returns to user space in cmd.fd. + * + * Returns 0 on success or a negative errno. + */ +static int nvme_alloc_user_cmb(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_alloc_user_cmb __user *ucmd) +{ + struct nvme_alloc_user_cmb cmd; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags || cmd.rsvd1 || cmd.opcode) + return -EINVAL; + /* an empty buffer is useless, and the size must fit in a size_t */ + if (!cmd.size || cmd.size != (size_t)cmd.size) + return -EINVAL; + + if (!ctrl->ops->alloc_user_cmb) + return -ENOTTY; + + status = ctrl->ops->alloc_user_cmb(ctrl, cmd.size); + if (status < 0) + return status; + + /* + * NOTE(review): if the transport has already installed the fd, it stays + * open in the caller's fd table when the copy below fails - confirm. + */ + cmd.fd = status; + if (copy_to_user(ucmd, &cmd, sizeof(cmd))) + return -EFAULT; + + return 0; +} + static long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1228,6 +1255,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_RESCAN: nvme_queue_scan(ctrl); return 0; + case NVME_IOCTL_ALLOC_USER_CMB: + return nvme_alloc_user_cmb(ctrl, NULL, argp); default: return -ENOTTY; } diff --git
a/drivers/nvme/host/dmabuf.c b/drivers/nvme/host/dmabuf.c new file mode 100644 index 000000000000..ab9484b40775 --- /dev/null +++ b/drivers/nvme/host/dmabuf.c @@ -0,0 +1,308 @@ +/* + * Copyright © 2016 Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/slab.h> +#include <linux/genalloc.h> +#include <linux/dma-buf.h> + +#include "nvme-pci.h" + +struct nvme_cmb_object { + struct nvme_dev *dev; + struct dma_buf *dma_buf; /* uncounted back-pointer to the exported dma-buf */ + void *addr; + dma_addr_t dma_addr; + int attachments; + struct kref refcount; +}; + +static size_t obj_size(struct nvme_cmb_object *obj) +{ + return obj->dma_buf->size; +} + +struct nvme_cmb_attachment { + struct sg_table sgt; + enum dma_data_direction dir; +}; + +static void nvme_cmb_object_get(struct nvme_cmb_object *obj) +{ + kref_get(&obj->refcount); +} + +/* + * kref release callback: free the object once its last reference is gone. + * + * The object never takes a reference on obj->dma_buf, and the object's + * initial reference is dropped from that dma_buf's own release callback, so + * there is nothing to put here: a dma_buf_put() would be an unbalanced second + * put on a dma_buf that is already being torn down (or a put on an ERR_PTR + * when the export failed). + */ +static void nvme_cmb_object_release(struct kref *kref) +{ + struct nvme_cmb_object *obj = + container_of(kref, struct nvme_cmb_object, refcount); + + WARN_ON(obj->attachments); + WARN_ON(obj->addr || obj->dma_addr); + + kfree(obj); +} + +static void nvme_cmb_object_put(struct nvme_cmb_object *obj) +{ + kref_put(&obj->refcount, nvme_cmb_object_release); +} + +static int nvme_cmb_map_attach(struct dma_buf *dma_buf, + struct device *target_dev, + struct dma_buf_attachment *attach) +{ + struct nvme_cmb_attachment *cmb_attach; + struct nvme_cmb_object *obj = dma_buf->priv; + struct nvme_dev *dev = obj->dev; + int ret; + + cmb_attach = kzalloc(sizeof(*cmb_attach), GFP_KERNEL); + if
(!cmb_attach) + return -ENOMEM; + + /* + * TODO check there is no IOMMU enabled and there is peer to peer + * access between target_dev and our device + */ + + cmb_attach->dir = DMA_NONE; /* no mapping has been handed out yet */ + attach->priv = cmb_attach; + + if (!obj->attachments) { /* first attachment: back the object with CMB space */ + obj->addr = nvme_alloc_cmb(dev, obj_size(obj), &obj->dma_addr); + if (!obj->addr) { + ret = -ENOMEM; + goto free; + } + } + ++obj->attachments; + + return 0; + +free: + kfree(cmb_attach); + return ret; +} + +static void nvme_cmb_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ /* undo nvme_cmb_map_attach() for one attachment */ + struct nvme_cmb_attachment *cmb_attach = attach->priv; + struct nvme_cmb_object *obj = dma_buf->priv; + struct nvme_dev *dev = obj->dev; + + if (!cmb_attach) + return; + + if (!--obj->attachments) { /* last attachment gone: return the CMB space */ + nvme_free_cmb(dev, obj->addr, obj_size(obj)); + obj->addr = NULL; + obj->dma_addr = 0; + } + + if (cmb_attach->dir != DMA_NONE) { /* map_dma_buf built an sg_table for this attachment */ + /* TODO something like dma_unmap_resource */ + sg_free_table(&cmb_attach->sgt); + } + + kfree(cmb_attach); + attach->priv = NULL; +} + +static struct sg_table *nvme_cmb_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ /* returns the attachment's sg_table, or an ERR_PTR on failure */ + struct nvme_cmb_attachment *cmb_attach = attach->priv; + struct nvme_cmb_object *obj = attach->dmabuf->priv; + int ret; + + if (WARN_ON(dir == DMA_NONE || !cmb_attach)) + return ERR_PTR(-EINVAL); + + /* return the cached mapping when possible */ + if (cmb_attach->dir == dir) + return &cmb_attach->sgt; + + /* + * two mappings with different directions for the same attachment are + * not allowed + */ + if (WARN_ON(cmb_attach->dir != DMA_NONE)) + return ERR_PTR(-EBUSY); + + ret = sg_alloc_table(&cmb_attach->sgt, 1, GFP_KERNEL); /* one entry for the whole region */ + if (ret) + return ERR_PTR(ret); + + /* + * TODO + * 1. Use something like dma_map_resource to get DMA mapping for the + * BAR. + * 2. no struct page for this address, just a pfn. Make sure callers + * don't need it.
+ */ + sg_dma_address(cmb_attach->sgt.sgl) = obj->dma_addr; + /* + * sg_dma_len() aliases the plain ->length field when + * CONFIG_NEED_SG_DMA_LENGTH is off, so it has to be set in every + * configuration or importers see a zero-length entry. + */ + sg_dma_len(cmb_attach->sgt.sgl) = obj_size(obj); + + cmb_attach->dir = dir; + + return &cmb_attach->sgt; +} + +static void nvme_cmb_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + /* nothing to be done here */ +} + +/* dma-buf release callback: drop the reference the dma-buf holds on its object */ +static void nvme_cmb_dmabuf_release(struct dma_buf *dma_buf) +{ + struct nvme_cmb_object *obj = dma_buf->priv; + + if (!obj) + return; + + nvme_cmb_object_put(obj); +} + +/* Return the kernel address of page @page_num, or NULL while no CMB space is allocated */ +static void *nvme_cmb_dmabuf_kmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num) +{ + struct nvme_cmb_object *obj = dma_buf->priv; + + if (!obj || !obj->addr) + return NULL; + + return obj->addr + (page_num << PAGE_SHIFT); +} + +static void nvme_cmb_vm_open(struct vm_area_struct *vma) +{ + struct nvme_cmb_object *obj = vma->vm_private_data; + + nvme_cmb_object_get(obj); +} + +static void nvme_cmb_vm_close(struct vm_area_struct *vma) +{ + struct nvme_cmb_object *obj = vma->vm_private_data; + + nvme_cmb_object_put(obj); +} + +/* + * Page-fault handler for user mappings: insert the pfn of the CMB page that + * backs the faulting address. Returns a VM_FAULT_* code. + */ +static int nvme_cmb_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct nvme_cmb_object *obj = vma->vm_private_data; + unsigned long offset; + unsigned long pfn; + int err; + + if (!obj->addr) + return VM_FAULT_SIGBUS; + + /* byte offset into the buffer, honouring the offset passed to mmap() */ + offset = (unsigned long)vmf->virtual_address - vma->vm_start; + offset += vma->vm_pgoff << PAGE_SHIFT; + + /* + * obj->addr is the kernel's own mapping of the region and cannot be + * turned into a pfn by shifting; use the region's bus address instead. + * NOTE(review): assumes dma_addr equals the CPU physical address of the + * BAR (no IOMMU translation) - confirm against nvme_alloc_cmb(). + */ + pfn = (obj->dma_addr + offset) >> PAGE_SHIFT; + + err = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); + switch (err) { + case -EAGAIN: + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + return VM_FAULT_NOPAGE; + + case -ENOMEM: + return VM_FAULT_OOM; + } + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct nvme_cmb_vm_ops = { + .fault = nvme_cmb_fault, + .open = nvme_cmb_vm_open, + .close = nvme_cmb_vm_close, +}; + +static int nvme_cmb_dmabuf_mmap(struct dma_buf *dma_buf, + struct vm_area_struct *vma) +{ + struct nvme_cmb_object *obj =
dma_buf->priv; + + /* Check for valid size. */ + if (obj_size(obj) < vma->vm_end - vma->vm_start) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_ops = &nvme_cmb_vm_ops; + vma->vm_private_data = obj; + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + + nvme_cmb_object_get(obj); + + return 0; +} + +static const struct dma_buf_ops nvme_cmb_dmabuf_ops = { + .attach = nvme_cmb_map_attach, + .detach = nvme_cmb_map_detach, + .map_dma_buf = nvme_cmb_map_dma_buf, + .unmap_dma_buf = nvme_cmb_unmap_dma_buf, + .release = nvme_cmb_dmabuf_release, + .kmap = nvme_cmb_dmabuf_kmap_atomic, + .kmap_atomic = nvme_cmb_dmabuf_kmap_atomic, + .mmap = nvme_cmb_dmabuf_mmap, +}; + +/* + * Export a dma-buf of @size bytes backed by a new nvme_cmb_object for @dev and + * obtain a file descriptor for it. + * + * Returns the file descriptor, or a negative errno. + */ +int nvme_pci_alloc_user_cmb(struct nvme_dev *dev, u64 size) +{ + struct nvme_cmb_object *obj; + struct dma_buf *dma_buf; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + int ret; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + kref_init(&obj->refcount); + obj->dev = dev; + + exp_info.ops = &nvme_cmb_dmabuf_ops; + exp_info.size = size; + exp_info.flags = O_CLOEXEC | O_RDWR; + exp_info.priv = obj; + + dma_buf = dma_buf_export(&exp_info); + if (IS_ERR(dma_buf)) { + /* no dma-buf exists: obj->dma_buf stays NULL for the release path */ + nvme_cmb_object_put(obj); + return PTR_ERR(dma_buf); + } + obj->dma_buf = dma_buf; + + ret = dma_buf_fd(dma_buf, exp_info.flags); + if (ret < 0) { + /* + * Drop the export reference; the dma-buf's release callback then + * drops obj. Clear the back-pointer first so that releasing obj + * cannot put the dying dma-buf a second time. + */ + obj->dma_buf = NULL; + dma_buf_put(dma_buf); + } + + return ret; +} + diff --git a/drivers/nvme/host/nvme-pci.h b/drivers/nvme/host/nvme-pci.h index 5b29508dc182..2292d2c24fda 100644 --- a/drivers/nvme/host/nvme-pci.h +++ b/drivers/nvme/host/nvme-pci.h @@ -18,6 +18,8 @@ struct nvme_dev; +int nvme_pci_alloc_user_cmb(struct nvme_dev *dev, u64 size); + void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr); void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1daa0482de0e..3a65144f23be 100644 --- a/drivers/nvme/host/nvme.h +++
b/drivers/nvme/host/nvme.h @@ -152,6 +152,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*post_scan)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); + int (*alloc_user_cmb)(struct nvme_ctrl *ctrl, u64 size); /* NOTE(review): presumably returns an fd or a negative errno - confirm against callers */ }; static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d3da5d9552dd..2a15755e845e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1947,6 +1947,11 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) return nvme_reset(to_nvme_dev(ctrl)); } +static int nvme_pci_alloc_user_cmb_wrapper(struct nvme_ctrl *ctrl, u64 size) +{ /* adapt the nvme_ctrl_ops signature to the nvme_dev based helper */ + return nvme_pci_alloc_user_cmb(to_nvme_dev(ctrl), size); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .module = THIS_MODULE, .reg_read32 = nvme_pci_reg_read32, @@ -1956,6 +1961,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .free_ctrl = nvme_pci_free_ctrl, .post_scan = nvme_pci_post_scan, .submit_async_event = nvme_pci_submit_async_event, + .alloc_user_cmb = nvme_pci_alloc_user_cmb_wrapper, }; static int nvme_dev_map(struct nvme_dev *dev) diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 50ff21f748b6..d66f5b56b163 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,6 +55,16 @@ struct nvme_passthru_cmd { #define nvme_admin_cmd nvme_passthru_cmd +struct nvme_alloc_user_cmb { + /* in */ + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u64 size; /* NOTE(review): preceded by implicit padding on ABIs that align __u64 to 8 bytes */ + /* out */ + __u32 fd; /* NOTE(review): struct size differs between ABIs that align __u64 to 4 and to 8 bytes - confirm compat handling */ +}; + #define NVME_IOCTL_ID _IO('N', 0x40) #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) @@ -62,5 +72,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_RESET _IO('N', 0x44) #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) #define NVME_IOCTL_RESCAN _IO('N', 0x46) +#define NVME_IOCTL_ALLOC_USER_CMB _IOWR('N', 0x47, struct
nvme_alloc_user_cmb) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- 1.7.11.2 -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html