Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
---
 drivers/infiniband/sw/siw/siw_mem.c | 388 ++++++++++++++++++++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 drivers/infiniband/sw/siw/siw_mem.c

diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
new file mode 100644
index 000000000000..06f43ae3b312
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -0,0 +1,388 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Animesh Trivedi <atr@xxxxxxxxxxxxxx>
+ *          Bernard Metzler <bmt@xxxxxxxxxxxxxx>
+ *
+ * Copyright (c) 2008-2017, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/version.h>
+#include <linux/scatterlist.h>
+#include <linux/gfp.h>
+#include <rdma/ib_verbs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/pid.h>
+#include <linux/sched/mm.h>
+
+#include "siw.h"
+#include "siw_debug.h"
+
+static void siw_umem_update_stats(struct work_struct *work)
+{
+	struct siw_umem *umem = container_of(work, struct siw_umem, work);
+	struct mm_struct *mm_s = umem->mm_s;
+
+	BUG_ON(!mm_s);
+
+	down_write(&mm_s->mmap_sem);
+	mm_s->pinned_vm -= umem->num_pages;
+	up_write(&mm_s->mmap_sem);
+
+	mmput(mm_s);
+
+	kfree(umem->page_chunk);
+	kfree(umem);
+}
+
+static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages)
+{
+	struct page **p = chunk->p;
+
+	while (num_pages--) {
+		put_page(*p);
+		p++;
+	}
+}
+
+void siw_umem_release(struct siw_umem *umem)
+{
+	struct task_struct *task = get_pid_task(umem->pid, PIDTYPE_PID);
+	int i, num_pages = umem->num_pages;
+
+	for (i = 0; num_pages; i++) {
+		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+
+		siw_free_plist(&umem->page_chunk[i], to_free);
+		kfree(umem->page_chunk[i].p);
+		num_pages -= to_free;
+	}
+	put_pid(umem->pid);
+	if (task) {
+		struct mm_struct *mm_s = get_task_mm(task);
+
+		put_task_struct(task);
+		if (mm_s) {
+			if (down_write_trylock(&mm_s->mmap_sem)) {
+				mm_s->pinned_vm -= umem->num_pages;
+				up_write(&mm_s->mmap_sem);
+				mmput(mm_s);
+			} else {
+				/*
+				 * Schedule delayed accounting if
+				 * mm semaphore not available
+				 */
+				INIT_WORK(&umem->work, siw_umem_update_stats);
+				umem->mm_s = mm_s;
+				schedule_work(&umem->work);
+
+				return;
+			}
+		}
+	}
+	kfree(umem->page_chunk);
+	kfree(umem);
+}
+
+void siw_pbl_free(struct siw_pbl *pbl)
+{
+	kfree(pbl);
+}
+
+/*
+ * Get physical address backed by PBL element. Address is referenced
+ * by linear byte offset into list of variably sized PB elements.
+ * Optionally, provide remaining len within current element, and
+ * current PBL index for later resume at same element.
+ */
+u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
+{
+	int i = idx ? *idx : 0;
+
+	while (i < pbl->num_buf) {
+		struct siw_pble *pble = &pbl->pbe[i];
+
+		if (pble->pbl_off + pble->size > off) {
+			u64 pble_off = off - pble->pbl_off;
+
+			if (len)
+				*len = pble->size - pble_off;
+			if (idx)
+				*idx = i;
+
+			return pble->addr + pble_off;
+		}
+		i++;
+	}
+	if (len)
+		*len = 0;
+	return 0;
+}
+
+struct siw_pbl *siw_pbl_alloc(u32 num_buf)
+{
+	struct siw_pbl *pbl;
+	int buf_size = sizeof(*pbl);
+
+	if (num_buf == 0)
+		return ERR_PTR(-EINVAL);
+
+	buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
+
+	pbl = kzalloc(buf_size, GFP_KERNEL);
+	if (!pbl)
+		return ERR_PTR(-ENOMEM);
+
+	pbl->max_buf = num_buf;
+
+	return pbl;
+}
+
+struct siw_umem *siw_umem_get(u64 start, u64 len)
+{
+	struct siw_umem *umem;
+	u64 first_page_va;
+	unsigned long mlock_limit;
+	int num_pages, num_chunks, i, rv = 0;
+
+	if (!can_do_mlock())
+		return ERR_PTR(-EPERM);
+
+	if (!len)
+		return ERR_PTR(-EINVAL);
+
+	first_page_va = start & PAGE_MASK;
+	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
+	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->pid = get_task_pid(current, PIDTYPE_PID);
+
+	down_write(&current->mm->mmap_sem);
+
+	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	if (num_pages + current->mm->pinned_vm > mlock_limit) {
+		dprint(DBG_ON|DBG_MM,
+			": pages req: %d, limit: %lu, pinned: %lu\n",
+			num_pages, mlock_limit, current->mm->pinned_vm);
+		rv = -ENOMEM;
+		goto out;
+	}
+	umem->fp_addr = first_page_va;
+
+	umem->page_chunk = kcalloc(num_chunks, sizeof(struct siw_page_chunk),
+				   GFP_KERNEL);
+	if (!umem->page_chunk) {
+		rv = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; num_pages; i++) {
+		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
+
+		umem->page_chunk[i].p = kcalloc(nents, sizeof(struct page *),
+						GFP_KERNEL);
+		if (!umem->page_chunk[i].p) {
+			rv = -ENOMEM;
+			goto out;
+		}
+		got = 0;
+		while (nents) {
+			struct page **plist = &umem->page_chunk[i].p[got];
+
+			rv = get_user_pages(first_page_va, nents, FOLL_WRITE,
+					    plist, NULL);
+			if (rv < 0)
+				goto out;
+
+			umem->num_pages += rv;
+			current->mm->pinned_vm += rv;
+			first_page_va += rv * PAGE_SIZE;
+			nents -= rv;
+			got += rv;
+		}
+		num_pages -= got;
+	}
+out:
+	up_write(&current->mm->mmap_sem);
+
+	if (rv > 0)
+		return umem;
+
+	siw_umem_release(umem);
+
+	return ERR_PTR(rv);
+}
+
+/*
+ * DMA mapping/address translation functions.
+ * Used to populate siw private DMA mapping functions of
+ * struct dma_map_ops.
+ */
+static void *siw_dma_generic_alloc(struct device *dev, size_t size,
+				   dma_addr_t *dma_handle, gfp_t gfp,
+				   unsigned long attrs)
+{
+	struct page *page;
+	void *kva = NULL;
+
+	page = alloc_pages(gfp, get_order(size));
+	if (page)
+		kva = page_address(page);
+	if (dma_handle)
+		*dma_handle = (dma_addr_t)kva;
+
+	return kva;
+}
+
+static void siw_dma_generic_free(struct device *dev, size_t size,
+				 void *vaddr, dma_addr_t dma_handle,
+				 unsigned long attrs)
+{
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+static dma_addr_t siw_dma_generic_map_page(struct device *dev,
+					   struct page *page,
+					   unsigned long offset,
+					   size_t size,
+					   enum dma_data_direction dir,
+					   unsigned long attrs)
+{
+	BUG_ON(!valid_dma_direction(dir));
+
+	return (u64)(page_address(page) + offset);
+}
+
+static void siw_dma_generic_unmap_page(struct device *dev,
+				       dma_addr_t handle,
+				       size_t size,
+				       enum dma_data_direction dir,
+				       unsigned long attrs)
+{
+	/* NOP */
+}
+
+static int siw_dma_generic_map_sg(struct device *dev, struct scatterlist *sgl,
+				  int nents, enum dma_data_direction dir,
+				  unsigned long attrs)
+{
+	struct scatterlist *se;
+	int i;
+
+	BUG_ON(!valid_dma_direction(dir));
+
+	for_each_sg(sgl, se, nents, i) {
+		/* This is just a validity check */
+		if (unlikely(page_address(sg_page(se)) == NULL)) {
+			nents = 0;
+			break;
+		}
+		se->dma_address =
+			(dma_addr_t)(page_address(sg_page(se)) + se->offset);
+		sg_dma_len(se) = se->length;
+	}
+	return nents;
+}
+
+static void siw_dma_generic_unmap_sg(struct device *dev,
+				     struct scatterlist *sg,
+				     int nents,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	/* NOP */
+}
+
+static void siw_generic_sync_single_for_cpu(struct device *dev,
+					    dma_addr_t dma_handle,
+					    size_t size,
+					    enum dma_data_direction dir)
+{
+	/* NOP */
+}
+
+
+static void siw_generic_sync_single_for_device(struct device *dev,
+					       dma_addr_t dma_handle,
+					       size_t size,
+					       enum dma_data_direction dir)
+{
+	/* NOP */
+}
+
+static void siw_generic_sync_sg_for_cpu(struct device *dev,
+					struct scatterlist *sg,
+					int nents,
+					enum dma_data_direction dir)
+{
+	/* NOP */
+}
+
+static void siw_generic_sync_sg_for_device(struct device *dev,
+					   struct scatterlist *sg,
+					   int nents,
+					   enum dma_data_direction dir)
+{
+	/* NOP */
+}
+
+static int siw_dma_generic_mapping_error(struct device *dev,
+					 dma_addr_t dma_addr)
+{
+	return dma_addr == 0;
+}
+
+static int siw_dma_generic_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+const struct dma_map_ops siw_dma_generic_ops = {
+	.alloc = siw_dma_generic_alloc,
+	.free = siw_dma_generic_free,
+	.map_page = siw_dma_generic_map_page,
+	.unmap_page = siw_dma_generic_unmap_page,
+	.map_sg = siw_dma_generic_map_sg,
+	.unmap_sg = siw_dma_generic_unmap_sg,
+	.sync_single_for_cpu = siw_generic_sync_single_for_cpu,
+	.sync_single_for_device = siw_generic_sync_single_for_device,
+	.sync_sg_for_cpu = siw_generic_sync_sg_for_cpu,
+	.sync_sg_for_device = siw_generic_sync_sg_for_device,
+	.mapping_error = siw_dma_generic_mapping_error,
+	.dma_supported = siw_dma_generic_supported,
+	.is_phys = 1
+};
-- 
2.13.6
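
P.S. for reviewers: the offset-to-address translation done by siw_pbl_get_buffer() above can be sketched in ordinary userspace C. The snippet below is only an illustration of the lookup logic and is not part of the patch; the structure and names merely mirror the siw_pble fields (addr, size, pbl_off) referenced by the kernel code, and values in main() are made up for the example.

/*
 * Standalone illustration of the PBL lookup: resolve a linear byte
 * offset against a list of variably sized buffer elements.
 */
#include <stdio.h>
#include <stdint.h>

struct pble {
	uint64_t addr;		/* start address of this buffer element */
	uint64_t size;		/* length of this element in bytes */
	uint64_t pbl_off;	/* byte offset of this element within the PBL */
};

/* Return the address backing 'off', or 0 if 'off' is beyond the list. */
static uint64_t pbl_get_buffer(struct pble *pbe, int num_buf, uint64_t off,
			       int *len)
{
	int i;

	for (i = 0; i < num_buf; i++) {
		if (pbe[i].pbl_off + pbe[i].size > off) {
			uint64_t pble_off = off - pbe[i].pbl_off;

			if (len)
				*len = pbe[i].size - pble_off;
			return pbe[i].addr + pble_off;
		}
	}
	if (len)
		*len = 0;
	return 0;
}

int main(void)
{
	/* Two elements: 4 KB at 0x1000, followed by 8 KB at 0x8000. */
	struct pble pbe[] = {
		{ .addr = 0x1000, .size = 0x1000, .pbl_off = 0 },
		{ .addr = 0x8000, .size = 0x2000, .pbl_off = 0x1000 },
	};
	int len;
	uint64_t pa = pbl_get_buffer(pbe, 2, 0x1800, &len);

	/* Offset 0x1800 falls 0x800 into the second element: 0x8800, 0x1800 left. */
	printf("addr=0x%llx remaining=%d\n", (unsigned long long)pa, len);
	return 0;
}

In the kernel code the same walk can additionally resume at the last hit via the optional idx parameter; the sketch omits that for brevity.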