On 27/06/2019 16:58, Michal Kalderon wrote:
> Create a common API for adding entries to a xa_mmap.
> This API can be used by drivers that don't require special
> mapping for user mapped memory.
>
> The code was copied from the efa driver almost as is, just renamed
> function to be generic and not efa specific.

I don't think we should force the mmap flags to be the same for all
drivers. Take a look at mlx5 for example:

enum mlx5_ib_mmap_cmd {
	MLX5_IB_MMAP_REGULAR_PAGE		= 0,
	MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES	= 1,
	MLX5_IB_MMAP_WC_PAGE			= 2,
	MLX5_IB_MMAP_NC_PAGE			= 3,
	/* 5 is chosen in order to be compatible with old versions of libmlx5 */
	MLX5_IB_MMAP_CORE_CLOCK			= 5,
	MLX5_IB_MMAP_ALLOC_WC			= 6,
	MLX5_IB_MMAP_CLOCK_INFO			= 7,
	MLX5_IB_MMAP_DEVICE_MEM			= 8,
};

The flags taken from EFA aren't necessarily going to work for other
drivers. Maybe the flag bits should be opaque to ib_core, with the
actual mmap callbacks left in the drivers. I'm not sure how
dealloc_ucontext is going to work with opaque flags, though.

>
> Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxxx>
> ---
>  drivers/infiniband/core/rdma_core.c   |   1 +
>  drivers/infiniband/core/uverbs_cmd.c  |   1 +
>  drivers/infiniband/core/uverbs_main.c | 177 ++++++++++++++++++++++++++++++++++
>  include/rdma/ib_verbs.h               |  29 ++++++
>  4 files changed, 208 insertions(+)
>
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..7166741834c8 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  	rdma_restrack_del(&ucontext->res);
>
>  	ib_dev->ops.dealloc_ucontext(ucontext);
> +	rdma_user_mmap_entries_remove_free(ucontext);

Should this happen before calling dealloc_ucontext?

>  	kfree(ucontext);
>
>  	ufile->ucontext = NULL;
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index 911533081db5..53cd38e4c509 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -243,6 +243,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
>
>  	mutex_init(&ucontext->per_mm_list_lock);
>  	INIT_LIST_HEAD(&ucontext->per_mm_list);
> +	xa_init(&ucontext->mmap_xa);
>
>  	ret = get_unused_fd_flags(O_CLOEXEC);
>  	if (ret < 0)
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 11c13c1381cf..e7ba855e137b 100644
> --- a/drivers/infiniband/core/uverbs_main.c
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -932,6 +932,18 @@ static const struct vm_operations_struct rdma_umap_ops = {
>   * their mmap() functions if they wish to send something like PCI-E BAR memory
>   * to userspace.
>   */
> +
> +struct rdma_user_mmap_entry {
> +	void *obj;
> +	u64 address;
> +	u64 length;
> +	u32 mmap_page;
> +	u8 mmap_flag;
> +};
> +
> +#define RDMA_USER_MMAP_FLAG_SHIFT 56
> +#define RDMA_USER_MMAP_PAGE_MASK GENMASK(RDMA_USER_MMAP_FLAG_SHIFT - 1, 0)
> +
>  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
>  		      unsigned long pfn, unsigned long size, pgprot_t prot)
>  {
> @@ -965,6 +977,171 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
>  }
>  EXPORT_SYMBOL(rdma_user_mmap_io);
>
> +static inline u64
> +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
> +{
> +	return ((u64)entry->mmap_flag << RDMA_USER_MMAP_FLAG_SHIFT) |
> +	       ((u64)entry->mmap_page << PAGE_SHIFT);
> +}
> +
> +static struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	u64 mmap_page;
> +
> +	mmap_page = (key & RDMA_USER_MMAP_PAGE_MASK) >> PAGE_SHIFT;
> +	if (mmap_page > U32_MAX)
> +		return NULL;
> +
> +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> +	if (!entry || rdma_user_mmap_get_key(entry) != key ||
> +	    entry->length != len)
> +		return NULL;
> +
> +	ibdev_dbg(ucontext->device,
> +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> +		  entry->obj, key, entry->address, entry->length);
> +
> +	return entry;
> +}
> +
> +/*
> + * Note this locking scheme cannot support removal of entries, except during
> + * ucontext destruction when the core code guarentees no concurrency.
> + */
> +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> +				u64 address, u64 length, u8 mmap_flag)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	int err;
> +
> +	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> +	if (!entry)
> +		return RDMA_USER_MMAP_INVALID;
> +
> +	entry->obj = obj;
> +	entry->address = address;
> +	entry->length = length;
> +	entry->mmap_flag = mmap_flag;
> +
> +	xa_lock(&ucontext->mmap_xa);
> +	entry->mmap_page = ucontext->mmap_xa_page;
> +	ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE);
> +	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
> +			  GFP_KERNEL);
> +	xa_unlock(&ucontext->mmap_xa);
> +	if (err) {
> +		kfree(entry);
> +		return RDMA_USER_MMAP_INVALID;
> +	}
> +
> +	ibdev_dbg(ucontext->device,
> +		  "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
> +		  entry->obj, entry->address, entry->length,
> +		  rdma_user_mmap_get_key(entry));
> +
> +	return rdma_user_mmap_get_key(entry);
> +}
> +EXPORT_SYMBOL(rdma_user_mmap_entry_insert);

Line break here.

> +/*
> + * This is only called when the ucontext is destroyed and there can be no
> + * concurrent query via mmap or allocate on the xarray, thus we can be sure no
> + * other thread is using the entry pointer. We also know that all the BAR
> + * pages have either been zap'd or munmaped at this point. Normal pages are
> + * refcounted and will be freed at the proper time.
> + */
> +void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	unsigned long mmap_page;
> +
> +	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> +		xa_erase(&ucontext->mmap_xa, mmap_page);
> +
> +		ibdev_dbg(ucontext->device,
> +			  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> +			  entry->obj, rdma_user_mmap_get_key(entry),
> +			  entry->address, entry->length);
> +		if (entry->mmap_flag == RDMA_USER_MMAP_DMA_PAGE)
> +			/* DMA mapping is already gone, now free the pages */
> +			free_pages_exact(phys_to_virt(entry->address),
> +					 entry->length);
> +		kfree(entry);
> +	}
> +}
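
If the flag stays opaque to ib_core, one way dealloc_ucontext could
still work is a driver-supplied per-entry free callback, so the core
never interprets mmap_flag itself. A rough sketch of what I mean (the
mmap_entry_free op is hypothetical, untested):

	/* Hypothetical addition to struct ib_device_ops: the driver
	 * releases whatever backs the entry, core only frees metadata.
	 */
	void (*mmap_entry_free)(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry);

	void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
	{
		struct rdma_user_mmap_entry *entry;
		unsigned long mmap_page;

		xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
			xa_erase(&ucontext->mmap_xa, mmap_page);
			/* Driver decodes its own mmap_flag and frees the
			 * backing memory (DMA pages for EFA, nothing for
			 * BAR mappings, etc.). Core never looks at the flag.
			 */
			if (ucontext->device->ops.mmap_entry_free)
				ucontext->device->ops.mmap_entry_free(ucontext,
								      entry);
			kfree(entry);
		}
	}

That would keep the RDMA_USER_MMAP_DMA_PAGE handling in EFA and let
mlx5-style drivers use whatever flag values they already expose to
userspace.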
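Separately, for anyone puzzling over the key layout above: the key
packs an 8-bit flag above a 56-bit byte offset. A standalone userspace
demo of the encode/decode (assuming 4K pages; not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SHIFT 12	/* assuming 4K pages */
	#define DEMO_FLAG_SHIFT 56	/* mirrors RDMA_USER_MMAP_FLAG_SHIFT */
	#define DEMO_PAGE_MASK	((1ULL << DEMO_FLAG_SHIFT) - 1)

	int main(void)
	{
		uint8_t mmap_flag = 1;	/* e.g. a DMA-page flag */
		uint32_t mmap_page = 2;	/* xarray index of the entry */

		/* Encode, as in rdma_user_mmap_get_key() */
		uint64_t key = ((uint64_t)mmap_flag << DEMO_FLAG_SHIFT) |
			       ((uint64_t)mmap_page << DEMO_PAGE_SHIFT);

		/* Decode, as in rdma_user_mmap_entry_get() */
		uint8_t flag = key >> DEMO_FLAG_SHIFT;
		uint64_t page = (key & DEMO_PAGE_MASK) >> DEMO_PAGE_SHIFT;

		/* Prints: key=0x100000000002000 flag=1 page=2 */
		printf("key=%#llx flag=%u page=%llu\n",
		       (unsigned long long)key, (unsigned)flag,
		       (unsigned long long)page);
		return 0;
	}

So only eight bits are available for per-driver semantics, which is
another reason forcing the EFA flag values on everyone feels too
restrictive.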