On 08/07/2019 12:14, Michal Kalderon wrote:
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8a6ccb936dfe..a830c2c5d691 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
>  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
>  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
>  	SET_DEVICE_OP(dev_ops, mmap);
> +	SET_DEVICE_OP(dev_ops, mmap_free);
>  	SET_DEVICE_OP(dev_ops, modify_ah);
>  	SET_DEVICE_OP(dev_ops, modify_cq);
>  	SET_DEVICE_OP(dev_ops, modify_device);
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..7166741834c8 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  	rdma_restrack_del(&ucontext->res);
>
>  	ib_dev->ops.dealloc_ucontext(ucontext);
> +	rdma_user_mmap_entries_remove_free(ucontext);

This should happen before the dealloc_ucontext call.

> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	u64 mmap_page;
> +
> +	mmap_page = key >> PAGE_SHIFT;
> +	if (mmap_page > U32_MAX)
> +		return NULL;
> +
> +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> +	if (!entry || rdma_user_mmap_get_key(entry) != key ||

I wonder if the 'rdma_user_mmap_get_key(entry) != key' check is still needed.

> +/*
> + * This is only called when the ucontext is destroyed and there can be no
> + * concurrent query via mmap or allocate on the xarray, thus we can be sure no
> + * other thread is using the entry pointer. We also know that all the BAR
> + * pages have either been zap'd or munmaped at this point. Normal pages are
> + * refcounted and will be freed at the proper time.
> + */
> +void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	unsigned long mmap_page;
> +
> +	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> +		xa_erase(&ucontext->mmap_xa, mmap_page);
> +
> +		ibdev_dbg(ucontext->device,
> +			  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> +			  entry->obj, rdma_user_mmap_get_key(entry),
> +			  entry->address, entry->length);
> +		if (ucontext->device->ops.mmap_free)
> +			ucontext->device->ops.mmap_free(entry->address,
> +							entry->length,
> +							entry->mmap_flag);

Pass the entry itself instead of its individual fields?

> +		kfree(entry);
> +	}
> +}
> +
>  void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>  {
>  	struct rdma_umap_priv *priv, *next_priv;
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 26e9c2594913..54ce3fdae180 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1425,6 +1425,8 @@ struct ib_ucontext {
>  	 * Implementation details of the RDMA core, don't use in drivers:
>  	 */
>  	struct rdma_restrack_entry res;
> +	struct xarray mmap_xa;
> +	u32 mmap_xa_page;
>  };
>
>  struct ib_uobject {
> @@ -2311,6 +2313,7 @@ struct ib_device_ops {
>  			      struct ib_udata *udata);
>  	void (*dealloc_ucontext)(struct ib_ucontext *context);
>  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> +	void (*mmap_free)(u64 address, u64 length, u8 mmap_flag);

I feel like this callback needs some documentation.

>  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
>  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
>  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> @@ -2706,9 +2709,23 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
>  void ib_set_device_ops(struct ib_device *device,
>  		       const struct ib_device_ops *ops);
>
> +#define RDMA_USER_MMAP_INVALID U64_MAX
> +struct rdma_user_mmap_entry {
> +	void *obj;

I know EFA is the culprit here, but please remove the extra space :).

> +	u64 address;
> +	u64 length;
> +	u32 mmap_page;
> +	u8 mmap_flag;
> +};
> +