Re: [PATCH v12 rdma-next 2/8] RDMA/core: Create mmap database and cookie helper functions


On 10/30/2019 11:44 AM, Michal Kalderon wrote:
Create some common APIs for adding entries to the mmap xarray,
searching for an entry and freeing one.

Most of the code was copied from the efa driver almost as-is; the
functions were just renamed to be generic rather than efa specific.
Moving this code to the core makes it possible to manage the entries
differently, so that they can now be removed and deleted once both the
driver and the user are done with them. This also enabled changing the
insert algorithm compared to what was done in efa.

Signed-off-by: Ariel Elior <ariel.elior@xxxxxxxxxxx>
Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxxx>
---
  drivers/infiniband/core/device.c         |   1 +
  drivers/infiniband/core/ib_core_uverbs.c | 201 +++++++++++++++++++++++++++++++
  drivers/infiniband/core/rdma_core.c      |   1 +
  drivers/infiniband/core/uverbs_cmd.c     |   2 +
  include/rdma/ib_verbs.h                  |  34 ++++++
  5 files changed, 239 insertions(+)
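
Just to make sure I follow the intended usage: a driver would embed struct
rdma_user_mmap_entry in its own per-mapping structure, insert it, and hand
the returned key to userspace as the mmap offset, roughly like the sketch
below (hypothetical driver names, not part of this series)?

struct my_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;	/* must be embedded */
	u64 address;				/* what will be mapped */
};

static int my_alloc_user_mapping(struct ib_ucontext *uctx, u64 address,
				 size_t length, u64 *key)
{
	struct my_user_mmap_entry *entry;
	int ret;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->address = address;
	ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, length);
	if (ret) {
		kfree(entry);
		return ret;
	}

	/* userspace passes this key back as the mmap offset */
	*key = rdma_user_mmap_get_key(&entry->rdma_entry);
	return 0;
}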

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a667636f74bf..bf3a683057bc 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2629,6 +2629,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
  	SET_DEVICE_OP(dev_ops, mmap);
+	SET_DEVICE_OP(dev_ops, mmap_free);
  	SET_DEVICE_OP(dev_ops, modify_ah);
  	SET_DEVICE_OP(dev_ops, modify_cq);
  	SET_DEVICE_OP(dev_ops, modify_device);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
index b74d2a2fb342..1ffc89fd5d94 100644
--- a/drivers/infiniband/core/ib_core_uverbs.c
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -71,3 +71,204 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
  	return 0;
  }
  EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @key: the key received from rdma_user_mmap_entry_insert which
+ *     is provided by user as the address to map.
+ * @vma: the vma related to the current mmap call.
+ *
+ * This function is called when a user tries to mmap a key it
+ * initially received from the driver. The key was created by
+ * the function rdma_user_mmap_entry_insert.
+ * This function increases the refcnt of the entry so that it won't
+ * be deleted from the xa in the meantime.
+ *
+ * Return: an entry if one exists or NULL if there is no match.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key,
+			 struct vm_area_struct *vma)

Where is @vma used in this function? I would expect this API to return the entry pointed to by @key without any relation to @vma; wasn't that the plan?

+{
+	struct rdma_user_mmap_entry *entry;
+	u64 mmap_page;
+
+	mmap_page = key >> PAGE_SHIFT;
+	if (mmap_page > U32_MAX)
+		return NULL;
+
+	xa_lock(&ucontext->mmap_xa);
+
+	entry = xa_load(&ucontext->mmap_xa, mmap_page);
+
+	/* if refcount is zero, entry is already being deleted */
+	if (!entry || entry->invalid || !kref_get_unless_zero(&entry->ref))
+		goto err;
+
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(ucontext->device,
+		  "mmap: key[%#llx] npages[%#x] returned\n",
+		  key, entry->npages);
+
+	return entry;
+
+err:
+	xa_unlock(&ucontext->mmap_xa);
+	return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+void rdma_user_mmap_entry_free(struct kref *kref)
+{
+	struct rdma_user_mmap_entry *entry =
+		container_of(kref, struct rdma_user_mmap_entry, ref);
+	struct ib_ucontext *ucontext = entry->ucontext;
+	unsigned long i;
+
+	/* need to erase all entries occupied by this single entry */
+	xa_lock(&ucontext->mmap_xa);
+	for (i = 0; i < entry->npages; i++)
+		__xa_erase(&ucontext->mmap_xa, entry->mmap_page + i);
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(ucontext->device,
+		  "mmap: key[%#llx] npages[%#x] removed\n",
+		  rdma_user_mmap_get_key(entry),
+		  entry->npages);
+
+	if (ucontext->device->ops.mmap_free)
+		ucontext->device->ops.mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @ucontext: associated user context.
+ * @entry: an entry in the mmap_xa.
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct ib_ucontext *ucontext,
+			      struct rdma_user_mmap_entry *entry)
+{
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ *				   mark it as invalid.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ */
+void rdma_user_mmap_entry_remove(struct ib_ucontext *ucontext,
+				 struct rdma_user_mmap_entry *entry)
+{
+	if (!entry)
+		return;
+
+	entry->invalid = true;
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given key.
+ * The function allocates a unique key that should be provided to the
+ * user; the user will use the key to retrieve information such as the
+ * address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				struct rdma_user_mmap_entry *entry,
+				size_t length)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	XA_STATE(xas, &ucontext->mmap_xa, 0);
+	u32 xa_first, xa_last, npages;
+	int err, i;
+
+	if (!entry)
+		return -EINVAL;
+
+	kref_init(&entry->ref);
+	entry->ucontext = ucontext;
+
+	/* We want the whole allocation to be done without interruption
+	 * from a different thread. The allocation requires finding a
+	 * free range and storing. During the xa_insert the lock could be
+	 * released, we don't want another thread taking the gap.
+	 */
+	mutex_lock(&ufile->umap_lock);
+
+	xa_lock(&ucontext->mmap_xa);
+
+	/* We want to find an empty range */
+	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+	entry->npages = npages;
+	while (true) {
+		/* First find an empty index */
+		xas_find_marked(&xas, U32_MAX, XA_FREE_MARK);
+		if (xas.xa_node == XAS_RESTART)
+			goto err_unlock;
+
+		xa_first = xas.xa_index;
+
+		/* Is there enough room to have the range? */
+		if (check_add_overflow(xa_first, npages, &xa_last))
+			goto err_unlock;
+
+		/* Now look for the next present entry. If such doesn't
+		 * exist, we found an empty range and can proceed
+		 */
+		xas_next_entry(&xas, xa_last - 1);
+		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
+			break;
+		/* o/w look for the next free entry */
+	}
+
+	for (i = xa_first; i < xa_last; i++) {
+		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+		if (err)
+			goto err_undo;
+	}
+
+	entry->mmap_page = xa_first;
+	xa_unlock(&ucontext->mmap_xa);
+
+	mutex_unlock(&ufile->umap_lock);
+	ibdev_dbg(ucontext->device,
+		  "mmap: key[%#llx] npages[%#x] inserted\n",
+		  rdma_user_mmap_get_key(entry), npages);
+
+	return 0;
+
+err_undo:
+	for (; i > xa_first; i--)
+		__xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+	xa_unlock(&ucontext->mmap_xa);
+	mutex_unlock(&ufile->umap_lock);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
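
Then on the mmap path the consumer side would presumably look something
like this, reusing the hypothetical my_user_mmap_entry from the sketch
above (the actual mapping call is elided):

static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
	u64 key = (u64)vma->vm_pgoff << PAGE_SHIFT;
	struct rdma_user_mmap_entry *rdma_entry;
	struct my_user_mmap_entry *entry;
	int ret = 0;

	rdma_entry = rdma_user_mmap_entry_get(uctx, key, vma);
	if (!rdma_entry)
		return -EINVAL;

	entry = container_of(rdma_entry, struct my_user_mmap_entry,
			     rdma_entry);

	ibdev_dbg(uctx->device, "mmap of address %#llx\n", entry->address);

	/*
	 * The actual mapping of entry->address into vma would happen here,
	 * e.g. via rdma_user_mmap_io(), setting ret on failure.
	 */

	/*
	 * Drop the reference taken by rdma_user_mmap_entry_get(); per the
	 * kernel-doc above, for an io mapping the reference could instead
	 * be held until the mapping is closed.
	 */
	rdma_user_mmap_entry_put(uctx, rdma_entry);
	return ret;
}
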
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..6c72773faf29 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
  	rdma_restrack_del(&ucontext->res);
  	ib_dev->ops.dealloc_ucontext(ucontext);
+	WARN_ON(!xa_empty(&ucontext->mmap_xa));
  	kfree(ucontext);
  	ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 14a80fd9f464..06ed32c8662f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -252,6 +252,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
  	ucontext->closing = false;
  	ucontext->cleanup_retryable = false;
+	xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
  	ret = get_unused_fd_flags(O_CLOEXEC);
  	if (ret < 0)
  		goto err_free;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6a47ba85c54c..8a87c9d442bc 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1471,6 +1471,7 @@ struct ib_ucontext {
  	 * Implementation details of the RDMA core, don't use in drivers:
  	 */
  	struct rdma_restrack_entry res;
+	struct xarray mmap_xa;
  };
  struct ib_uobject {
@@ -2251,6 +2252,20 @@ struct iw_cm_conn_param;
  #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct

+struct rdma_user_mmap_entry {
+	struct kref ref;
+	struct ib_ucontext *ucontext;
+	u32 npages;
+	u32 mmap_page;
+	bool invalid;
+};
+
+static inline u64
+rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
+{
+	return (u64)entry->mmap_page << PAGE_SHIFT;
+}
+
  /**
   * struct ib_device_ops - InfiniBand device operations
   * This structure defines all the InfiniBand device operations, providers will
@@ -2363,6 +2378,13 @@ struct ib_device_ops {
  			      struct ib_udata *udata);
  	void (*dealloc_ucontext)(struct ib_ucontext *context);
  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+	/**
+	 * This will be called once refcount of an entry in mmap_xa reaches
+	 * zero. The type of the memory that was mapped may differ between
+	 * entries and is opaque to the rdma_user_mmap interface.
+	 * Therefore needs to be implemented by the driver in mmap_free.
+	 */
+	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
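
For completeness, the matching mmap_free hook from the sketches above would
presumably just release whatever backs the mapping and free the containing
structure:

static void my_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct my_user_mmap_entry *entry =
		container_of(rdma_entry, struct my_user_mmap_entry,
			     rdma_entry);

	/* release whatever backs entry->address, then the entry itself */
	kfree(entry);
}

with rdma_user_mmap_entry_remove() called when the driver tears down the
underlying object, dropping the insert-time reference so mmap_free runs
once any remaining users have dropped theirs.
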
@@ -2801,6 +2823,18 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
  	return -EINVAL;
  }
  #endif
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				struct rdma_user_mmap_entry *entry,
+				size_t length);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key,
+			 struct vm_area_struct *vma);
+
+void rdma_user_mmap_entry_put(struct ib_ucontext *ucontext,
+			      struct rdma_user_mmap_entry *entry);
+
+void rdma_user_mmap_entry_remove(struct ib_ucontext *ucontext,
+				 struct rdma_user_mmap_entry *entry);
  static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
  {
