[PATCH rdma-next v6 01/10] RDMA/restrack: Convert internal DB from hash to XArray

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Leon Romanovsky <leonro@xxxxxxxxxxxx>

The additions of .doit callbacks posses new access pattern to the
resource entries by some user visible index. Back then, the legacy
DB was implemented as hash because per-index access wasn't needed
and XArray wasn't accepted yet.

Acceptance of XArray together with per-index access requires the refresh
of DB implementation.

Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
 drivers/infiniband/core/nldev.c    |  13 ++--
 drivers/infiniband/core/restrack.c | 103 ++++++++++++++++++++---------
 include/rdma/restrack.h            |  22 ++++--
 3 files changed, 93 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 5601fa968244..4bf890ae6e28 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	int start = cb->args[0];
 	bool has_cap_net_admin;
 	struct nlmsghdr *nlh;
+	unsigned long id;
 	u32 index, port = 0;
 	bool filled = false;
 
@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
 
 	down_read(&device->res.rwsem);
-	hash_for_each_possible(device->res.hash, res, node, res_type) {
+	/*
+	 * FIXME: if the skip ahead is something common this loop should
+	 * use xas_for_each & xas_pause to optimize, we can have a lot of
+	 * objects.
+	 */
+	xa_for_each(&device->res.xa[res_type], id, res) {
 		if (idx < start)
 			goto next;
 
@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 		rdma_restrack_put(res);
 
 		if (ret == -EMSGSIZE)
-			/*
-			 * There is a chance to optimize here.
-			 * It can be done by using list_prepare_entry
-			 * and list_for_each_entry_continue afterwards.
-			 */
 			break;
 		if (ret)
 			goto res_err;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index f80b37d437ac..b4f302811858 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -12,6 +12,28 @@
 
 #include "cma_priv.h"
 
+static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
+			      u32 *next)
+{
+	int err;
+
+	*id = *next;
+	if (*next == U32_MAX)
+		*id = 0;
+
+	xa_lock(xa);
+	err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
+	if (err && *next != U32_MAX) {
+		*id = 0;
+		err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
+	}
+
+	if (!err)
+		*next = *id + 1;
+	xa_unlock(xa);
+	return err;
+}
+
 /**
  * rdma_restrack_init() - initialize resource tracking
  * @dev:  IB device
@@ -19,6 +41,10 @@
 void rdma_restrack_init(struct ib_device *dev)
 {
 	struct rdma_restrack_root *res = &dev->res;
+	int i;
+
+	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
+		xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
 
 	init_rwsem(&res->rwsem);
 }
@@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev)
 	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
 	char buf[TASK_COMM_LEN];
+	bool found = false;
 	const char *owner;
-	int bkt;
-
-	if (hash_empty(res->hash))
-		return;
-
-	dev = container_of(res, struct ib_device, res);
-	pr_err("restrack: %s", CUT_HERE);
-	dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
-	hash_for_each(res->hash, bkt, e, node) {
-		if (rdma_is_kernel_res(e)) {
-			owner = e->kern_name;
-		} else {
-			/*
-			 * There is no need to call get_task_struct here,
-			 * because we can be here only if there are more
-			 * get_task_struct() call than put_task_struct().
-			 */
-			get_task_comm(buf, e->task);
-			owner = buf;
+	int i;
+
+	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
+		if (!xa_empty(&res->xa[i])) {
+			unsigned long index;
+
+			if (!found) {
+				pr_err("restrack: %s", CUT_HERE);
+				dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
+			}
+			xa_for_each(&res->xa[i], index, e) {
+				if (rdma_is_kernel_res(e)) {
+					owner = e->kern_name;
+				} else {
+					/*
+					 * There is no need to call get_task_struct here,
+					 * because we can be here only if there are more
+					 * get_task_struct() call than put_task_struct().
+					 */
+					get_task_comm(buf, e->task);
+					owner = buf;
+				}
+
+				pr_err("restrack: %s %s object allocated by %s is not freed\n",
+				       rdma_is_kernel_res(e) ? "Kernel" :
+							       "User",
+				       type2str(e->type), owner);
+			}
+			found = true;
 		}
-
-		pr_err("restrack: %s %s object allocated by %s is not freed\n",
-		       rdma_is_kernel_res(e) ? "Kernel" : "User",
-		       type2str(e->type), owner);
+		xa_destroy(&res->xa[i]);
 	}
-	pr_err("restrack: %s", CUT_HERE);
+	if (found)
+		pr_err("restrack: %s", CUT_HERE);
 }
 
 /**
@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 {
 	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
+	unsigned long index = 0;
 	u32 cnt = 0;
 
 	down_read(&res->rwsem);
-	hash_for_each_possible(res->hash, e, node, type) {
+	xa_for_each(&res->xa[type], index, e) {
 		if (ns == &init_pid_ns ||
 		    (!rdma_is_kernel_res(e) &&
 		     ns == task_active_pid_ns(e->task)))
@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
 static void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
+	int ret;
 
 	if (!dev)
 		return;
 
 	kref_init(&res->kref);
 	init_completion(&res->comp);
-	res->valid = true;
 
 	down_write(&dev->res.rwsem);
-	hash_add(dev->res.hash, &res->node, res->type);
+	ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
+				 &dev->res.next_id[res->type]);
+
+	if (!ret)
+		res->valid = true;
 	up_write(&dev->res.rwsem);
 }
 
@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	if (!dev)
 		return;
 
-	rdma_restrack_put(res);
-
-	wait_for_completion(&res->comp);
-
 	down_write(&dev->res.rwsem);
-	hash_del(&res->node);
+	xa_erase(&dev->res.xa[res->type], res->id);
 	res->valid = false;
 	up_write(&dev->res.rwsem);
 
+	rdma_restrack_put(res);
+	wait_for_completion(&res->comp);
+
 out:
 	if (res->task) {
 		put_task_struct(res->task);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index cc66cc7a11d3..16e11b4c3ec3 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -13,6 +13,7 @@
 #include <linux/completion.h>
 #include <linux/sched/task.h>
 #include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
 
 /**
  * enum rdma_restrack_type - HW objects to track
@@ -48,7 +49,6 @@ enum rdma_restrack_type {
 	RDMA_RESTRACK_MAX
 };
 
-#define RDMA_RESTRACK_HASH_BITS	8
 struct ib_device;
 struct rdma_restrack_entry;
 
@@ -62,9 +62,17 @@ struct rdma_restrack_root {
 	 */
 	struct rw_semaphore	rwsem;
 	/**
-	 * @hash: global database for all resources per-device
+	 * @xa: Array of XArray structures to hold restrack entries.
+	 * We want to use array of XArrays because insertion is type
+	 * dependent. For types with xisiting unique ID (like QPN),
+	 * we will insert to that unique index. For other types,
+	 * we insert based on pointers and auto-allocate unique index.
 	 */
-	DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
+	struct xarray xa[RDMA_RESTRACK_MAX];
+	/**
+	 * @next_id: Next ID to support cyclic allocation
+	 */
+	u32 next_id[RDMA_RESTRACK_MAX];
 };
 
 /**
@@ -102,10 +110,6 @@ struct rdma_restrack_entry {
 	 * @kern_name: name of owner for the kernel created entities.
 	 */
 	const char		*kern_name;
-	/**
-	 * @node: hash table entry
-	 */
-	struct hlist_node	node;
 	/**
 	 * @type: various objects in restrack database
 	 */
@@ -114,6 +118,10 @@ struct rdma_restrack_entry {
 	 * @user: user resource
 	 */
 	bool			user;
+	/**
+	 * @id: ID to expose to users
+	 */
+	u32 id;
 };
 
 void rdma_restrack_init(struct ib_device *dev);
-- 
2.19.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux