From: Leon Romanovsky <leonro@xxxxxxxxxxxx> The additions of .doit callbacks pose a new access pattern to the resource entries by some user visible index. Back then, the legacy DB was implemented as hash because per-index access wasn't needed and XArray wasn't accepted yet. Acceptance of XArray together with per-index access requires the refresh of DB implementation. Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> --- drivers/infiniband/core/nldev.c | 13 ++-- drivers/infiniband/core/restrack.c | 103 ++++++++++++++++++++--------- include/rdma/restrack.h | 22 ++++-- 3 files changed, 93 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 5601fa968244..4bf890ae6e28 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, int start = cb->args[0]; bool has_cap_net_admin; struct nlmsghdr *nlh; + unsigned long id; u32 index, port = 0; bool filled = false; @@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb, has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); down_read(&device->res.rwsem); - hash_for_each_possible(device->res.hash, res, node, res_type) { + /* + * FIXME: if the skip ahead is something common this loop should + * use xas_for_each & xas_pause to optimize, we can have a lot of + * objects. + */ + xa_for_each(&device->res.xa[res_type], id, res) { if (idx < start) goto next; @@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, rdma_restrack_put(res); if (ret == -EMSGSIZE) - /* - * There is a chance to optimize here. - * It can be done by using list_prepare_entry - * and list_for_each_entry_continue afterwards. 
- */ break; if (ret) goto res_err; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index f80b37d437ac..b4f302811858 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -12,6 +12,28 @@ #include "cma_priv.h" +static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + u32 *next) +{ + int err; + + *id = *next; + if (*next == U32_MAX) + *id = 0; + + xa_lock(xa); + err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL); + if (err && *next != U32_MAX) { + *id = 0; + err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL); + } + + if (!err) + *next = *id + 1; + xa_unlock(xa); + return err; +} + /** * rdma_restrack_init() - initialize resource tracking * @dev: IB device @@ -19,6 +41,10 @@ void rdma_restrack_init(struct ib_device *dev) { struct rdma_restrack_root *res = &dev->res; + int i; + + for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) + xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC); init_rwsem(&res->rwsem); } @@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev) struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; + bool found = false; const char *owner; - int bkt; - - if (hash_empty(res->hash)) - return; - - dev = container_of(res, struct ib_device, res); - pr_err("restrack: %s", CUT_HERE); - dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); - hash_for_each(res->hash, bkt, e, node) { - if (rdma_is_kernel_res(e)) { - owner = e->kern_name; - } else { - /* - * There is no need to call get_task_struct here, - * because we can be here only if there are more - * get_task_struct() call than put_task_struct(). 
- */ - get_task_comm(buf, e->task); - owner = buf; + int i; + + for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { + if (!xa_empty(&res->xa[i])) { + unsigned long index; + + if (!found) { + pr_err("restrack: %s", CUT_HERE); + dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); + } + xa_for_each(&res->xa[i], index, e) { + if (rdma_is_kernel_res(e)) { + owner = e->kern_name; + } else { + /* + * There is no need to call get_task_struct here, + * because we can be here only if there are more + * get_task_struct() call than put_task_struct(). + */ + get_task_comm(buf, e->task); + owner = buf; + } + + pr_err("restrack: %s %s object allocated by %s is not freed\n", + rdma_is_kernel_res(e) ? "Kernel" : + "User", + type2str(e->type), owner); + } + found = true; } - - pr_err("restrack: %s %s object allocated by %s is not freed\n", - rdma_is_kernel_res(e) ? "Kernel" : "User", - type2str(e->type), owner); + xa_destroy(&res->xa[i]); } - pr_err("restrack: %s", CUT_HERE); + if (found) + pr_err("restrack: %s", CUT_HERE); } /** @@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, { struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; + unsigned long index = 0; u32 cnt = 0; down_read(&res->rwsem); - hash_for_each_possible(res->hash, e, node, type) { + xa_for_each(&res->xa[type], index, e) { if (ns == &init_pid_ns || (!rdma_is_kernel_res(e) && ns == task_active_pid_ns(e->task))) @@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task); static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); + int ret; if (!dev) return; kref_init(&res->kref); init_completion(&res->comp); - res->valid = true; down_write(&dev->res.rwsem); - hash_add(dev->res.hash, &res->node, res->type); + ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res, + &dev->res.next_id[res->type]); + + if (!ret) + res->valid = true; up_write(&dev->res.rwsem); } @@ -241,15 +281,14 @@ void 
rdma_restrack_del(struct rdma_restrack_entry *res) if (!dev) return; - rdma_restrack_put(res); - - wait_for_completion(&res->comp); - down_write(&dev->res.rwsem); - hash_del(&res->node); + xa_erase(&dev->res.xa[res->type], res->id); res->valid = false; up_write(&dev->res.rwsem); + rdma_restrack_put(res); + wait_for_completion(&res->comp); + out: if (res->task) { put_task_struct(res->task); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index cc66cc7a11d3..16e11b4c3ec3 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -13,6 +13,7 @@ #include <linux/completion.h> #include <linux/sched/task.h> #include <uapi/rdma/rdma_netlink.h> +#include <linux/xarray.h> /** * enum rdma_restrack_type - HW objects to track @@ -48,7 +49,6 @@ enum rdma_restrack_type { RDMA_RESTRACK_MAX }; -#define RDMA_RESTRACK_HASH_BITS 8 struct ib_device; struct rdma_restrack_entry; @@ -62,9 +62,17 @@ struct rdma_restrack_root { */ struct rw_semaphore rwsem; /** - * @hash: global database for all resources per-device + * @xa: Array of XArray structures to hold restrack entries. + * We want to use array of XArrays because insertion is type + * dependent. For types with existing unique ID (like QPN), + * we will insert to that unique index. For other types, + * we insert based on pointers and auto-allocate unique index. */ - DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); + struct xarray xa[RDMA_RESTRACK_MAX]; + /** + * @next_id: Next ID to support cyclic allocation + */ + u32 next_id[RDMA_RESTRACK_MAX]; }; /** @@ -102,10 +110,6 @@ struct rdma_restrack_entry { * @kern_name: name of owner for the kernel created entities. */ const char *kern_name; - /** - * @node: hash table entry - */ - struct hlist_node node; /** * @type: various objects in restrack database */ @@ -114,6 +118,10 @@ struct rdma_restrack_entry { * @user: user resource */ bool user; + /** + * @id: ID to expose to users + */ + u32 id; }; void rdma_restrack_init(struct ib_device *dev); -- 2.19.1