From: Leon Romanovsky <leonro@xxxxxxxxxxxx> There is no need to expose internals of restrack DB to IB/core. Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> --- drivers/infiniband/core/device.c | 5 +- drivers/infiniband/core/nldev.c | 16 ++-- drivers/infiniband/core/restrack.c | 135 ++++++++++++++++++++++++----- include/rdma/ib_verbs.h | 7 +- include/rdma/restrack.h | 28 ++---- 5 files changed, 134 insertions(+), 57 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index fa8d79548ee7..d13491b416dc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -288,7 +288,10 @@ struct ib_device *ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(device); + if (rdma_restrack_init(device)) { + kfree(device); + return NULL; + } device->dev.class = &ib_class; device_initialize(&device->dev); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index d9f98df95ec3..01e20bbdd7db 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1033,6 +1033,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, unsigned long id = 0; u32 index, port = 0; bool filled = false; + struct xarray *xa; err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); @@ -1080,8 +1081,9 @@ static int res_get_common_dumpit(struct sk_buff *skb, has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); - down_read(&device->res.rwsem); - xa_for_each(&device->res.xa[res_type], res, id, ULONG_MAX, XA_PRESENT) { + xa = rdma_dev_to_xa(device, res_type); + rdma_rt_read_lock(device, res_type); + xa_for_each(xa, res, id, ULONG_MAX, XA_PRESENT) { if (idx < start) goto next; @@ -1102,13 +1104,13 @@ static int res_get_common_dumpit(struct sk_buff *skb, if (!entry_attr) { ret = -EMSGSIZE; rdma_restrack_put(res); - up_read(&device->res.rwsem); + rdma_rt_read_unlock(device, res_type); break; } - up_read(&device->res.rwsem); + 
rdma_rt_read_unlock(device, res_type); ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); - down_read(&device->res.rwsem); + rdma_rt_read_lock(device, res_type); /* * Return resource back, but it won't be released till * the &device->res.rwsem will be released for write. @@ -1129,7 +1131,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, nla_nest_end(skb, entry_attr); next: idx++; } - up_read(&device->res.rwsem); + rdma_rt_read_unlock(device, res_type); nla_nest_end(skb, table_attr); nlmsg_end(skb, nlh); @@ -1147,7 +1149,7 @@ next: idx++; res_err: nla_nest_cancel(skb, table_attr); - up_read(&device->res.rwsem); + rdma_rt_read_unlock(device, res_type); err: nlmsg_cancel(skb, nlh); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 15a110eff1ca..78e27e78d8b6 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -9,22 +9,52 @@ #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/pid_namespace.h> +#include <linux/rwsem.h> #include "cma_priv.h" /** - * rdma_restrack_init() - initialize resource tracking + * struct rdma_restrack_root - main resource tracking management + * entity, per-device + */ +struct rdma_restrack_root { + /* + * @rwsem: Read/write lock to protect erase of entry. + * Lists and insertions are protected by XArray internal lock. + */ + struct rw_semaphore rwsem; + /** + * @xa: Array of XArray structures to hold restrack entries. + * We want to use array of XArrays because insertion is type + * dependent. For types with existing unique ID (like QPN), + * we will insert to that unique index. For other types, + * we insert based on pointers and auto-allocate unique index. 
+ */ + struct xarray xa[RDMA_RESTRACK_MAX]; +}; + +/** + * rdma_restrack_init() - initialize and allocate resource tracking * @dev: IB device + * + * Return: 0 on success */ -void rdma_restrack_init(struct ib_device *dev) +int rdma_restrack_init(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root *rt; int i; + dev->res = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!dev->res) + return -ENOMEM; + + rt = dev->res; + for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) - xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC); + xa_init_flags(&rt->xa[i], XA_FLAGS_ALLOC); + init_rwsem(&rt->rwsem); - init_rwsem(&res->rwsem); + return 0; } static const char *type2str(enum rdma_restrack_type type) @@ -41,13 +71,52 @@ static const char *type2str(enum rdma_restrack_type type) return names[type]; }; +/** + * rdma_dev_to_xa() - translate from device to XArray DB + * @dev: IB device to work + * @type: resource track type + * + * Return: XArray DB to use for xa_for_each() iterations + */ +struct xarray *rdma_dev_to_xa(struct ib_device *dev, + enum rdma_restrack_type type) +{ + return &dev->res->xa[type]; + +} +EXPORT_SYMBOL(rdma_dev_to_xa); + +/** + * rdma_rt_read_lock() - Lock XArray for read, needed while iterating + * with xa_for_each() + * @dev: IB device to work + * @type: resource track type + */ +void rdma_rt_read_lock(struct ib_device *dev, enum rdma_restrack_type type) +{ + down_read(&dev->res->rwsem); +} +EXPORT_SYMBOL(rdma_rt_read_lock); + +/** + * rdma_rt_read_unlock() - Unlock XArray for read, needed while iterating + * with xa_for_each() + * @dev: IB device to work + * @type: resource track type + */ +void rdma_rt_read_unlock(struct ib_device *dev, enum rdma_restrack_type type) +{ + up_read(&dev->res->rwsem); +} +EXPORT_SYMBOL(rdma_rt_read_unlock); + /** * rdma_restrack_clean() - clean resource tracking * @dev: IB device */ void rdma_restrack_clean(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root 
*rt = dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; bool found = false; @@ -55,14 +124,16 @@ void rdma_restrack_clean(struct ib_device *dev) int i; for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { - if (!xa_empty(&res->xa[i])) { + struct xarray *xa = rdma_dev_to_xa(dev, i); + + if (!xa_empty(xa)) { unsigned long index = 0; if (!found) { pr_err("restrack: %s", CUT_HERE); dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); } - xa_for_each(&res->xa[i], e, index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, e, index, ULONG_MAX, XA_PRESENT) { if (rdma_is_kernel_res(e)) { owner = e->kern_name; } else { @@ -82,10 +153,12 @@ void rdma_restrack_clean(struct ib_device *dev) } found = true; } - xa_destroy(&res->xa[i]); + xa_destroy(xa); } if (found) pr_err("restrack: %s", CUT_HERE); + + kfree(rt); } /** @@ -97,19 +170,19 @@ void rdma_restrack_clean(struct ib_device *dev) int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { - struct rdma_restrack_root *res = &dev->res; + struct xarray *xa = rdma_dev_to_xa(dev, type); struct rdma_restrack_entry *e; unsigned long index = 0; u32 cnt = 0; - down_read(&res->rwsem); - xa_for_each(&res->xa[type], e, index, ULONG_MAX, XA_PRESENT) { + rdma_rt_read_lock(dev, type); + xa_for_each(xa, e, index, ULONG_MAX, XA_PRESENT) { if (ns == &init_pid_ns || (!rdma_is_kernel_res(e) && ns == task_active_pid_ns(e->task))) cnt++; } - up_read(&res->rwsem); + rdma_rt_read_unlock(dev, type); return cnt; } EXPORT_SYMBOL(rdma_restrack_count); @@ -196,6 +269,7 @@ static unsigned long res_to_id(struct rdma_restrack_entry *res) static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); + struct xarray *xa = rdma_dev_to_xa(dev, res->type); unsigned long id; int ret; @@ -218,7 +292,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) res->valid = true; id = res_to_id(res); - ret = xa_insert(&dev->res.xa[res->type], id, res, 
GFP_KERNEL); + ret = xa_insert(xa, id, res, GFP_KERNEL); WARN_ONCE(ret == -EEXIST, "Tried to add non-unique type %d entry\n", res->type); if (ret) @@ -265,10 +339,10 @@ struct rdma_restrack_entry * rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id) { - struct rdma_restrack_root *rt = &dev->res; + struct xarray *xa = rdma_dev_to_xa(dev, type); struct rdma_restrack_entry *res; - res = xa_load(&rt->xa[type], id); + res = xa_load(xa, id); if (!res || xa_is_err(res) || !rdma_restrack_get(res)) return ERR_PTR(-ENOENT); return res; @@ -291,25 +365,42 @@ EXPORT_SYMBOL(rdma_restrack_put); void rdma_restrack_del(struct rdma_restrack_entry *res) { - struct ib_device *dev; + struct ib_device *dev = res_to_dev(res); + struct xarray *xa; unsigned long id; if (!res->valid) goto out; - dev = res_to_dev(res); + /* + * All objects except CM_ID set valid device immediately + * after new object is created, which means that even invalid + * objects will still have "dev". + * + * It is not the case for CM_ID, newly created object has + * this field set to NULL and it is set in _cma_attach_to_dev() + * only. + * + * Because we don't want to add any conditions on call + * to rdma_restrack_del(), the check below protects from + * NULL-dereference. 
+ */ if (!dev) return; + xa = rdma_dev_to_xa(dev, res->type); + id = res_to_id(res); + if (!xa_load(xa, id)) + goto out; + rdma_restrack_put(res); wait_for_completion(&res->comp); - id = res_to_id(res); - down_write(&dev->res.rwsem); - xa_erase(&dev->res.xa[res->type], id); + down_write(&dev->res->rwsem); + xa_erase(xa, id); res->valid = false; - up_write(&dev->res.rwsem); + up_write(&dev->res->rwsem); out: if (res->task) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9d593c89f541..d54c87640f89 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2519,6 +2519,8 @@ struct ib_device_ops { struct rdma_restrack_entry *entry); }; +struct rdma_restrack_root; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2583,10 +2585,7 @@ struct ib_device { #endif u32 index; - /* - * Implementation details of the RDMA core, don't use in drivers - */ - struct rdma_restrack_root res; + struct rdma_restrack_root *res; const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d6490cdebcfa..d66b0b052354 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -7,7 +7,6 @@ #define _RDMA_RESTRACK_H_ #include <linux/typecheck.h> -#include <linux/rwsem.h> #include <linux/sched.h> #include <linux/kref.h> #include <linux/completion.h> @@ -50,27 +49,6 @@ enum rdma_restrack_type { }; struct ib_device; -struct rdma_restrack_entry; - -/** - * struct rdma_restrack_root - main resource tracking management - * entity, per-device - */ -struct rdma_restrack_root { - /* - * @rwsem: Read/write lock to protect erase of entry. - * Lists and insertions are protected by XArray internal lock. - */ - struct rw_semaphore rwsem; - /** - * @xa: Array of XArray structures to hold restrack entries. - * We want to use array of XArrays because insertion is type - * dependent. 
For types with xisiting unique ID (like QPN), - * we will insert to that unique index. For other types, - * we insert based on pointers and auto-allocate unique index. - */ - struct xarray xa[RDMA_RESTRACK_MAX]; -}; /** * struct rdma_restrack_entry - metadata per-entry @@ -117,7 +95,7 @@ struct rdma_restrack_entry { bool user; }; -void rdma_restrack_init(struct ib_device *dev); +int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, @@ -175,4 +153,8 @@ int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); +struct xarray *rdma_dev_to_xa(struct ib_device *dev, + enum rdma_restrack_type type); +void rdma_rt_read_lock(struct ib_device *dev, enum rdma_restrack_type type); +void rdma_rt_read_unlock(struct ib_device *dev, enum rdma_restrack_type type); #endif /* _RDMA_RESTRACK_H_ */ -- 2.19.1