Re: [PATCH rdma-next v1 10/12] IB/mlx5: Enable subscription for device events over DEVX

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 18, 2019 at 08:15:38PM +0300, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@xxxxxxxxxxxx>
> 
> Enable subscription for device events over DEVX.
> 
> Each subscription is added to the two level XA data structure according
> to its event number and the DEVX object information in case was given
> with the given target fd.
> 
> Those events will be reported over the given fd once will occur.
> Downstream patches will mange the dispatching to any subscription.
> 
> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx>
> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
>  drivers/infiniband/hw/mlx5/devx.c        | 564 ++++++++++++++++++++++-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |   9 +
>  2 files changed, 566 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
> index e9b9ba5a3e9a..304b13e7a265 100644
> +++ b/drivers/infiniband/hw/mlx5/devx.c
> @@ -14,6 +14,7 @@
>  #include <linux/mlx5/driver.h>
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_ib.h"
> +#include <linux/xarray.h>
>  
>  #define UVERBS_MODULE_NAME mlx5_ib
>  #include <rdma/uverbs_named_ioctl.h>
> @@ -33,6 +34,37 @@ struct devx_async_data {
>  	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
>  };
>  
> +/* first level XA value data structure */
> +struct devx_event {
> +	struct xarray object_ids; /* second XA level, Key = object id */
> +	struct list_head unaffiliated_list;
> +};
> +
> +/* second level XA value data structure */
> +struct devx_obj_event {
> +	struct rcu_head rcu;
> +	struct list_head obj_sub_list;
> +};
> +
> +struct devx_event_subscription {
> +	struct list_head file_list; /* headed in private_data->
> +				     * subscribed_events_list
> +				     */
> +	struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
> +				   * devx_obj_event->obj_sub_list
> +				   */
> +	struct list_head obj_list; /* headed in devx_object */
> +
> +	u32 xa_key_level1;
> +	u32 xa_key_level2;
> +	struct rcu_head	rcu;
> +	u64 cookie;
> +	bool is_obj_related;
> +	struct ib_uobject *fd_uobj;
> +	void *object;	/* May need direct access upon hot unplug */

This should be a 'struct file *' and have a better name.

And I'm unclear why we need to store both the ib_uobject and the
struct file for the same thing? And why are we storing the uobj here
instead of the struct devx_async_event_file *?

Since uobj->object == filp && filp->private_data == uobj, I have a
hard time understanding why we need both things. It seems to me that
if we get the fget on the filp then we can rely on the
filp->private_data to get back to the devx_async_event_file.

> +	struct eventfd_ctx *eventfd;
> +};
> +

>  /*
>   * As the obj_id in the firmware is not globally unique the object type
>   * must be considered upon checking for a valid object id.
> @@ -1143,14 +1275,53 @@ static void devx_cleanup_mkey(struct devx_obj *obj)
>  	write_unlock_irqrestore(&table->lock, flags);
>  }
>  
> +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
> +				      struct devx_event_subscription *sub)
> +{
> +	list_del_rcu(&sub->file_list);
> +	list_del_rcu(&sub->xa_list);
> +
> +	if (sub->is_obj_related) {

is_obj_related looks like it is just list_empty(obj_list)??

Success oriented flow

> @@ -1523,6 +1700,350 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
>  	return err;
>  }
>  
> +static void
> +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
> +			   u32 key_level1,
> +			   u32 key_level2,
> +			   struct devx_obj_event *alloc_obj_event)
> +{
> +	struct devx_event *event;
> +
> +	/* Level 1 is valid for future use - no need to free */
> +	if (!alloc_obj_event)
> +		return;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	WARN_ON(!event);
> +
> +	xa_erase(&event->object_ids, key_level2);

Shouldn't this only erase if the value stored is NULL?

> +	kfree(alloc_obj_event);
> +}
> +
> +static int
> +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
> +			 u32 key_level1,
> +			 bool is_level2,
> +			 u32 key_level2,
> +			 struct devx_obj_event **alloc_obj_event)
> +{
> +	struct devx_obj_event *obj_event;
> +	struct devx_event *event;
> +	bool new_entry_level1 = false;
> +	int err;
> +
> +	event = xa_load(&devx_event_table->event_xa, key_level1);
> +	if (!event) {
> +		event = kzalloc(sizeof(*event), GFP_KERNEL);
> +		if (!event)
> +			return -ENOMEM;
> +
> +		new_entry_level1 = true;
> +		INIT_LIST_HEAD(&event->unaffiliated_list);
> +		xa_init(&event->object_ids);
> +
> +		err = xa_insert(&devx_event_table->event_xa,
> +				key_level1,
> +				event,
> +				GFP_KERNEL);
> +		if (err)
> +			goto end;
> +	}
> +
> +	if (!is_level2)
> +		return 0;
> +
> +	obj_event = xa_load(&event->object_ids, key_level2);
> +	if (!obj_event) {
> +		err = xa_reserve(&event->object_ids, key_level2, GFP_KERNEL);
> +		if (err)
> +			goto err_level1;
> +
> +		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
> +		if (!obj_event) {
> +			err = -ENOMEM;
> +			goto err_level2;
> +		}
> +
> +		INIT_LIST_HEAD(&obj_event->obj_sub_list);
> +		*alloc_obj_event = obj_event;

This is goofy, just store the empty obj_event in the xa instead of
using xa_reserve, and when you go to do the error unwind just delete
any level2 'devx_obj_event' that has a list_empty(obj_sub_list). Get
rid of the wonky alloc_obj_event stuff.

The best configuration would be to use devx_cleanup_subscription to
undo the partially ready subscription.

> +	}
> +
> +	return 0;
> +
> +err_level2:
> +	xa_erase(&event->object_ids, key_level2);
> +
> +err_level1:
> +	if (new_entry_level1)
> +		xa_erase(&devx_event_table->event_xa, key_level1);
> +end:
> +	if (new_entry_level1)
> +		kfree(event);

Can't do this, once the level1 is put in the tree it could be referenced by
the irqs. At least it needs a kfree_rcu, most likely it is simpler to
just leave it.

> +#define MAX_NUM_EVENTS 16
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
> +	struct uverbs_attr_bundle *attrs)
> +{
> +	struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
> +				attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
> +	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
> +		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
> +	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
> +	struct ib_uobject *fd_uobj;
> +	struct devx_obj *obj = NULL;
> +	struct devx_async_event_file *ev_file;
> +	struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
> +	u16 *event_type_num_list;
> +	struct devx_event_subscription **event_sub_arr;
> +	struct devx_obj_event  **event_obj_array_alloc;
> +	int redirect_fd;
> +	bool use_eventfd = false;
> +	int num_events;
> +	int num_alloc_xa_entries = 0;
> +	u16 obj_type = 0;
> +	u64 cookie = 0;
> +	u32 obj_id = 0;
> +	int err;
> +	int i;
> +
> +	if (!c->devx_uid)
> +		return -EINVAL;
> +
> +	if (!IS_ERR(devx_uobj)) {
> +		obj = (struct devx_obj *)devx_uobj->object;
> +		if (obj)
> +			obj_id = get_dec_obj_id(obj->obj_id);
> +	}
> +
> +	fd_uobj = uverbs_attr_get_uobject(attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
> +	if (IS_ERR(fd_uobj))
> +		return PTR_ERR(fd_uobj);
> +
> +	ev_file = container_of(fd_uobj, struct devx_async_event_file,
> +			       uobj);
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
> +		err = uverbs_copy_from(&redirect_fd, attrs,
> +			       MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
> +		if (err)
> +			return err;
> +
> +		use_eventfd = true;
> +	}
> +
> +	if (uverbs_attr_is_valid(attrs,
> +				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
> +		if (use_eventfd)
> +			return -EINVAL;
> +
> +		err = uverbs_copy_from(&cookie, attrs,
> +				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
> +		if (err)
> +			return err;
> +	}
> +
> +	num_events = uverbs_attr_ptr_get_array_size(
> +		attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
> +		sizeof(u16));
> +
> +	if (num_events < 0)
> +		return num_events;
> +
> +	if (num_events > MAX_NUM_EVENTS)
> +		return -EINVAL;
> +
> +	event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
> +			MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
> +
> +	if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
> +		return -EINVAL;
> +
> +	event_sub_arr = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_event_subscription *));
> +	event_obj_array_alloc = uverbs_zalloc(attrs,
> +		MAX_NUM_EVENTS * sizeof(struct devx_obj_event *));

There are so many list_heads in the devx_event_subscription, why not
use just one of them to store the allocated events instead of this
temp array? ie event_list looks good for this purpose.

> +
> +	if (!event_sub_arr || !event_obj_array_alloc)
> +		return -ENOMEM;
> +
> +	/* Protect from concurrent subscriptions to same XA entries to allow
> +	 * both to succeed
> +	 */
> +	mutex_lock(&devx_event_table->event_xa_lock);
> +	for (i = 0; i < num_events; i++) {
> +		u32 key_level1;
> +
> +		if (obj)
> +			obj_type = get_dec_obj_type(obj,
> +						    event_type_num_list[i]);
> +		key_level1 = event_type_num_list[i] | obj_type << 16;
> +
> +		err = subscribe_event_xa_alloc(devx_event_table,
> +					       key_level1,
> +					       obj ? true : false,
> +					       obj_id,
> +					       &event_obj_array_alloc[i]);

Useless ?:

> +		if (err)
> +			goto err;
> +
> +		num_alloc_xa_entries++;
> +		event_sub_arr[i] = kzalloc(sizeof(*event_sub_arr[i]),
> +					   GFP_KERNEL);
> +		if (!event_sub_arr[i])
> +			goto err;
> +
> +		if (use_eventfd) {
> +			event_sub_arr[i]->eventfd =
> +				eventfd_ctx_fdget(redirect_fd);
> +
> +			if (IS_ERR(event_sub_arr[i]->eventfd)) {
> +				err = PTR_ERR(event_sub_arr[i]->eventfd);
> +				event_sub_arr[i]->eventfd = NULL;
> +				goto err;
> +			}
> +		}
> +
> +		event_sub_arr[i]->cookie = cookie;
> +		event_sub_arr[i]->fd_uobj = fd_uobj;
> +		event_sub_arr[i]->object = fd_uobj->object;
> +		/* May be needed upon cleanup the devx object/subscription */
> +		event_sub_arr[i]->xa_key_level1 = key_level1;
> +		event_sub_arr[i]->xa_key_level2 = obj_id;
> +		event_sub_arr[i]->is_obj_related = obj ? true : false;

Unneeded ?:


Jason




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux