RE: [PATCH rdma-next v1 1/2] IB/core: Let IB core distribute cache update events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: Leon Romanovsky <leon@xxxxxxxxxx>
> Sent: Tuesday, October 29, 2019 6:53 AM
> To: Doug Ledford <dledford@xxxxxxxxxx>; Jason Gunthorpe
> <jgg@xxxxxxxxxxxx>
> Cc: Leon Romanovsky <leonro@xxxxxxxxxxxx>; RDMA mailing list <linux-
> rdma@xxxxxxxxxxxxxxx>; Daniel Jurgens <danielj@xxxxxxxxxxxx>; Parav
> Pandit <parav@xxxxxxxxxxxx>
> Subject: [PATCH rdma-next v1 1/2] IB/core: Let IB core distribute cache
> update events
> 
> From: Parav Pandit <parav@xxxxxxxxxxxx>
> 
> Currently when low level driver notifies Pkey, GID, port change events, they
> are notified to the registered handlers in the order they are registered.
> IB core and other ULPs such as IPoIB are interested in GID, LID, Pkey change
> events.
> Since all GID query done by ULPs is serviced by IB core, in below flow when
> GID change event occurs, IB core is yet to update the GID cache when IPoIB
> queries the GID, resulting into not updating IPoIB address.
> 
> mlx5_ib_handle_event()
>   ib_dispatch_event()
>     ib_cache_event()
>        queue_work() -> slow cache update
> 
>     [..]
>     ipoib_event()
>      queue_work()
>        [..]
>        work handler
>          ipoib_ib_dev_flush_light()
>            __ipoib_ib_dev_flush()
>               ipoib_dev_addr_changed_valid()
>                 rdma_query_gid() <- Returns old GID, cache not updated.
> 
> Hence, all events which require cache update are handled first by the IB
> core. Once cache update work is completed, IB core distributes the event to
> subscriber clients.
> 
> Fixes: f35faa4ba956 ("IB/core: Simplify ib_query_gid to always refer to
> cache")
> Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
> Reviewed-by: Daniel Jurgens <danielj@xxxxxxxxxxxx>
> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/core/cache.c     | 84 ++++++++++++++---------------
>  drivers/infiniband/core/core_priv.h |  2 +
>  drivers/infiniband/core/device.c    | 27 ++++++----
>  include/rdma/ib_verbs.h             |  1 -
>  4 files changed, 57 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index d535995711c3..1d5c0df7dfab 100644
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -51,9 +51,8 @@ struct ib_pkey_cache {
> 
>  struct ib_update_work {
>  	struct work_struct work;
> -	struct ib_device  *device;
> -	u8                 port_num;
> -	bool		   enforce_security;
> +	struct ib_event event;
> +	bool enforce_security;
>  };
> 
>  union ib_gid zgid;
> @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct
> ib_device *ib_dev, u8 port)
>  	event.element.port_num	= port;
>  	event.event		= IB_EVENT_GID_CHANGE;
> 
> -	ib_dispatch_event(&event);
> +	ib_dispatch_cache_event_clients(&event);
>  }
> 
>  static const char * const gid_type_str[] = { @@ -1381,9 +1380,8 @@ static
> int config_non_roce_gid_cache(struct ib_device *device,
>  	return ret;
>  }
> 
> -static void ib_cache_update(struct ib_device *device,
> -			    u8                port,
> -			    bool	      enforce_security)
> +static int
> +ib_cache_update(struct ib_device *device, u8 port, bool
> 	enforce_security)
>  {
>  	struct ib_port_attr       *tprops = NULL;
>  	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
> @@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device
> *device,
>  	int                        ret;
> 
>  	if (!rdma_is_port_valid(device, port))
> -		return;
> +		return -EINVAL;
> 
>  	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
>  	if (!tprops)
> -		return;
> +		return -ENOMEM;
> 
>  	ret = ib_query_port(device, port, tprops);
>  	if (ret) {
> @@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device
> *device,
>  	pkey_cache = kmalloc(struct_size(pkey_cache, table,
>  					 tprops->pkey_tbl_len),
>  			     GFP_KERNEL);
> -	if (!pkey_cache)
> +	if (!pkey_cache) {
> +		ret = -ENOMEM;
>  		goto err;
> +	}
> 
>  	pkey_cache->table_len = tprops->pkey_tbl_len;
> 
> @@ -1446,49 +1446,48 @@ static void ib_cache_update(struct ib_device
> *device,
> 
>  	kfree(old_pkey_cache);
>  	kfree(tprops);
> -	return;
> +	return 0;
> 
>  err:
>  	kfree(pkey_cache);
>  	kfree(tprops);
> +	return ret;
>  }
> 
>  static void ib_cache_task(struct work_struct *_work)  {
>  	struct ib_update_work *work =
>  		container_of(_work, struct ib_update_work, work);
> +	int ret;
> +
> +	ret = ib_cache_update(work->event.device, work-
> >event.element.port_num,
> +			      work->enforce_security);
> +
> +	/* GID event is notified already for individual GID entries by
> +	 * dispatch_gid_change_event(). Hence, notify for rest of the
> +	 * events.
> +	 */
> +	if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
> +		ib_dispatch_cache_event_clients(&work->event);
> 
> -	ib_cache_update(work->device,
> -			work->port_num,
> -			work->enforce_security);
>  	kfree(work);
>  }
> 
> -static void ib_cache_event(struct ib_event_handler *handler,
> -			   struct ib_event *event)
> +void ib_enqueue_cache_update_event(const struct ib_event *event)
>  {
>  	struct ib_update_work *work;
> 
> -	if (event->event == IB_EVENT_PORT_ERR    ||
> -	    event->event == IB_EVENT_PORT_ACTIVE ||
> -	    event->event == IB_EVENT_LID_CHANGE  ||
> -	    event->event == IB_EVENT_PKEY_CHANGE ||
> -	    event->event == IB_EVENT_CLIENT_REREGISTER ||
> -	    event->event == IB_EVENT_GID_CHANGE) {
> -		work = kmalloc(sizeof *work, GFP_ATOMIC);
> -		if (work) {
> -			INIT_WORK(&work->work, ib_cache_task);
> -			work->device   = event->device;
> -			work->port_num = event->element.port_num;
> -			if (event->event == IB_EVENT_PKEY_CHANGE ||
> -			    event->event == IB_EVENT_GID_CHANGE)
> -				work->enforce_security = true;
> -			else
> -				work->enforce_security = false;
> -
> -			queue_work(ib_wq, &work->work);
> -		}
> -	}
> +	work = kzalloc(sizeof(*work), GFP_ATOMIC);
> +	if (!work)
> +		return;
> +
> +	INIT_WORK(&work->work, ib_cache_task);
> +	work->event = *event;
> +	if (event->event == IB_EVENT_PKEY_CHANGE ||
> +	    event->event == IB_EVENT_GID_CHANGE)
> +		work->enforce_security = true;
> +
> +	queue_work(ib_wq, &work->work);
>  }
> 
>  int ib_cache_setup_one(struct ib_device *device) @@ -1505,9 +1504,6 @@
> int ib_cache_setup_one(struct ib_device *device)
>  	rdma_for_each_port (device, p)
>  		ib_cache_update(device, p, true);
> 
> -	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
> -			      device, ib_cache_event);
> -	ib_register_event_handler(&device->cache.event_handler);
>  	return 0;
>  }
> 
> @@ -1529,14 +1525,12 @@ void ib_cache_release_one(struct ib_device
> *device)
> 
>  void ib_cache_cleanup_one(struct ib_device *device)  {
> -	/* The cleanup function unregisters the event handler,
> -	 * waits for all in-progress workqueue elements and cleans
> -	 * up the GID cache. This function should be called after
> -	 * the device was removed from the devices list and all
> -	 * clients were removed, so the cache exists but is
> +	/* The cleanup function waits for all in-progress workqueue
> +	 * elements and cleans up the GID cache. This function should be
> +	 * called after the device was removed from the devices list and
> +	 * all clients were removed, so the cache exists but is
>  	 * non-functional and shouldn't be updated anymore.
>  	 */
> -	ib_unregister_event_handler(&device->cache.event_handler);
>  	flush_workqueue(ib_wq);
>  	gid_table_cleanup_one(device);
> 
> diff --git a/drivers/infiniband/core/core_priv.h
> b/drivers/infiniband/core/core_priv.h
> index 3a8b0911c3bc..137c98098489 100644
> --- a/drivers/infiniband/core/core_priv.h
> +++ b/drivers/infiniband/core/core_priv.h
> @@ -149,6 +149,8 @@ unsigned long roce_gid_type_mask_support(struct
> ib_device *ib_dev, u8 port);  int ib_cache_setup_one(struct ib_device
> *device);  void ib_cache_cleanup_one(struct ib_device *device);  void
> ib_cache_release_one(struct ib_device *device);
> +void ib_enqueue_cache_update_event(const struct ib_event *event); void
> +ib_dispatch_cache_event_clients(struct ib_event *event);
> 
>  #ifdef CONFIG_CGROUP_RDMA
>  void ib_device_register_rdmacg(struct ib_device *device); diff --git
> a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index f8d383ceae05..9db229c628e9 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -1931,8 +1931,8 @@ EXPORT_SYMBOL(ib_set_client_data);
>   *
>   * ib_register_event_handler() registers an event handler that will be
>   * called back when asynchronous IB events occur (as defined in
> - * chapter 11 of the InfiniBand Architecture Specification).  This
> - * callback may occur in interrupt context.
> + * chapter 11 of the InfiniBand Architecture Specification). This
> + * callback always occurs in workqueue context.
>   */
>  void ib_register_event_handler(struct ib_event_handler *event_handler)  {
> @@ -1962,15 +1962,7 @@ void ib_unregister_event_handler(struct
> ib_event_handler *event_handler)  }
> EXPORT_SYMBOL(ib_unregister_event_handler);
> 
> -/**
> - * ib_dispatch_event - Dispatch an asynchronous event
> - * @event:Event to dispatch
> - *
> - * Low-level drivers must call ib_dispatch_event() to dispatch the
> - * event to all registered event handlers when an asynchronous event
> - * occurs.
> - */
> -void ib_dispatch_event(struct ib_event *event)
> +void ib_dispatch_cache_event_clients(struct ib_event *event)
>  {
>  	unsigned long flags;
>  	struct ib_event_handler *handler;
> @@ -1982,6 +1974,19 @@ void ib_dispatch_event(struct ib_event *event)
> 
>  	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
> }
> +
> +/**
> + * ib_dispatch_event - Dispatch an asynchronous event
> + * @event:Event to dispatch
> + *
> + * Low-level drivers must call ib_dispatch_event() to dispatch the
> + * event to all registered event handlers when an asynchronous event
> + * occurs.
> + */
> +void ib_dispatch_event(struct ib_event *event) {
> +	ib_enqueue_cache_update_event(event);
> +}
>  EXPORT_SYMBOL(ib_dispatch_event);
> 
>  static int iw_query_port(struct ib_device *device,
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 0626b62ed107..6604115f7c85 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -2148,7 +2148,6 @@ struct ib_port_cache {
> 
>  struct ib_cache {
>  	rwlock_t                lock;
> -	struct ib_event_handler event_handler;
>  };
> 
>  struct ib_port_immutable {
> --
> 2.20.1

We noticed one KASAN report with this patch in additional QA tests. I am looking into it.
Please wait until we have more updates.




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux