Re: [PATCH] RDMA/ipoib: Update paths on CLIENT_REREG/SM_CHANGE events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote:
> We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> through and marks paths invalid. But we weren't always checking for this
> validity when we needed to, and so we could keep using a path marked
> invalid.  What's more, once we establish a path with a valid ah, we put
> a pointer to the ah in the neigh struct directly, so even if we mark the
> path as invalid, as long as the neigh has a direct pointer to the ah, it
> keeps using the old, outdated ah.
> 
> To fix this we do several things.
> 
> 1) Put the valid flag in the ah instead of the path struct, so when we
> put the ah pointer directly in the neigh struct, we can easily check the
> validity of the ah on send events.
> 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> places, and if we have an ah, but it's invalid, then invoke a refresh of
> the ah.
> 3) Fix the various places that check for path, but didn't check for
> path->valid (now path->ah && path->ah->valid).
> 
> Reported-by: Evgenii Smirnov <evgenii.smirnov@xxxxxxxxxxxxxxxx>
> Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> 		      SM change events")
> Signed-off-by: Doug Ledford <dledford@xxxxxxxxxx>
> ---

Evgenii, have you had a chance to see if this patch resolves your issue
properly?

>  drivers/infiniband/ulp/ipoib/ipoib.h      |  2 +-
>  drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++++++++++-----
>  2 files changed, 28 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 308e0ce49289..a50b062ed13e 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -415,6 +415,7 @@ struct ipoib_ah {
>  	struct list_head   list;
>  	struct kref	   ref;
>  	unsigned	   last_send;
> +	int  		   valid;
>  };
>  
>  struct ipoib_path {
> @@ -431,7 +432,6 @@ struct ipoib_path {
>  
>  	struct rb_node	      rb_node;
>  	struct list_head      list;
> -	int  		      valid;
>  };
>  
>  struct ipoib_neigh {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index cf291f90b58f..788bb9573f1f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
>  		ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
>  			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
>  			  path->pathrec.dgid.raw);
> -		path->valid =  0;
> +		if (path->ah)
> +			path->ah->valid = 0;
>  	}
>  
>  	spin_unlock_irq(&priv->lock);
> @@ -833,7 +834,7 @@ static void path_rec_completion(int status,
>  			while ((skb = __skb_dequeue(&neigh->queue)))
>  				__skb_queue_tail(&skqueue, skb);
>  		}
> -		path->valid = 1;
> +		path->ah->valid = 1;
>  	}
>  
>  	path->query = NULL;
> @@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev,
>  	return 0;
>  }
>  
> +static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
> +			       struct net_device *dev)
> +{
> +	struct ipoib_dev_priv *priv = ipoib_priv(dev);
> +	struct ipoib_path *path;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&priv->lock, flags);
> +
> +	path = __path_find(dev, daddr + 4);
> +	if (!path)
> +		goto out;
> +	if (!path->query)
> +		path_rec_start(dev, path);
> +out:
> +	spin_unlock_irqrestore(&priv->lock, flags);
> +}
> +
>  static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>  					  struct net_device *dev)
>  {
> @@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>  
>  	list_add_tail(&neigh->list, &path->neigh_list);
>  
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>  		kref_get(&path->ah->ref);
>  		neigh->ah = path->ah;
>  
> @@ -1034,7 +1053,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>  		goto drop_and_unlock;
>  
>  	path = __path_find(dev, phdr->hwaddr + 4);
> -	if (!path || !path->valid) {
> +	if (!path || !path->ah || !path->ah->valid) {
>  		int new_path = 0;
>  
>  		if (!path) {
> @@ -1069,7 +1088,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>  		return;
>  	}
>  
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>  		ipoib_dbg(priv, "Send unicast ARP to %08x\n",
>  			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
>  
> @@ -1161,10 +1180,12 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
>  			goto unref;
>  		}
> -	} else if (neigh->ah) {
> +	} else if (neigh->ah && neigh->ah->valid) {
>  		neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
>  						IPOIB_QPN(phdr->hwaddr));
>  		goto unref;
> +	} else if (neigh->ah) {
> +		neigh_refresh_path(neigh, phdr->hwaddr, dev);
>  	}
>  
>  	if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {

-- 
Doug Ledford <dledford@xxxxxxxxxx>
    GPG KeyID: B826A3330E572FDD
    Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD

Attachment: signature.asc
Description: This is a digitally signed message part


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux