On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote: > We do a light flush on CLIENT_REREG and SM_CHANGE events. This goes > through and marks paths invalid. But we weren't always checking for this > validity when we needed to, and so we could keep using a path marked > invalid. What's more, once we establish a path with a valid ah, we put > a pointer to the ah in the neigh struct directly, so even if we mark the > path as invalid, as long as the neigh has a direct pointer to the ah, it > keeps using the old, outdated ah. > > To fix this we do several things. > > 1) Put the valid flag in the ah instead of the path struct, so when we > put the ah pointer directly in the neigh struct, we can easily check the > validity of the ah on send events. > 2) Check the neigh->ah and neigh->ah->valid elements in the needed > places, and if we have an ah, but it's invalid, then invoke a refresh of > the ah. > 3) Fix the various places that check for path, but didn't check for > path->valid (now path->ah && path->ah->valid). > > Reported-by: Evgenii Smirnov <evgenii.smirnov@xxxxxxxxxxxxxxxx> > Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on > SM change events") > Signed-off-by: Doug Ledford <dledford@xxxxxxxxxx> > --- Evgenii, have you had a chance to see if this patch resolves your issue properly? > drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- > drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++++++++++----- > 2 files changed, 28 insertions(+), 7 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h > index 308e0ce49289..a50b062ed13e 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > @@ -415,6 +415,7 @@ struct ipoib_ah { > struct list_head list; > struct kref ref; > unsigned last_send; > + int valid; > }; > > struct ipoib_path { > @@ -431,7 +432,6 @@ struct ipoib_path { > > struct rb_node rb_node; > struct list_head list; > - int valid; > }; > > struct ipoib_neigh { > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index cf291f90b58f..788bb9573f1f 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev) > ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n", > be32_to_cpu(sa_path_get_dlid(&path->pathrec)), > path->pathrec.dgid.raw); > - path->valid = 0; > + if (path->ah) > + path->ah->valid = 0; > } > > spin_unlock_irq(&priv->lock); > @@ -833,7 +834,7 @@ static void path_rec_completion(int status, > while ((skb = __skb_dequeue(&neigh->queue))) > __skb_queue_tail(&skqueue, skb); > } > - path->valid = 1; > + path->ah->valid = 1; > } > > path->query = NULL; > @@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev, > return 0; > } > > +static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr, > + struct net_device *dev) > +{ > + struct ipoib_dev_priv *priv = ipoib_priv(dev); > + struct ipoib_path *path; > + unsigned long flags; > + > + spin_lock_irqsave(&priv->lock, flags); > + > + path = __path_find(dev, daddr + 4); > + if (!path) > + goto out; > + if (!path->query) > + path_rec_start(dev, path); > +out: > + spin_unlock_irqrestore(&priv->lock, flags); > +} > + > static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr, > struct net_device *dev) > { > @@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr, > > list_add_tail(&neigh->list, &path->neigh_list); > > - if (path->ah) { > + if (path->ah && path->ah->valid) { > kref_get(&path->ah->ref); > neigh->ah = path->ah; > > @@ -1034,7 +1053,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, > goto drop_and_unlock; > > path = __path_find(dev, phdr->hwaddr + 4); > - if (!path || !path->valid) { > + if (!path || !path->ah || !path->ah->valid) { > int new_path = 0; > > if (!path) { > @@ -1069,7 +1088,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, > return; > } > > - if (path->ah) { > + if (path->ah && path->ah->valid) { > ipoib_dbg(priv, "Send unicast ARP to %08x\n", > be32_to_cpu(sa_path_get_dlid(&path->pathrec))); > > @@ -1161,10 +1180,12 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) > ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); > goto unref; > } > - } else if (neigh->ah) { > + } else if (neigh->ah && neigh->ah->valid) { > neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah, > IPOIB_QPN(phdr->hwaddr)); > goto unref; > + } else if (neigh->ah) { > + neigh_refresh_path(neigh, phdr->hwaddr, dev); > } > > if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { -- Doug Ledford <dledford@xxxxxxxxxx> GPG KeyID: B826A3330E572FDD Key fingerprint = AE6B 1BDA 122B 23B4 265B 1274 B826 A333 0E57 2FDD
Attachment:
signature.asc
Description: This is a digitally signed message part