RE: [PATCH rdma-next 7/7] RDMA/core: Allow detaching gid attribute netdevice for RoCE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: Leon Romanovsky
> Sent: Wednesday, April 10, 2019 9:04 AM
> To: Jason Gunthorpe <jgg@xxxxxxxx>
> Cc: Parav Pandit <parav@xxxxxxxxxxxx>; Doug Ledford
> <dledford@xxxxxxxxxx>; RDMA mailing list <linux-rdma@xxxxxxxxxxxxxxx>;
> Huy Nguyen <huyn@xxxxxxxxxxxx>; Martin Wilck <mwilck@xxxxxxxx>
> Subject: Re: [PATCH rdma-next 7/7] RDMA/core: Allow detaching gid
> attribute netdevice for RoCE
> 
> On Wed, Apr 10, 2019 at 10:55:07AM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 10, 2019 at 01:47:52PM +0000, Parav Pandit wrote:
> > > Hi Leon,
> > >
> > > > From: Leon Romanovsky
> > > > Sent: Wednesday, April 10, 2019 4:29 AM
> > > > To: Doug Ledford <dledford@xxxxxxxxxx>; Jason Gunthorpe
> > > > <jgg@xxxxxxxxxxxx>
> > > > Cc: RDMA mailing list <linux-rdma@xxxxxxxxxxxxxxx>; Huy Nguyen
> > > > <huyn@xxxxxxxxxxxx>; Martin Wilck <mwilck@xxxxxxxx>; Parav
> Pandit
> > > > <parav@xxxxxxxxxxxx>
> > > > Subject: Re: [PATCH rdma-next 7/7] RDMA/core: Allow detaching gid
> > > > attribute netdevice for RoCE
> > > >
> > > > On Wed, Apr 10, 2019 at 11:15:24AM +0300, Leon Romanovsky wrote:
> > > > > From: Parav Pandit <parav@xxxxxxxxxxxx>
> > > > >
> > > > > When there is active traffic through a GID, a QP/AH holds
> > > > > reference to this GID entry. RoCE GID entry holds reference to
> > > > > its attached netdevice. Due to this when netdevice is deleted by
> > > > > admin user, its refcount is not dropped.
> > > > >
> > > > > Therefore, while deleting RoCE GID, wait for all GID attribute's
> > > > > netdev users to finish accessing netdev in rcu context.
> > > > > Once all users done accessing it, release the netdev refcount.
> > > > >
> > > > > Signed-off-by: Huy Nguyen <huyn@xxxxxxxxxxxx>
> > > > > Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
> > > > > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> > > > > drivers/infiniband/core/cache.c | 75
> > > > > +++++++++++++++++++++++++++------
> > > > > +++++++++++++++++++++++++++drivers/infiniband/core/sysfs.c | 13
> > > > ++++--
> > > > >  include/rdma/ib_verbs.h         |  2 +-
> > > > >  3 files changed, 73 insertions(+), 17 deletions(-)
> > > > >
> > > > > diff --git a/drivers/infiniband/core/cache.c
> > > > > b/drivers/infiniband/core/cache.c index
> > > > > d1626502a0d5..0a6f67f5b34f
> > > > > 100644
> > > > > +++ b/drivers/infiniband/core/cache.c
> > > > > @@ -78,11 +78,22 @@ enum gid_table_entry_state {
> > > > >  	GID_TABLE_ENTRY_PENDING_DEL	= 3,
> > > > >  };
> > > > >
> > > > > +struct roce_gid_ndev_storage {
> > > > > +	struct rcu_head rcu_head;
> > > > > +	struct net_device *ndev;
> > > > > +};
> > > > > +
> > > > >  struct ib_gid_table_entry {
> > > > >  	struct kref			kref;
> > > > >  	struct work_struct		del_work;
> > > > >  	struct ib_gid_attr		attr;
> > > > >  	void				*context;
> > > > > +	/* Store the ndev pointer to release reference later on in
> > > > > +	 * call_rcu context because by that time gid_table_entry
> > > > > +	 * and attr might be already freed. So keep a copy of it.
> > > > > +	 * ndev_storage is freed by rcu callback.
> > > > > +	 */
> > > > > +	struct roce_gid_ndev_storage	*ndev_storage;
> > > > >  	enum gid_table_entry_state	state;
> > > > >  };
> > > > >
> > > > > @@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref
> *kref)
> > > > >  	queue_work(ib_wq, &entry->del_work);  }
> > > > >
> > > > > +static void put_gid_ndev(struct rcu_head *head) {
> > > > > +	struct roce_gid_ndev_storage *storage =
> > > > > +		container_of(head, struct roce_gid_ndev_storage,
> rcu_head);
> > > > > +
> > > > > +	WARN_ON(!storage->ndev);
> > > > > +	/* At this point its safe to release netdev reference,
> > > > > +	 * as all callers working on gid_attr->ndev are done
> > > > > +	 * using this netdev.
> > > > > +	 */
> > > > > +	dev_put(storage->ndev);
> > > > > +	kfree(storage);
> > > > > +}
> > > > > +
> > > > >  static void free_gid_entry_locked(struct ib_gid_table_entry
> > > > > *entry) {
> > > > >  	struct ib_device *device = entry->attr.device; @@ -228,8
> > > > > +253,8 @@ static void free_gid_entry_locked(struct
> ib_gid_table_entry *entry)
> > > > >  	/* Now this index is ready to be allocated */
> > > > >  	write_unlock_irq(&table->rwlock);
> > > > >
> > > > > -	if (entry->attr.ndev)
> > > > > -		dev_put(entry->attr.ndev);
> > > > > +	if (entry->ndev_storage)
> > > > > +		call_rcu(&entry->ndev_storage->rcu_head,
> put_gid_ndev);
> > > > >  	kfree(entry);
> > > > >  }
> > > > >
> > > > > @@ -266,14 +291,27 @@ static struct ib_gid_table_entry *
> > > > > alloc_gid_entry(const struct ib_gid_attr *attr)  {
> > > > >  	struct ib_gid_table_entry *entry;
> > > > > +	struct net_device *ndev;
> > > > >
> > > > >  	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> > > > >  	if (!entry)
> > > > >  		return NULL;
> > > > > +	/* rcu_dereference is not needed. It is only to avoid
> > > > > +	 * smatch complain.
> > > > > +	 */
> > > > > +	ndev = rcu_dereference(attr->ndev);
> > > >
> > > > I don't know why I missed it, but it is wrong.
> > > > It should be rcu_access_pointer().
> > > >
> > > We are dereferencing a pointer using dev_hold() under if (ndev).
> > > rcu_access_pointer() comment snippet section below,
> > >
> > >  * Return the value of the specified RCU-protected pointer, but omit
> > > the
> > >  * lockdep checks for being in an RCU read-side critical section.
> > > This is
> > >  * useful when the value of this pointer is accessed, but the
> > > pointer is
> > >  * not dereferenced,
> > >    ^^^^^^^^^^^
> > >
> > > So we should not use rcu_access_pointer() due to dev_hold?
> >
> > If you really don't need to be in the RCU critical section then the
> > right primitive is rcu_dereference_protected - but the comment should
> > explain why the critical region is not required here not complain
> > opaquely about smatch.
> 
> Parav,
> 
> Boot with rcu_dereference produces the following warning.
> 
> [    7.921247] mlx5_core 0000:00:0c.0: firmware version: 3.10.9999
> [    7.921730] mlx5_core 0000:00:0c.0: 0.000 Gb/s available PCIe bandwidth
> (Unknown speed x255 link)
> [    8.299897] mlx5_ib: Mellanox Connect-IB Infiniband driver v5.0-0
> [    8.307859]
> [    8.307989] =============================
> [    8.308084] WARNING: suspicious RCU usage
> [    8.308193] 5.1.0-rc2+ #278 Not tainted
> [    8.308302] -----------------------------
> [    8.308446] drivers/infiniband/core/cache.c:302 suspicious
> rcu_dereference_check() usage!
> 
> >
Yes. So lets do 

rcu_dereference_protected(attr->ndev, 1);

with below updated comment?

+	/* rcu_dereference is not needed because GID attr being passed as input during
+	 *  GID addition cannot change. It is used only to avoid smatch complain.
+	 */

> > Jason




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux