Re: [PATCH rdma-next 7/7] RDMA/core: Allow detaching gid attribute netdevice for RoCE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 10, 2019 at 10:55:07AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 10, 2019 at 01:47:52PM +0000, Parav Pandit wrote:
> > Hi Leon,
> >
> > > From: Leon Romanovsky
> > > Sent: Wednesday, April 10, 2019 4:29 AM
> > > To: Doug Ledford <dledford@xxxxxxxxxx>; Jason Gunthorpe
> > > <jgg@xxxxxxxxxxxx>
> > > Cc: RDMA mailing list <linux-rdma@xxxxxxxxxxxxxxx>; Huy Nguyen
> > > <huyn@xxxxxxxxxxxx>; Martin Wilck <mwilck@xxxxxxxx>; Parav Pandit
> > > <parav@xxxxxxxxxxxx>
> > > Subject: Re: [PATCH rdma-next 7/7] RDMA/core: Allow detaching gid
> > > attribute netdevice for RoCE
> > >
> > > On Wed, Apr 10, 2019 at 11:15:24AM +0300, Leon Romanovsky wrote:
> > > > From: Parav Pandit <parav@xxxxxxxxxxxx>
> > > >
> > > > When there is active traffic through a GID, a QP/AH holds reference to
> > > > this GID entry. RoCE GID entry holds reference to its attached
> > > > netdevice. Due to this when netdevice is deleted by admin user, its
> > > > refcount is not dropped.
> > > >
> > > > Therefore, while deleting RoCE GID, wait for all GID attribute's
> > > > netdev users to finish accessing netdev in rcu context.
> > > > Once all users done accessing it, release the netdev refcount.
> > > >
> > > > Signed-off-by: Huy Nguyen <huyn@xxxxxxxxxxxx>
> > > > Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
> > > > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> > > >  drivers/infiniband/core/cache.c | 75
> > > > +++++++++++++++++++++++++++------  drivers/infiniband/core/sysfs.c | 13
> > > ++++--
> > > >  include/rdma/ib_verbs.h         |  2 +-
> > > >  3 files changed, 73 insertions(+), 17 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/core/cache.c
> > > > b/drivers/infiniband/core/cache.c index d1626502a0d5..0a6f67f5b34f
> > > > 100644
> > > > +++ b/drivers/infiniband/core/cache.c
> > > > @@ -78,11 +78,22 @@ enum gid_table_entry_state {
> > > >  	GID_TABLE_ENTRY_PENDING_DEL	= 3,
> > > >  };
> > > >
> > > > +struct roce_gid_ndev_storage {
> > > > +	struct rcu_head rcu_head;
> > > > +	struct net_device *ndev;
> > > > +};
> > > > +
> > > >  struct ib_gid_table_entry {
> > > >  	struct kref			kref;
> > > >  	struct work_struct		del_work;
> > > >  	struct ib_gid_attr		attr;
> > > >  	void				*context;
> > > > +	/* Store the ndev pointer to release reference later on in
> > > > +	 * call_rcu context because by that time gid_table_entry
> > > > +	 * and attr might be already freed. So keep a copy of it.
> > > > +	 * ndev_storage is freed by rcu callback.
> > > > +	 */
> > > > +	struct roce_gid_ndev_storage	*ndev_storage;
> > > >  	enum gid_table_entry_state	state;
> > > >  };
> > > >
> > > > @@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref)
> > > >  	queue_work(ib_wq, &entry->del_work);  }
> > > >
> > > > +static void put_gid_ndev(struct rcu_head *head) {
> > > > +	struct roce_gid_ndev_storage *storage =
> > > > +		container_of(head, struct roce_gid_ndev_storage, rcu_head);
> > > > +
> > > > +	WARN_ON(!storage->ndev);
> > > > +	/* At this point its safe to release netdev reference,
> > > > +	 * as all callers working on gid_attr->ndev are done
> > > > +	 * using this netdev.
> > > > +	 */
> > > > +	dev_put(storage->ndev);
> > > > +	kfree(storage);
> > > > +}
> > > > +
> > > >  static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
> > > > {
> > > >  	struct ib_device *device = entry->attr.device; @@ -228,8 +253,8 @@
> > > > static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
> > > >  	/* Now this index is ready to be allocated */
> > > >  	write_unlock_irq(&table->rwlock);
> > > >
> > > > -	if (entry->attr.ndev)
> > > > -		dev_put(entry->attr.ndev);
> > > > +	if (entry->ndev_storage)
> > > > +		call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
> > > >  	kfree(entry);
> > > >  }
> > > >
> > > > @@ -266,14 +291,27 @@ static struct ib_gid_table_entry *
> > > > alloc_gid_entry(const struct ib_gid_attr *attr)  {
> > > >  	struct ib_gid_table_entry *entry;
> > > > +	struct net_device *ndev;
> > > >
> > > >  	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> > > >  	if (!entry)
> > > >  		return NULL;
> > > > +	/* rcu_dereference is not needed. It is only to avoid
> > > > +	 * smatch complain.
> > > > +	 */
> > > > +	ndev = rcu_dereference(attr->ndev);
> > >
> > > I don't know why I missed it, but it is wrong.
> > > It should be rcu_access_pointer().
> > >
> > We are dereferencing a pointer using dev_hold() under if (ndev).
> > rcu_access_pointer() comment snippet section below,
> >
> >  * Return the value of the specified RCU-protected pointer, but omit the
> >  * lockdep checks for being in an RCU read-side critical section.  This is
> >  * useful when the value of this pointer is accessed, but the pointer is
> >  * not dereferenced,
> >    ^^^^^^^^^^^
> >
> > So we should not use rcu_access_pointer() due to dev_hold?
>
> If you really don't need to be in the RCU critical section then the
> right primitive is rcu_dereference_protected - but the comment should
> explain why the critical region is not required here not complain
> opaquely about smatch.

Parav,

Boot with rcu_dereference produces the following warning.

[    7.921247] mlx5_core 0000:00:0c.0: firmware version: 3.10.9999
[    7.921730] mlx5_core 0000:00:0c.0: 0.000 Gb/s available PCIe bandwidth (Unknown speed x255 link)
[    8.299897] mlx5_ib: Mellanox Connect-IB Infiniband driver v5.0-0
[    8.307859]
[    8.307989] =============================
[    8.308084] WARNING: suspicious RCU usage
[    8.308193] 5.1.0-rc2+ #278 Not tainted
[    8.308302] -----------------------------
[    8.308446] drivers/infiniband/core/cache.c:302 suspicious rcu_dereference_check() usage!

>
> Jason

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux