Re: [PATCH for-next 03/14] RDMA/bnxt_re: Fix race between netdev register and unregister events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, May 10, 2017 at 03:45:28AM -0700, Selvin Xavier wrote:
> From: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>
>
> Upon receipt of the NETDEV_REGISTER event from the netdev notifier
> chain, the IB stack registration is spawned off to a workqueue
> since that also requires an rtnl lock.
> There could be 2 kinds of races between the NETDEV_REGISTER and the
> NETDEV_UNREGISTER event handling.
> a)The NETDEV_UNREGISTER event is received in rapid succession after
> the NETDEV_REGISTER event even before the work queue got a chance
> to run
> b)The NETDEV_UNREGISTER event is received while the workqueue that
> handles registration with the IB stack is still in progress.
> Handle both the races with a bit flag that is set as part of the
> NETDEV_REGISTER event just before the work item is queued and cleared
> in the workqueue after the registration with the IB stack is complete.
> Use a barrier to ensure the bit is cleared only after the IB stack
> registration code is completed.

I have a very strong feeling that this patch doesn't solve race, but
makes it is hard to reproduce. Why don't you use locks to protect flows?

>
> Removes the link speed query from the query_port as it causes
> deadlock while trying to acquire rtnl_lock. Now querying the
> speed from the nedev event itself.
>
> Also, added a code to wait for resources(like CQ) to be freed by the
> ULPs, during driver unload
>
> Signed-off-by: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>
> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@xxxxxxxxxxxx>
> Signed-off-by: Selvin Xavier <selvin.xavier@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/hw/bnxt_re/bnxt_re.h  | 13 +++++++----
>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 +++--------------
>  drivers/infiniband/hw/bnxt_re/main.c     | 39 ++++++++++++++++++++++++++++++++
>  3 files changed, 50 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
> index ebf7be8..fad04b2 100644
> --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
> +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
> @@ -80,11 +80,13 @@ struct bnxt_re_dev {
>  	struct ib_device		ibdev;
>  	struct list_head		list;
>  	unsigned long			flags;
> -#define BNXT_RE_FLAG_NETDEV_REGISTERED	0
> -#define BNXT_RE_FLAG_IBDEV_REGISTERED	1
> -#define BNXT_RE_FLAG_GOT_MSIX		2
> -#define BNXT_RE_FLAG_RCFW_CHANNEL_EN	8
> -#define BNXT_RE_FLAG_QOS_WORK_REG	16
> +#define BNXT_RE_FLAG_NETDEV_REGISTERED         0
> +#define BNXT_RE_FLAG_IBDEV_REGISTERED          1
> +#define BNXT_RE_FLAG_GOT_MSIX                  2
> +#define BNXT_RE_FLAG_RCFW_CHANNEL_EN           4
> +#define BNXT_RE_FLAG_QOS_WORK_REG              5
> +#define BNXT_RE_FLAG_NETDEV_REG_IN_PROG        6
> +
>  	struct net_device		*netdev;
>  	unsigned int			version, major, minor;
>  	struct bnxt_en_dev		*en_dev;
> @@ -127,6 +129,7 @@ struct bnxt_re_dev {
>  	struct bnxt_re_qp		*qp1_sqp;
>  	struct bnxt_re_ah		*sqp_ah;
>  	struct bnxt_re_sqp_entries sqp_tbl[1024];
> +	u32 espeed;
>  };
>
>  #define to_bnxt_re_dev(ptr, member)	\
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> index 6385213..c717d5d 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> @@ -223,20 +223,8 @@ int bnxt_re_modify_device(struct ib_device *ibdev,
>  	return 0;
>  }
>
> -static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width)
> +static void __to_ib_speed_width(u32 espeed, u8 *speed, u8 *width)
>  {
> -	struct ethtool_link_ksettings lksettings;
> -	u32 espeed;
> -
> -	if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
> -		memset(&lksettings, 0, sizeof(lksettings));
> -		rtnl_lock();
> -		netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings);
> -		rtnl_unlock();
> -		espeed = lksettings.base.speed;
> -	} else {
> -		espeed = SPEED_UNKNOWN;
> -	}
>  	switch (espeed) {
>  	case SPEED_1000:
>  		*speed = IB_SPEED_SDR;
> @@ -303,12 +291,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
>  	port_attr->sm_sl = 0;
>  	port_attr->subnet_timeout = 0;
>  	port_attr->init_type_reply = 0;
> -	/* call the underlying netdev's ethtool hooks to query speed settings
> -	 * for which we acquire rtnl_lock _only_ if it's registered with
> -	 * IB stack to avoid race in the NETDEV_UNREG path
> -	 */
> +
>  	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
> -		__to_ib_speed_width(rdev->netdev, &port_attr->active_speed,
> +		__to_ib_speed_width(rdev->espeed, &port_attr->active_speed,
>  				    &port_attr->active_width);
>  	return 0;
>  }
> diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> index 5d35540..99a54fd 100644
> --- a/drivers/infiniband/hw/bnxt_re/main.c
> +++ b/drivers/infiniband/hw/bnxt_re/main.c
> @@ -48,6 +48,8 @@
>  #include <net/ipv6.h>
>  #include <net/addrconf.h>
>  #include <linux/if_ether.h>
> +#include <linux/atomic.h>
> +#include <asm/barrier.h>
>
>  #include <rdma/ib_verbs.h>
>  #include <rdma/ib_user_verbs.h>
> @@ -926,6 +928,10 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
>  	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
>  		cancel_delayed_work(&rdev->worker);
>
> +	/* Wait for ULPs to release references */
> +	while (atomic_read(&rdev->cq_count))
> +		usleep_range(500, 1000);
> +

It can change immediately after your check.

>  	bnxt_re_cleanup_res(rdev);
>  	bnxt_re_free_res(rdev, lock_wait);
>
> @@ -1152,6 +1158,22 @@ static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
>  	pci_dev_put(rdev->en_dev->pdev);
>  }
>
> +static void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev)
> +{
> +	struct ethtool_link_ksettings lksettings;
> +	struct net_device *netdev = rdev->netdev;
> +
> +	if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
> +		memset(&lksettings, 0, sizeof(lksettings));
> +		if (rtnl_trylock()) {
> +			netdev->ethtool_ops->get_link_ksettings(netdev,
> +								&lksettings);
> +			rdev->espeed = lksettings.base.speed;
> +			rtnl_unlock();
> +		}
> +	}
> +}
> +
>  /* Handle all deferred netevents tasks */
>  static void bnxt_re_task(struct work_struct *work)
>  {
> @@ -1169,6 +1191,10 @@ static void bnxt_re_task(struct work_struct *work)
>  	switch (re_work->event) {
>  	case NETDEV_REGISTER:
>  		rc = bnxt_re_ib_reg(rdev);
> +		/* Use memory barrier to sync the rdev->flags */
> +		smp_mb();
> +		clear_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG,
> +			  &rdev->flags);
>  		if (rc)
>  			dev_err(rdev_to_dev(rdev),
>  				"Failed to register with IB: %#x", rc);
> @@ -1186,6 +1212,7 @@ static void bnxt_re_task(struct work_struct *work)
>  		else if (netif_carrier_ok(rdev->netdev))
>  			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
>  					       IB_EVENT_PORT_ACTIVE);
> +		bnxt_re_get_link_speed(rdev);
>  		break;
>  	default:
>  		break;
> @@ -1244,10 +1271,22 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
>  			break;
>  		}
>  		bnxt_re_init_one(rdev);
> +		/*
> +		 *  Query the link speed at the time of registration.
> +		 *  Already in rtnl_lock context
> +		 */
> +		bnxt_re_get_link_speed(rdev);
> +
> +		set_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG, &rdev->flags);
>  		sch_work = true;
>  		break;
>
>  	case NETDEV_UNREGISTER:
> +		/* netdev notifier will call NETDEV_UNREGISTER again later since
> +		 * we are still holding the reference to the netdev
> +		 */
> +		if (test_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG, &rdev->flags))
> +			goto exit;
>  		bnxt_re_ib_unreg(rdev, false);
>  		bnxt_re_remove_one(rdev);
>  		bnxt_re_dev_unreg(rdev);
> --
> 2.5.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux