Re: [PATCH v6 3/4] RDMA/Core: add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Dec 07, 2018 at 10:12:44AM -0600, Steve Wise wrote:

> Here is a way to ensure the module doesn't unload.  With this and my
> current rxe changes, I think the races are all handled.

I don't like messing with module refcount to solve a locking problem..

Here something like this compile-tested thing..

Instead of implementing dellink as a function call implement it as a
flag 'ALLOW_USER_UNREGISTER'. The core code simply calls
ib_unregister_device_and_put() upon dellink.

rxe was full of lifetime bugs in this area, I think this fixes alot of
them.. It probably replaces that other patch trying to do the same.

>From 202ce0c783f0a2022ad49f5154be801b1d2a7b32 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Date: Fri, 7 Dec 2018 10:30:52 -0700
Subject: [PATCH] verbs/rxe: Use core services to keep track of RXE things

Replace
 - The nonsense rxe_dev.kref with rxe_dev.ib_dev.dev.kref
 - rxe_dev_list with core's device_list.
 - Buggy net_to_rxe with ib_device_get_by_netdev that does proper
   lifetime management
 - Buggy get_rxe_by_name with ib_device_get_by_name

Add
 - ib_unregister_driver so drivers like rxe don't have to keep track
   of things to do module unload
 - ib_unregister_device_and_put to work with potiners returned by
   that various ib_device_get_* functions

The refcount change to the pool might need more work, but fundamentally
the driver cannot have krefs open in the pool across unregistration or it
contains module unlolad races.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
---
 drivers/infiniband/core/core_priv.h   |   1 -
 drivers/infiniband/core/device.c      | 210 ++++++++++++++++++++++++--
 drivers/infiniband/sw/rxe/rxe.c       |  38 +----
 drivers/infiniband/sw/rxe/rxe.h       |  14 +-
 drivers/infiniband/sw/rxe/rxe_loc.h   |   2 +-
 drivers/infiniband/sw/rxe/rxe_net.c   |  82 +++-------
 drivers/infiniband/sw/rxe/rxe_pool.c  |   4 -
 drivers/infiniband/sw/rxe/rxe_sysfs.c |  39 ++---
 drivers/infiniband/sw/rxe/rxe_verbs.c |   7 -
 drivers/infiniband/sw/rxe/rxe_verbs.h |   3 -
 include/rdma/ib_verbs.h               |  10 ++
 11 files changed, 257 insertions(+), 153 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cc7535c5e19233..1b2575430032d0 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 #endif
 
 struct ib_device *ib_device_get_by_index(u32 ifindex);
-void ib_device_put(struct ib_device *device);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 0027b0d79b09e5..ec60ea3429b024 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -85,6 +85,9 @@ static LIST_HEAD(client_list);
 static DEFINE_MUTEX(device_mutex);
 static DECLARE_RWSEM(lists_rwsem);
 
+/* Used to serialize ib_unregister_driver */
+static DECLARE_RWSEM(unregister_rwsem);
+
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -168,6 +171,7 @@ void ib_device_put(struct ib_device *device)
 	if (refcount_dec_and_test(&device->refcount))
 		complete(&device->unreg_completion);
 }
+EXPORT_SYMBOL(ib_device_put);
 
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
@@ -180,6 +184,27 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
 	return NULL;
 }
 
+struct ib_device *ib_device_get_by_name(const char *name,
+					enum rdma_driver_id driver_id)
+{
+	struct ib_device *device;
+
+	down_read(&lists_rwsem);
+	device = __ib_device_get_by_name(name);
+	if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
+	    device->driver_id != driver_id)
+		device = NULL;
+
+	if (device) {
+		/* Do not return a device if unregistration has started. */
+		if (!refcount_inc_not_zero(&device->refcount))
+			device = NULL;
+	}
+	up_read(&lists_rwsem);
+	return device;
+}
+EXPORT_SYMBOL(ib_device_get_by_name);
+
 int ib_device_rename(struct ib_device *ibdev, const char *name)
 {
 	struct ib_device *device;
@@ -641,28 +666,41 @@ int ib_register_device(struct ib_device *device, const char *name,
 }
 EXPORT_SYMBOL(ib_register_device);
 
-/**
- * ib_unregister_device - Unregister an IB device
- * @device:Device to unregister
+/* Returns false if the device is already undergoing unregistration in another
+ * thread, and the device pointer may be invalid upon return.
  *
- * Unregister an IB device.  All clients will receive a remove callback.
+ * If true is returned then the caller owns a kref associated with the device
+ * (taken from the kref owned by the list).
+ *
+ * Upon return the device cannot be returned by any 'get' functions.
  */
-void ib_unregister_device(struct ib_device *device)
+static bool ib_pre_unregister(struct ib_device *device)
+{
+	bool already_removed;
+
+	mutex_lock(&device_mutex);
+	down_write(&lists_rwsem);
+	already_removed = list_empty(&device->core_list);
+	list_del_init(&device->core_list);
+	up_write(&lists_rwsem);
+	mutex_unlock(&device_mutex);
+
+	/* Matches the recound_set in ib_alloc_device */
+	ib_device_put(device);
+
+	return !already_removed;
+}
+
+static void __ib_unregister_device(struct ib_device *device)
 {
 	struct ib_client_data *context, *tmp;
 	unsigned long flags;
 
-	/*
-	 * Wait for all netlink command callers to finish working on the
-	 * device.
-	 */
-	ib_device_put(device);
 	wait_for_completion(&device->unreg_completion);
 
 	mutex_lock(&device_mutex);
 
 	down_write(&lists_rwsem);
-	list_del(&device->core_list);
 	write_lock_irq(&device->client_data_lock);
 	list_for_each_entry(context, &device->client_data_list, list)
 		context->going_down = true;
@@ -696,9 +734,115 @@ void ib_unregister_device(struct ib_device *device)
 	up_write(&lists_rwsem);
 
 	device->reg_state = IB_DEV_UNREGISTERED;
+
+	/*
+	 * Drivers using the new flow may not call ib_dealloc_device except
+	 * in error unwind prior to registration success.
+	 */
+	if (device->driver_unregister) {
+		device->driver_unregister(device);
+		ib_dealloc_device(device);
+	}
+}
+
+/**
+ * ib_unregister_device - Unregister an IB device
+ * @device: The device to unregister
+ *
+ * Unregister an IB device.  All clients will receive a remove callback.
+ *
+ * Callers can call this routine only once, and must protect against
+ * races. Typically it should only be called as part of a remove callback in
+ * an implementation of driver core's struct device_driver and related.
+ *
+ * A driver must choose to use either only ib_unregister_device or
+ * ib_unregister_device_and_put & ib_unregister_driver.
+ */
+void ib_unregister_device(struct ib_device *device)
+{
+	down_read(&unregister_rwsem);
+	if (ib_pre_unregister(device))
+		__ib_unregister_device(device);
+	else
+		WARN(true, "Driver mixing ib_unregister_device with _and_put");
+	up_read(&unregister_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
+/**
+ * ib_unregister_device_and_put - Unregister a device while holding a 'get'
+ * device: The device to unregister
+ *
+ * This is the same as ib_unregister_device(), except it includes an internal
+ * ib_device_put() that should match a 'get' obtained by the caller.
+ *
+ * It is safe to call this routine concurrently from multiple threads while
+ * holding the 'get', however in this case return from the function does not
+ * guarantee the device is destroyed, only that destruction is in-progress.
+ *
+ * Drivers using this flow MUST use the driver_unregister callback to clean up
+ * their resources associated with the device and dealloc it.
+ */
+void ib_unregister_device_and_put(struct ib_device *device)
+{
+	down_read(&unregister_rwsem);
+	if (ib_pre_unregister(device)) {
+		/*
+		 * Caller's get can be returned now that we hold the kref,
+		 * otherwise ib_pre_unregister returned the caller's get
+		 */
+		ib_device_put(device);
+		__ib_unregister_device(device);
+	}
+	up_read(&unregister_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_device_and_put);
+
+/**
+ * ib_unregister_driver - Unregister all IB devices for a driver
+ * @driver_id: The driver to unregister
+ *
+ * This implements a fence for device unregistration. It only returns once all
+ * devices associated with the driver_id have fully completed their
+ * unregistration and returned from ib_unregister_device*().
+ *
+ * If device's are not yet unregistered it goes ahead and starts unregistering
+ * them.
+ *
+ * The caller must ensure that no devices can be be added while this is
+ * running (or at all for the module_unload case)
+ */
+void ib_unregister_driver(enum rdma_driver_id driver_id)
+{
+	struct ib_device *device;
+	LIST_HEAD(driver_devs);
+
+	down_write(&unregister_rwsem);
+	mutex_lock(&device_mutex);
+	down_write(&lists_rwsem);
+	list_for_each_entry(device, &device_list, core_list) {
+		if (device->driver_id == driver_id)
+			list_move(&device->core_list, &driver_devs);
+	}
+	up_write(&lists_rwsem);
+
+	while ((device = list_first_entry_or_null(
+			&driver_devs, struct ib_device, core_list))) {
+		mutex_unlock(&device_mutex);
+		/*
+		 * We are holding the unregister_rwsem, so it is impossible
+		 * for another thread to be doing registration.
+		 */
+		if (!ib_pre_unregister(device))
+			WARN(true, "Impossible failure of ib_pre_unregister");
+		__ib_unregister_device(device);
+		mutex_lock(&device_mutex);
+	}
+	mutex_unlock(&device_mutex);
+	up_write(&unregister_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_driver);
+
 /**
  * ib_register_client - Register an IB client
  * @client:Client to register
@@ -981,6 +1125,50 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
 		}
 }
 
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id)
+{
+	struct ib_device *ib_dev;
+
+	down_read(&lists_rwsem);
+	list_for_each_entry(ib_dev, &device_list, core_list) {
+		unsigned int port;
+
+		if (driver_id != RDMA_DRIVER_UNKNOWN &&
+		    ib_dev->driver_id != driver_id)
+			continue;
+
+		if (!ib_dev->get_netdev)
+			continue;
+
+		for (port = rdma_start_port(ib_dev);
+		     port <= rdma_end_port(ib_dev); port++) {
+			struct net_device *idev;
+
+			if (!rdma_protocol_roce(ib_dev, port))
+				continue;
+
+			idev = ib_dev->get_netdev(ib_dev, port);
+			if (idev != ndev)
+				continue;
+
+			/*
+			 * Do not return a device if unregistration has
+			 * started.
+			 */
+			if (!refcount_inc_not_zero(&ib_dev->refcount))
+				continue;
+
+			up_read(&lists_rwsem);
+			return ib_dev;
+		}
+	}
+	up_read(&lists_rwsem);
+
+	return NULL;
+}
+EXPORT_SYMBOL(ib_device_get_by_netdev);
+
 /**
  * ib_enum_all_roce_netdevs - enumerate all RoCE devices
  * @filter: Should we call the callback?
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 383e65c7bbc057..4069463f9a6641 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -50,8 +50,10 @@ static void rxe_cleanup_ports(struct rxe_dev *rxe)
 /* free resources for a rxe device all objects created for this device must
  * have been destroyed
  */
-static void rxe_cleanup(struct rxe_dev *rxe)
+void rxe_unregistered(struct ib_device *ib_dev)
 {
+	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
 	rxe_pool_cleanup(&rxe->uc_pool);
 	rxe_pool_cleanup(&rxe->pd_pool);
 	rxe_pool_cleanup(&rxe->ah_pool);
@@ -68,15 +70,6 @@ static void rxe_cleanup(struct rxe_dev *rxe)
 	crypto_free_shash(rxe->tfm);
 }
 
-/* called when all references have been dropped */
-void rxe_release(struct kref *kref)
-{
-	struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
-
-	rxe_cleanup(rxe);
-	ib_dealloc_device(&rxe->ib_dev);
-}
-
 /* initialize rxe device parameters */
 static void rxe_init_device_param(struct rxe_dev *rxe)
 {
@@ -279,7 +272,6 @@ static int rxe_init(struct rxe_dev *rxe)
 	spin_lock_init(&rxe->mmap_offset_lock);
 	spin_lock_init(&rxe->pending_lock);
 	INIT_LIST_HEAD(&rxe->pending_mmaps);
-	INIT_LIST_HEAD(&rxe->list);
 
 	mutex_init(&rxe->usdev_lock);
 
@@ -312,31 +304,13 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
 {
 	int err;
 
-	kref_init(&rxe->ref_cnt);
-
 	err = rxe_init(rxe);
 	if (err)
-		goto err1;
+		return err;
 
 	rxe_set_mtu(rxe, mtu);
 
-	err = rxe_register_device(rxe);
-	if (err)
-		goto err1;
-
-	return 0;
-
-err1:
-	rxe_dev_put(rxe);
-	return err;
-}
-
-/* called by the ifc layer to remove a device */
-void rxe_remove(struct rxe_dev *rxe)
-{
-	rxe_unregister_device(rxe);
-
-	rxe_dev_put(rxe);
+	return rxe_register_device(rxe);
 }
 
 static int __init rxe_module_init(void)
@@ -360,7 +334,7 @@ static int __init rxe_module_init(void)
 
 static void __exit rxe_module_exit(void)
 {
-	rxe_remove_all();
+	ib_unregister_driver(RDMA_DRIVER_RXE);
 	rxe_net_exit();
 	rxe_cache_exit();
 
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 8f79bd86d0337f..75e7113a756d22 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -96,17 +96,19 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
 
 int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
-void rxe_remove(struct rxe_dev *rxe);
-void rxe_remove_all(void);
 
 void rxe_rcv(struct sk_buff *skb);
 
-static inline void rxe_dev_put(struct rxe_dev *rxe)
+/* The caller must do a matching ib_device_put(&dev->ib_dev) */
+static inline struct rxe_dev *rxe_get_dev_from_net(struct net_device *ndev)
 {
-	kref_put(&rxe->ref_cnt, rxe_release);
+	struct ib_device *ibdev =
+		ib_device_get_by_netdev(ndev, RDMA_DRIVER_RXE);
+
+	if (!ibdev)
+		return NULL;
+	return container_of(ibdev, struct rxe_dev, ib_dev);
 }
-struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char *name);
 
 void rxe_port_up(struct rxe_dev *rxe);
 void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a675c9f2b42783..e830d56a15c2a4 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -231,7 +231,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
 		      struct rxe_modify_srq_cmd *ucmd);
 
-void rxe_release(struct kref *kref);
+void rxe_unregistered(struct ib_device *ib_dev);
 
 int rxe_completer(void *arg);
 int rxe_requester(void *arg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b26a8141f3edcb..d1f5959d9013a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -45,43 +45,6 @@
 #include "rxe_net.h"
 #include "rxe_loc.h"
 
-static LIST_HEAD(rxe_dev_list);
-static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
-
-struct rxe_dev *net_to_rxe(struct net_device *ndev)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (rxe->ndev == ndev) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-
-	return found;
-}
-
-struct rxe_dev *get_rxe_by_name(const char *name)
-{
-	struct rxe_dev *rxe;
-	struct rxe_dev *found = NULL;
-
-	spin_lock_bh(&dev_list_lock);
-	list_for_each_entry(rxe, &rxe_dev_list, list) {
-		if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
-			found = rxe;
-			break;
-		}
-	}
-	spin_unlock_bh(&dev_list_lock);
-	return found;
-}
-
-
 static struct rxe_recv_sockets recv_sockets;
 
 struct device *rxe_dma_device(struct rxe_dev *rxe)
@@ -229,12 +192,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct udphdr *udph;
 	struct net_device *ndev = skb->dev;
 	struct net_device *rdev = ndev;
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 
 	if (!rxe && is_vlan_dev(rdev)) {
 		rdev = vlan_dev_real_dev(ndev);
-		rxe = net_to_rxe(rdev);
+		rxe = rxe_get_dev_from_net(rdev);
 	}
 	if (!rxe)
 		goto drop;
@@ -253,6 +216,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	rxe_rcv(skb);
 
+	/*
+	 * FIXME: this is in the wrong place, it needs to be done when pkt is
+	 * destroyed
+	 */
+	ib_device_put(&rxe->ib_dev);
+
 	return 0;
 drop:
 	kfree_skb(skb);
@@ -558,36 +527,18 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
 	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
 	if (!rxe)
 		return NULL;
-
+	rxe->ib_dev.driver_unregister = rxe_unregistered;
 	rxe->ndev = ndev;
 
 	err = rxe_add(rxe, ndev->mtu);
 	if (err) {
-		ib_dealloc_device(&rxe->ib_dev);
+		rxe_unregistered(&rxe->ib_dev);
 		return NULL;
 	}
 
-	spin_lock_bh(&dev_list_lock);
-	list_add_tail(&rxe->list, &rxe_dev_list);
-	spin_unlock_bh(&dev_list_lock);
 	return rxe;
 }
 
-void rxe_remove_all(void)
-{
-	spin_lock_bh(&dev_list_lock);
-	while (!list_empty(&rxe_dev_list)) {
-		struct rxe_dev *rxe =
-			list_first_entry(&rxe_dev_list, struct rxe_dev, list);
-
-		list_del(&rxe->list);
-		spin_unlock_bh(&dev_list_lock);
-		rxe_remove(rxe);
-		spin_lock_bh(&dev_list_lock);
-	}
-	spin_unlock_bh(&dev_list_lock);
-}
-
 static void rxe_port_event(struct rxe_dev *rxe,
 			   enum ib_event_type event)
 {
@@ -630,16 +581,16 @@ static int rxe_notify(struct notifier_block *not_blk,
 		      void *arg)
 {
 	struct net_device *ndev = netdev_notifier_info_to_dev(arg);
-	struct rxe_dev *rxe = net_to_rxe(ndev);
+	struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
 
 	if (!rxe)
-		goto out;
+		return NOTIFY_OK;
 
 	switch (event) {
 	case NETDEV_UNREGISTER:
-		list_del(&rxe->list);
-		rxe_remove(rxe);
-		break;
+		ib_unregister_device_and_put(&rxe->ib_dev);
+		return NOTIFY_OK;
+
 	case NETDEV_UP:
 		rxe_port_up(rxe);
 		break;
@@ -666,7 +617,8 @@ static int rxe_notify(struct notifier_block *not_blk,
 			event, ndev->name);
 		break;
 	}
-out:
+
+	ib_device_put(&rxe->ib_dev);
 	return NOTIFY_OK;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index a04a076e03afbd..bb7e3268713ed6 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -390,8 +390,6 @@ void *rxe_alloc(struct rxe_pool *pool)
 	kref_get(&pool->ref_cnt);
 	read_unlock_irqrestore(&pool->pool_lock, flags);
 
-	kref_get(&pool->rxe->ref_cnt);
-
 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
 		goto out_put_pool;
 
@@ -408,7 +406,6 @@ void *rxe_alloc(struct rxe_pool *pool)
 
 out_put_pool:
 	atomic_dec(&pool->num_elem);
-	rxe_dev_put(pool->rxe);
 	rxe_pool_put(pool);
 	return NULL;
 }
@@ -424,7 +421,6 @@ void rxe_elem_release(struct kref *kref)
 
 	kmem_cache_free(pool_cache(pool), elem);
 	atomic_dec(&pool->num_elem);
-	rxe_dev_put(pool->rxe);
 	rxe_pool_put(pool);
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 73a19f808e1bab..87c17675cf9d9e 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -53,20 +53,14 @@ static int sanitize_arg(const char *val, char *intf, int intf_len)
 	return len;
 }
 
-static void rxe_set_port_state(struct net_device *ndev)
+static void rxe_set_port_state(struct rxe_dev *rxe, struct net_device *ndev)
 {
-	struct rxe_dev *rxe = net_to_rxe(ndev);
 	bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
 
-	if (!rxe)
-		goto out;
-
 	if (is_up)
 		rxe_port_up(rxe);
 	else
 		rxe_port_down(rxe); /* down for unknown state */
-out:
-	return;
 }
 
 static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
@@ -74,24 +68,25 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 	int len;
 	int err = 0;
 	char intf[32];
-	struct net_device *ndev = NULL;
+	struct net_device *ndev;
+	struct rxe_dev *exists;
 	struct rxe_dev *rxe;
 
 	len = sanitize_arg(val, intf, sizeof(intf));
 	if (!len) {
 		pr_err("add: invalid interface name\n");
-		err = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
 	ndev = dev_get_by_name(&init_net, intf);
 	if (!ndev) {
 		pr_err("interface %s not found\n", intf);
-		err = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
-	if (net_to_rxe(ndev)) {
+	exists = rxe_get_dev_from_net(ndev);
+	if (exists) {
+		ib_device_put(&exists->ib_dev);
 		pr_err("already configured on %s\n", intf);
 		err = -EINVAL;
 		goto err;
@@ -104,11 +99,11 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 		goto err;
 	}
 
-	rxe_set_port_state(ndev);
+	rxe_set_port_state(rxe, ndev);
 	dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
+
 err:
-	if (ndev)
-		dev_put(ndev);
+	dev_put(ndev);
 	return err;
 }
 
@@ -116,7 +111,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
 {
 	int len;
 	char intf[32];
-	struct rxe_dev *rxe;
+	struct ib_device *ib_dev;
 
 	len = sanitize_arg(val, intf, sizeof(intf));
 	if (!len) {
@@ -126,20 +121,18 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
 
 	if (strncmp("all", intf, len) == 0) {
 		pr_info("rxe_sys: remove all");
-		rxe_remove_all();
+		ib_unregister_driver(RDMA_DRIVER_RXE);
 		return 0;
 	}
 
-	rxe = get_rxe_by_name(intf);
+	ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE);
 
-	if (!rxe) {
+	if (!ib_dev) {
 		pr_err("not configured on %s\n", intf);
 		return -EINVAL;
 	}
 
-	list_del(&rxe->list);
-	rxe_remove(rxe);
-
+	ib_unregister_device_and_put(ib_dev);
 	return 0;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 30817c79ba962f..28beee8f583823 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1286,10 +1286,3 @@ int rxe_register_device(struct rxe_dev *rxe)
 
 	return err;
 }
-
-void rxe_unregister_device(struct rxe_dev *rxe)
-{
-	struct ib_device *dev = &rxe->ib_dev;
-
-	ib_unregister_device(dev);
-}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 831381b7788da9..a0b63581278420 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -385,7 +385,6 @@ struct rxe_dev {
 	struct ib_device_attr	attr;
 	int			max_ucontext;
 	int			max_inline_data;
-	struct kref		ref_cnt;
 	struct mutex	usdev_lock;
 
 	struct net_device	*ndev;
@@ -412,7 +411,6 @@ struct rxe_dev {
 	u64			stats_counters[RXE_NUM_OF_COUNTERS];
 
 	struct rxe_port		port;
-	struct list_head	list;
 	struct crypto_shash	*tfm;
 };
 
@@ -467,7 +465,6 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
 }
 
 int rxe_register_device(struct rxe_dev *rxe);
-void rxe_unregister_device(struct rxe_dev *rxe);
 
 void rxe_mc_cleanup(struct rxe_pool_entry *arg);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85021451eee073..5fbeedacb3aa88 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2534,6 +2534,9 @@ struct ib_device {
 				 struct ib_counters_read_attr *counters_read_attr,
 				 struct uverbs_attr_bundle *attrs);
 
+	/* Called after all clients have been destroyed*/
+	void (*driver_unregister)(struct ib_device *dev);
+
 	/**
 	 * rdma netdev operation
 	 *
@@ -2653,6 +2656,8 @@ int ib_register_device(struct ib_device *device, const char *name,
 		       int (*port_callback)(struct ib_device *, u8,
 					    struct kobject *));
 void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
 
 int ib_register_client   (struct ib_client *client);
 void ib_unregister_client(struct ib_client *client);
@@ -3937,6 +3942,11 @@ static inline bool ib_access_writable(int access_flags)
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 		       struct ib_mr_status *mr_status);
 
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+					enum rdma_driver_id driver_id);
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
 					    u16 pkey, const union ib_gid *gid,
 					    const struct sockaddr *addr);
-- 
2.19.2




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux