+ list_del_init(&device->core_list);
+ up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+
+ /* Matches the recound_set in ib_alloc_device */
+ ib_device_put(device);
+
+ return !already_removed;
+}
+
+static void __ib_unregister_device(struct ib_device *device)
{
struct ib_client_data *context, *tmp;
unsigned long flags;
- /*
- * Wait for all netlink command callers to finish working on the
- * device.
- */
- ib_device_put(device);
wait_for_completion(&device->unreg_completion);
mutex_lock(&device_mutex);
down_write(&lists_rwsem);
- list_del(&device->core_list);
write_lock_irq(&device->client_data_lock);
list_for_each_entry(context, &device->client_data_list, list)
context->going_down = true;
@@ -696,10 +734,116 @@ void ib_unregister_device(struct ib_device *device)
up_write(&lists_rwsem);
device->reg_state = IB_DEV_UNREGISTERED;
+
+ /*
+ * Drivers using the new flow may not call ib_dealloc_device except
+ * in error unwind prior to registration success.
+ */
+ if (device->driver_unregister) {
+ device->driver_unregister(device);
+ ib_dealloc_device(device);
+ }
+}
+
+/**
+ * ib_unregister_device - Unregister an IB device
+ * @device: The device to unregister
+ *
+ * Unregister an IB device. All clients will receive a remove callback.
+ *
+ * Callers can call this routine only once, and must protect against
+ * races. Typically it should only be called as part of a remove callback in
+ * an implementation of driver core's struct device_driver and related.
+ *
+ * A driver must choose to use either only ib_unregister_device or
+ * ib_unregister_device_and_put & ib_unregister_driver.
+ */
+void ib_unregister_device(struct ib_device *device)
+{
+ down_read(&unregister_rwsem);
+ if (ib_pre_unregister(device))
+ __ib_unregister_device(device);
+ else
+ WARN(true, "Driver mixing %s with _and_put", __func__);
+ up_read(&unregister_rwsem);
}
EXPORT_SYMBOL(ib_unregister_device);
/**
+ * ib_unregister_device_and_put - Unregister a device while holding a 'get'
+ * device: The device to unregister
+ *
+ * This is the same as ib_unregister_device(), except it includes an internal
+ * ib_device_put() that should match a 'get' obtained by the caller.
+ *
+ * It is safe to call this routine concurrently from multiple threads while
+ * holding the 'get', however in this case return from the function does not
+ * guarantee the device is destroyed, only that destruction is in-progress.
+ *
+ * Drivers using this flow MUST use the driver_unregister callback to clean up
+ * their resources associated with the device and dealloc it.
+ */
+void ib_unregister_device_and_put(struct ib_device *device)
+{
+ down_read(&unregister_rwsem);
+ if (ib_pre_unregister(device)) {
+ /*
+ * Caller's get can be returned now that we hold the kref,
+ * otherwise ib_pre_unregister returned the caller's get
+ */
+ ib_device_put(device);
+ __ib_unregister_device(device);
+ }
+ up_read(&unregister_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_device_and_put);
+
+/**
+ * ib_unregister_driver - Unregister all IB devices for a driver
+ * @driver_id: The driver to unregister
+ *
+ * This implements a fence for device unregistration. It only returns once all
+ * devices associated with the driver_id have fully completed their
+ * unregistration and returned from ib_unregister_device*().
+ *
+ * If device's are not yet unregistered it goes ahead and starts unregistering
+ * them.
+ *
+ * The caller must ensure that no devices can be be added while this is
+ * running (or at all for the module_unload case)
+ */
+void ib_unregister_driver(enum rdma_driver_id driver_id)
+{
+ struct ib_device *device, *tmp;
+ LIST_HEAD(driver_devs);
+
+ down_write(&unregister_rwsem);
+ mutex_lock(&device_mutex);
+ down_write(&lists_rwsem);
+ list_for_each_entry_safe(device, tmp, &device_list, core_list) {
+ if (device->driver_id == driver_id)
+ list_move(&device->core_list, &driver_devs);
+ }
+ up_write(&lists_rwsem);
+
+ while ((device = list_first_entry_or_null(
+ &driver_devs, struct ib_device, core_list))) {
+ mutex_unlock(&device_mutex);
+ /*
+ * We are holding the unregister_rwsem, so it is impossible
+ * for another thread to be doing registration.
+ */
+ if (!ib_pre_unregister(device))
+ WARN(true, "Impossible failure of ib_pre_unregister");
+ __ib_unregister_device(device);
+ mutex_lock(&device_mutex);
+ }
+ mutex_unlock(&device_mutex);
+ up_write(&unregister_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_driver);
+
+/**
* ib_register_client - Register an IB client
* @client:Client to register
*
@@ -981,6 +1125,53 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
}
}
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+ enum rdma_driver_id driver_id)
+{
+ struct ib_device *ib_dev;
+
+ down_read(&lists_rwsem);
+ list_for_each_entry(ib_dev, &device_list, core_list) {
+ unsigned int port;
+
+ if (driver_id != RDMA_DRIVER_UNKNOWN &&
+ ib_dev->driver_id != driver_id)
+ continue;
+
+ if (!ib_dev->get_netdev)
+ continue;
+
+ for (port = rdma_start_port(ib_dev);
+ port <= rdma_end_port(ib_dev); port++) {
+ struct net_device *idev;
+
+ idev = ib_dev->get_netdev(ib_dev, port);
+ if (idev != ndev) {
+ if (idev)
+ dev_put(idev);
+ continue;
+ }
+
+ /*
+ * Do not return a device if unregistration has
+ * started.
+ */
+ if (!refcount_inc_not_zero(&ib_dev->refcount)) {
+ dev_put(idev);
+ continue;
+ }
+
+ up_read(&lists_rwsem);
+ dev_put(idev);
+ return ib_dev;
+ }
+ }
+ up_read(&lists_rwsem);
+
+ return NULL;
+}
+EXPORT_SYMBOL(ib_device_get_by_netdev);
+
/**
* ib_enum_all_roce_netdevs - enumerate all RoCE devices
* @filter: Should we call the callback?
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 383e65c7bbc0..4069463f9a66 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -50,8 +50,10 @@ static void rxe_cleanup_ports(struct rxe_dev *rxe)
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
-static void rxe_cleanup(struct rxe_dev *rxe)
+void rxe_unregistered(struct ib_device *ib_dev)
{
+ struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
rxe_pool_cleanup(&rxe->uc_pool);
rxe_pool_cleanup(&rxe->pd_pool);
rxe_pool_cleanup(&rxe->ah_pool);
@@ -68,15 +70,6 @@ static void rxe_cleanup(struct rxe_dev *rxe)
crypto_free_shash(rxe->tfm);
}
-/* called when all references have been dropped */
-void rxe_release(struct kref *kref)
-{
- struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
-
- rxe_cleanup(rxe);
- ib_dealloc_device(&rxe->ib_dev);
-}
-
/* initialize rxe device parameters */
static void rxe_init_device_param(struct rxe_dev *rxe)
{
@@ -279,7 +272,6 @@ static int rxe_init(struct rxe_dev *rxe)
spin_lock_init(&rxe->mmap_offset_lock);
spin_lock_init(&rxe->pending_lock);
INIT_LIST_HEAD(&rxe->pending_mmaps);
- INIT_LIST_HEAD(&rxe->list);
mutex_init(&rxe->usdev_lock);
@@ -312,31 +304,13 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
{
int err;
- kref_init(&rxe->ref_cnt);
-
err = rxe_init(rxe);
if (err)
- goto err1;
+ return err;
rxe_set_mtu(rxe, mtu);
- err = rxe_register_device(rxe);
- if (err)
- goto err1;
-
- return 0;
-
-err1:
- rxe_dev_put(rxe);
- return err;
-}
-
-/* called by the ifc layer to remove a device */
-void rxe_remove(struct rxe_dev *rxe)
-{
- rxe_unregister_device(rxe);
-
- rxe_dev_put(rxe);
+ return rxe_register_device(rxe);
}
static int __init rxe_module_init(void)
@@ -360,7 +334,7 @@ static int __init rxe_module_init(void)
static void __exit rxe_module_exit(void)
{
- rxe_remove_all();
+ ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
rxe_cache_exit();
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 8f79bd86d033..75e7113a756d 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -96,17 +96,19 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
-void rxe_remove(struct rxe_dev *rxe);
-void rxe_remove_all(void);
void rxe_rcv(struct sk_buff *skb);
-static inline void rxe_dev_put(struct rxe_dev *rxe)
+/* The caller must do a matching ib_device_put(&dev->ib_dev) */
+static inline struct rxe_dev *rxe_get_dev_from_net(struct net_device *ndev)
{
- kref_put(&rxe->ref_cnt, rxe_release);
+ struct ib_device *ibdev =
+ ib_device_get_by_netdev(ndev, RDMA_DRIVER_RXE);
+
+ if (!ibdev)
+ return NULL;
+ return container_of(ibdev, struct rxe_dev, ib_dev);
}
-struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a675c9f2b427..e830d56a15c2 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -231,7 +231,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd);
-void rxe_release(struct kref *kref);
+void rxe_unregistered(struct ib_device *ib_dev);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b26a8141f3ed..c7637ad5e584 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -45,43 +45,6 @@
#include "rxe_net.h"
#include "rxe_loc.h"
-static LIST_HEAD(rxe_dev_list);
-static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
-
-struct rxe_dev *net_to_rxe(struct net_device *ndev)
-{
- struct rxe_dev *rxe;
- struct rxe_dev *found = NULL;
-
- spin_lock_bh(&dev_list_lock);
- list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (rxe->ndev == ndev) {
- found = rxe;
- break;
- }
- }
- spin_unlock_bh(&dev_list_lock);
-
- return found;
-}
-
-struct rxe_dev *get_rxe_by_name(const char *name)
-{
- struct rxe_dev *rxe;
- struct rxe_dev *found = NULL;
-
- spin_lock_bh(&dev_list_lock);
- list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
- found = rxe;
- break;
- }
- }
- spin_unlock_bh(&dev_list_lock);
- return found;
-}
-
-
static struct rxe_recv_sockets recv_sockets;
struct device *rxe_dma_device(struct rxe_dev *rxe)
@@ -229,18 +192,19 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct udphdr *udph;
struct net_device *ndev = skb->dev;
struct net_device *rdev = ndev;
- struct rxe_dev *rxe = net_to_rxe(ndev);
+ struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
if (!rxe && is_vlan_dev(rdev)) {
rdev = vlan_dev_real_dev(ndev);
- rxe = net_to_rxe(rdev);
+ rxe = rxe_get_dev_from_net(rdev);
}
if (!rxe)
goto drop;
if (skb_linearize(skb)) {
pr_err("skb_linearize failed\n");
+ ib_device_put(&rxe->ib_dev);
goto drop;
}
@@ -253,6 +217,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
rxe_rcv(skb);
+ /*
+ * FIXME: this is in the wrong place, it needs to be done when pkt is
+ * destroyed
+ */
+ ib_device_put(&rxe->ib_dev);
+
return 0;
drop:
kfree_skb(skb);
@@ -558,36 +528,18 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
if (!rxe)
return NULL;
-
+ rxe->ib_dev.driver_unregister = rxe_unregistered;
rxe->ndev = ndev;
err = rxe_add(rxe, ndev->mtu);
if (err) {
- ib_dealloc_device(&rxe->ib_dev);
+ rxe_unregistered(&rxe->ib_dev);
return NULL;
}
- spin_lock_bh(&dev_list_lock);
- list_add_tail(&rxe->list, &rxe_dev_list);
- spin_unlock_bh(&dev_list_lock);
return rxe;
}
-void rxe_remove_all(void)
-{
- spin_lock_bh(&dev_list_lock);
- while (!list_empty(&rxe_dev_list)) {
- struct rxe_dev *rxe =
- list_first_entry(&rxe_dev_list, struct rxe_dev, list);
-
- list_del(&rxe->list);
- spin_unlock_bh(&dev_list_lock);
- rxe_remove(rxe);
- spin_lock_bh(&dev_list_lock);
- }
- spin_unlock_bh(&dev_list_lock);
-}
-
static void rxe_port_event(struct rxe_dev *rxe,
enum ib_event_type event)
{
@@ -630,16 +582,16 @@ static int rxe_notify(struct notifier_block *not_blk,
void *arg)
{
struct net_device *ndev = netdev_notifier_info_to_dev(arg);
- struct rxe_dev *rxe = net_to_rxe(ndev);
+ struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
if (!rxe)
- goto out;
+ return NOTIFY_OK;
switch (event) {
case NETDEV_UNREGISTER:
- list_del(&rxe->list);
- rxe_remove(rxe);
- break;
+ ib_unregister_device_and_put(&rxe->ib_dev);
+ return NOTIFY_OK;
+
case NETDEV_UP:
rxe_port_up(rxe);
break;
@@ -666,7 +618,8 @@ static int rxe_notify(struct notifier_block *not_blk,
event, ndev->name);
break;
}
-out:
+
+ ib_device_put(&rxe->ib_dev);
return NOTIFY_OK;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index a04a076e03af..bb7e3268713e 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -390,8 +390,6 @@ void *rxe_alloc(struct rxe_pool *pool)
kref_get(&pool->ref_cnt);
read_unlock_irqrestore(&pool->pool_lock, flags);
- kref_get(&pool->rxe->ref_cnt);
-
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
goto out_put_pool;
@@ -408,7 +406,6 @@ void *rxe_alloc(struct rxe_pool *pool)
out_put_pool:
atomic_dec(&pool->num_elem);
- rxe_dev_put(pool->rxe);
rxe_pool_put(pool);
return NULL;
}
@@ -424,7 +421,6 @@ void rxe_elem_release(struct kref *kref)
kmem_cache_free(pool_cache(pool), elem);
atomic_dec(&pool->num_elem);
- rxe_dev_put(pool->rxe);
rxe_pool_put(pool);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 73a19f808e1b..87c17675cf9d 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -53,20 +53,14 @@ static int sanitize_arg(const char *val, char *intf, int intf_len)
return len;
}
-static void rxe_set_port_state(struct net_device *ndev)
+static void rxe_set_port_state(struct rxe_dev *rxe, struct net_device *ndev)
{
- struct rxe_dev *rxe = net_to_rxe(ndev);
bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
- if (!rxe)
- goto out;
-
if (is_up)
rxe_port_up(rxe);
else
rxe_port_down(rxe); /* down for unknown state */
-out:
- return;
}
static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
@@ -74,24 +68,25 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
int len;
int err = 0;
char intf[32];
- struct net_device *ndev = NULL;
+ struct net_device *ndev;
+ struct rxe_dev *exists;
struct rxe_dev *rxe;
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
pr_err("add: invalid interface name\n");
- err = -EINVAL;
- goto err;
+ return -EINVAL;
}
ndev = dev_get_by_name(&init_net, intf);
if (!ndev) {
pr_err("interface %s not found\n", intf);
- err = -EINVAL;
- goto err;
+ return -EINVAL;
}
- if (net_to_rxe(ndev)) {
+ exists = rxe_get_dev_from_net(ndev);
+ if (exists) {
+ ib_device_put(&exists->ib_dev);
pr_err("already configured on %s\n", intf);
err = -EINVAL;
goto err;
@@ -104,11 +99,11 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
goto err;
}
- rxe_set_port_state(ndev);
+ rxe_set_port_state(rxe, ndev);
dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
+
err:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
return err;
}
@@ -116,7 +111,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
{
int len;
char intf[32];
- struct rxe_dev *rxe;
+ struct ib_device *ib_dev;
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
@@ -126,20 +121,18 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
if (strncmp("all", intf, len) == 0) {
pr_info("rxe_sys: remove all");
- rxe_remove_all();
+ ib_unregister_driver(RDMA_DRIVER_RXE);
return 0;
}
- rxe = get_rxe_by_name(intf);
+ ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE);
- if (!rxe) {
+ if (!ib_dev) {
pr_err("not configured on %s\n", intf);
return -EINVAL;
}
- list_del(&rxe->list);
- rxe_remove(rxe);
-
+ ib_unregister_device_and_put(ib_dev);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 30817c79ba96..28beee8f5838 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1286,10 +1286,3 @@ int rxe_register_device(struct rxe_dev *rxe)
return err;
}
-
-void rxe_unregister_device(struct rxe_dev *rxe)
-{
- struct ib_device *dev = &rxe->ib_dev;
-
- ib_unregister_device(dev);
-}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 831381b7788d..a0b635812784 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -385,7 +385,6 @@ struct rxe_dev {
struct ib_device_attr attr;
int max_ucontext;
int max_inline_data;
- struct kref ref_cnt;
struct mutex usdev_lock;
struct net_device *ndev;
@@ -412,7 +411,6 @@ struct rxe_dev {
u64 stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
- struct list_head list;
struct crypto_shash *tfm;
};
@@ -467,7 +465,6 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
}
int rxe_register_device(struct rxe_dev *rxe);
-void rxe_unregister_device(struct rxe_dev *rxe);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85021451eee0..5fbeedacb3aa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2534,6 +2534,9 @@ struct ib_device {
struct ib_counters_read_attr *counters_read_attr,
struct uverbs_attr_bundle *attrs);
+ /* Called after all clients have been destroyed*/
+ void (*driver_unregister)(struct ib_device *dev);
+
/**
* rdma netdev operation
*
@@ -2653,6 +2656,8 @@ int ib_register_device(struct ib_device *device, const char *name,
int (*port_callback)(struct ib_device *, u8,
struct kobject *));
void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
@@ -3937,6 +3942,11 @@ static inline bool ib_access_writable(int access_flags)
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+ enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+ enum rdma_driver_id driver_id);
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);