On 2018/12/5 23:14, Steve Wise wrote:
The rxe device can be removed, which includes unregistering with the
rdma core, from 3 places: a netdev notifier call, the sysfs handler
used to delete a rxe device, and module unload. Currently these are
not synchronized to ensure that the device is unregistered once and the
memory only freed once.
This commit adds proper serialization. Device removal and deregistration
is consolidated into rxe_net_remove() which uses dev_list_lock to
serialize removal from rxe_dev_list and ensures only 1 deregister.
Functions net_to_rxe() and get_rxe_by_name() both now take a kref
reference with dev_list_lock held to ensure that during a race between 2
removes, the rxe struct memory remains around until both racers release
the reference. So now callers to these functions must call rxe_dev_put()
when they are done using the rxe pointer.
Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx>
---
drivers/infiniband/sw/rxe/rxe.c | 8 --------
drivers/infiniband/sw/rxe/rxe.h | 1 -
drivers/infiniband/sw/rxe/rxe_net.c | 26 ++++++++++++++++++++++----
drivers/infiniband/sw/rxe/rxe_net.h | 1 +
drivers/infiniband/sw/rxe/rxe_sysfs.c | 4 ++--
5 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 383e65c7bbc0..971f0862cefe 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -331,14 +331,6 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
return err;
}
-/* called by the ifc layer to remove a device */
-void rxe_remove(struct rxe_dev *rxe)
-{
- rxe_unregister_device(rxe);
-
- rxe_dev_put(rxe);
-}
-
static int __init rxe_module_init(void)
{
int err;
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 8f79bd86d033..e0c0dce80bbf 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -96,7 +96,6 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
-void rxe_remove(struct rxe_dev *rxe);
void rxe_remove_all(void);
void rxe_rcv(struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b26a8141f3ed..6dc1a5b20e31 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -57,6 +57,7 @@ struct rxe_dev *net_to_rxe(struct net_device *ndev)
list_for_each_entry(rxe, &rxe_dev_list, list) {
if (rxe->ndev == ndev) {
found = rxe;
+ kref_get(&rxe->ref_cnt);
break;
}
}
You add kref_get(&rxe->ref_cnt); into net_to_rxe.
In this function,
static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
{
int len;
int err = 0;
char intf[32];
struct net_device *ndev = NULL;
struct rxe_dev *rxe;
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
pr_err("add: invalid interface name\n");
err = -EINVAL;
goto err;
}
ndev = dev_get_by_name(&init_net, intf);
if (!ndev) {
pr_err("interface %s not found\n", intf);
err = -EINVAL;
goto err;
}
if (net_to_rxe(ndev)) { <---if this is true,
kref_get(&rxe->ref_cnt); make rxe->ref_cnt increase by 1. But this
function directly quit without handling this again.
pr_err("already configured on %s\n", intf);
rxe_dev_put(rxe);<--this should be added here. Please pay attention
rxe->ref_cnt.
err = -EINVAL;
goto err;
}
rxe = rxe_net_add(ndev);
if (!rxe) {
pr_err("failed to add %s\n", intf);
err = -EINVAL;
goto err;
}
rxe_set_port_state(ndev);
Zhu Yanjun
@@ -74,6 +75,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
list_for_each_entry(rxe, &rxe_dev_list, list) {
if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
found = rxe;
+ kref_get(&rxe->ref_cnt);
break;
}
}
@@ -573,6 +575,21 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
return rxe;
}
+void rxe_net_remove(struct rxe_dev *rxe)
+{
+ bool already_removed;
+
+ spin_lock_bh(&dev_list_lock);
+ already_removed = list_empty(&rxe->list);
+ list_del_init(&rxe->list);
+ spin_unlock_bh(&dev_list_lock);
+
+ if (!already_removed) {
+ rxe_unregister_device(rxe);
+ rxe_dev_put(rxe);
+ }
+}
+
void rxe_remove_all(void)
{
spin_lock_bh(&dev_list_lock);
@@ -580,9 +597,10 @@ void rxe_remove_all(void)
struct rxe_dev *rxe =
list_first_entry(&rxe_dev_list, struct rxe_dev, list);
- list_del(&rxe->list);
+ kref_get(&rxe->ref_cnt);
spin_unlock_bh(&dev_list_lock);
- rxe_remove(rxe);
+ rxe_net_remove(rxe);
+ rxe_dev_put(rxe);
spin_lock_bh(&dev_list_lock);
}
spin_unlock_bh(&dev_list_lock);
@@ -637,8 +655,7 @@ static int rxe_notify(struct notifier_block *not_blk,
switch (event) {
case NETDEV_UNREGISTER:
- list_del(&rxe->list);
- rxe_remove(rxe);
+ rxe_net_remove(rxe);
break;
case NETDEV_UP:
rxe_port_up(rxe);
@@ -666,6 +683,7 @@ static int rxe_notify(struct notifier_block *not_blk,
event, ndev->name);
break;
}
+ rxe_dev_put(rxe);
out:
return NOTIFY_OK;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 106c586dbb26..222234a8d525 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -44,6 +44,7 @@ struct rxe_recv_sockets {
};
struct rxe_dev *rxe_net_add(struct net_device *ndev);
+void rxe_net_remove(struct rxe_dev *rxe);
int rxe_net_init(void);
void rxe_net_exit(void);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 73a19f808e1b..fc52eb3d1856 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -137,8 +137,8 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
return -EINVAL;
}
- list_del(&rxe->list);
- rxe_remove(rxe);
+ rxe_net_remove(rxe);
+ rxe_dev_put(rxe);
return 0;
}