From: Parav Pandit <parav@xxxxxxxxxxxx> Add a netlink command that enables/disables sharing of an rdma device among multiple net namespaces. Using the rdma tool, $rdma sys set netns shared (default mode) When the rdma subsystem netns mode is set to shared mode, rdma devices will be accessible in all net namespaces. Using the rdma tool, $rdma sys set netns exclusive When the rdma subsystem netns mode is set to exclusive mode, devices will be accessible in only one net namespace at any given point in time. Any rdma resources created or in use before the netns mode is set to exclusive will remain in shared mode; in other words, changing the netns mode to exclusive or shared has no effect on already open devices. To change this mode, a netlink command is used instead of sysctl, because a netlink command allows a module to be auto-loaded. Also add a module parameter to change the sharing mode of ib_core early in the boot process. This helps those systems where a modern, up-to-date rdma tool (iproute2) package may not be available during the kernel upgrade cycle. 
Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/net_namespace.c | 30 +++++++++++++++++++++++++ drivers/infiniband/core/nldev.c | 25 +++++++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 3 +-- 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 1ba188efec44..ef2cea6de0b6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -352,6 +352,7 @@ void __exit rdma_dev_net_cleanup(void); void rdma_compatdev_add(struct ib_device *device); void rdma_compatdev_remove(struct ib_device *device); void rdma_compatdev_rename(const struct ib_device *device); +void rdma_compatdev_set(u8 enable); void ib_free_port_attrs(struct ib_core_device *coredev); int ib_setup_port_attrs(struct ib_core_device *coredev, diff --git a/drivers/infiniband/core/net_namespace.c b/drivers/infiniband/core/net_namespace.c index 57b1fe3d1d16..8fce745433f7 100644 --- a/drivers/infiniband/core/net_namespace.c +++ b/drivers/infiniband/core/net_namespace.c @@ -43,6 +43,9 @@ static unsigned int rdma_dev_net_id; * */ ushort ib_devices_shared_netns = 1; +module_param_named(netns_mode, ib_devices_shared_netns, ushort, 0444); +MODULE_PARM_DESC(netns_mode, + "Share device among net namespaces; default=1 (shared)"); static void rdma_compatdev_release(struct device *dev) { @@ -175,6 +178,33 @@ void rdma_compatdev_rename(const struct ib_device *device) up_read(&net_rwsem); } +void rdma_compatdev_set(u8 enable) +{ + struct ib_device *device; + + mutex_lock(&ib_device_mutex); + if (ib_devices_shared_netns == enable) + goto out; + + down_read(&ib_lists_rwsem); + list_for_each_entry(device, &ib_device_list, core_list) { + device->netns_shared = enable; + if (enable) + rdma_compatdev_add(device); + else + rdma_compatdev_remove(device); + } + up_read(&ib_lists_rwsem); + + 
/* Continue to hold the lock while setting ib_devices_shared_netns. + * This ensure that, net enumerators or device reg() honors + * the value being set of ib_devices_shared_netns. + */ + ib_devices_shared_netns = enable; +out: + mutex_unlock(&ib_device_mutex); +} + static __net_init int rdma_dev_init_net(struct net *net) { struct rdma_dev_net *rdma_net = net_generic(net, rdma_dev_net_id); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 881b620bed7a..11ad2fe08d00 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1136,6 +1136,27 @@ static int nldev_get_sys_get_dumpit(struct sk_buff *skb, return skb->len; } +static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + u8 enable; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) + return -EINVAL; + + enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); + /* Only 0 and 1 are supported */ + if (enable > 1) + return -EINVAL; + + rdma_compatdev_set(enable); + return 0; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -1181,6 +1202,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_SYS_GET] = { .dump = nldev_get_sys_get_dumpit, }, + [RDMA_NLDEV_CMD_SYS_SET] = { + .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init nldev_init(void) diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 1cda30f392e7..033eaf4b32ad 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -230,8 +230,7 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_SET, RDMA_NLDEV_CMD_SYS_GET, /* can dump */ - - /* 4 is free to use */ + RDMA_NLDEV_CMD_SYS_SET, RDMA_NLDEV_CMD_PORT_GET = 5, 
/* can dump */ -- 2.19.1