Currently global devlink_mutex is held while a doit() operation is progress. This brings a limitation. A Driver cannot perform devlink_register()/unregister() calls during devlink doit() callback functions. This is typically required when a port state change described in RFC [1] callback wants to delete an active SF port or wants to activate a SF port that results into unregistering or registering a devlink instance on different bus such as ancillary bus. An example flow: devlink_predoit() mutex_lock(&devlink_mutex); <- First lock acquire devlink_reload() driver->reload_down(inactive) adev->remove(); mlx5_adev_remove(ancillary_dev); devlink_unregister(ancillary_dev->devlink_instance); mutex_lock(&devlink_mutex); <- Second lock acquire This patch is preparation patch to enable drivers to achieve this. It achieves this by maintaining a per devlink instance refcount to prevent devlink device unregistration while user command are in progress or while devlink device is migration to init_net net namespace. devlink_nl_family continue to remain registered with parallel_ops disabled. So even after removing devlink_mutex during doit commands, it doesn't enable userspace to run multiple devlink commands for one or multiple devlink instance. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho Signed-off-by: Parav Pandit <parav@xxxxxxxxxx> Reviewed-by: Jiri Pirko <jiri@xxxxxxxxxx> --- include/net/devlink.h | 5 +++ net/core/devlink.c | 84 +++++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index ef487b8ed17b..c8eab814c234 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -53,6 +53,11 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex reload_lock; /* Protects reload operation */ + struct list_head reload_list; + refcount_t refcount; /* Serializes user doit commands and netns command + * with device unregistration. + */ + struct completion unregister_complete; u8 reload_failed:1, reload_enabled:1, registered:1; diff --git a/net/core/devlink.c b/net/core/devlink.c index c7c6f274d392..84f3ec12b3e8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list); /* devlink_mutex * - * An overall lock guarding every operation coming from userspace. - * It also guards devlink devices list and it is taken when - * driver registers/unregisters it. + * An overall lock guarding devlink devices list during operations coming from + * userspace and when driver registers/unregisters devlink device. */ static DEFINE_MUTEX(devlink_mutex); @@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net) } EXPORT_SYMBOL_GPL(devlink_net_set); +static inline bool +devlink_try_get(struct devlink *devlink) +{ + return refcount_inc_not_zero(&devlink->refcount); +} + +static void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + complete(&devlink->unregister_complete); +} + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net, list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) + net_eq(devlink_net(devlink), net) && devlink_try_get(devlink)) return devlink; } @@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global - * devlink lock is taken and protects from disruption by user-calls. + * devlink lock is taken and protects from disruption by dumpit user-calls. */ #define DEVLINK_NL_FLAG_NO_LOCK BIT(2) @@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_lock(&devlink_mutex); devlink = devlink_get_from_info(info); - if (IS_ERR(devlink)) { - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink_mutex); + + if (IS_ERR(devlink)) return PTR_ERR(devlink); - } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_lock(&devlink->lock); info->user_ptr[0] = devlink; @@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); - mutex_unlock(&devlink_mutex); + devlink_put(devlink); return err; } @@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink = info->user_ptr[0]; if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); - mutex_unlock(&devlink_mutex); + devlink_put(devlink); } static struct genl_family devlink_nl_family; @@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); mutex_init(&devlink->reload_lock); + init_completion(&devlink->unregister_complete); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev) { devlink->dev = dev; devlink->registered = true; + refcount_set(&devlink->refcount, 1); mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + /* Remove from the list first, so that no new users can get it */ mutex_lock(&devlink_mutex); - WARN_ON(devlink_reload_supported(devlink->ops) && - devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); mutex_unlock(&devlink_mutex); + + /* Balances with refcount_set in devlink_register(). */ + devlink_put(devlink); + /* Wait for any existing users to stop using the devlink device */ + wait_for_completion(&devlink->unregister_complete); + + /* At this point there are no active users working on the devlink instance; + * also net ns exit operation (if any) is also completed. + * devlink is out of global list, hence no users can acquire reference to this devlink + * instance anymore. Hence, it is safe to proceed with unregistration. + */ + WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) { struct devlink *devlink; u32 actions_performed; + LIST_HEAD(local_list); + struct devlink *tmp; int err; /* In case network namespace is getting destroyed, reload @@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) */ mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (net_eq(devlink_net(devlink), net)) { - if (WARN_ON(!devlink_reload_supported(devlink->ops))) - continue; - err = devlink_reload(devlink, &init_net, - DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_LIMIT_UNSPEC, - &actions_performed, NULL); - if (err && err != -EOPNOTSUPP) - pr_warn("Failed to reload devlink instance into init_net\n"); - } + if (!net_eq(devlink_net(devlink), net)) + continue; + + if (WARN_ON(!devlink_reload_supported(devlink->ops))) + continue; + + /* Hold the reference to devlink instance so that it doesn't get unregistered + * once global devlink_mutex is unlocked. + * Store the devlink to a shadow list so that if devlink unregistration is + * started, it can be still found in the shadow list. + */ + if (devlink_try_get(devlink)) + list_add_tail(&devlink->reload_list, &local_list); } mutex_unlock(&devlink_mutex); + + list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) { + list_del_init(&devlink->reload_list); + err = devlink_reload(devlink, &init_net, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_LIMIT_UNSPEC, + &actions_performed, NULL); + if (err && err != -EOPNOTSUPP) + pr_warn("Failed to reload devlink instance into init_net\n"); + devlink_put(devlink); + } } static struct pernet_operations devlink_pernet_ops __net_initdata = { -- 2.26.2