On Wed, Aug 02, 2023 at 05:28:08PM +0200, Bartosz Golaszewski wrote: > From: Bartosz Golaszewski <bartosz.golaszewski@xxxxxxxxxx> > > The GPIO subsystem has a serious problem with undefined behavior and > use-after-free bugs on hot-unplug of GPIO chips. This can be considered a > corner-case by some as most GPIO controllers are enabled early in the > boot process and live until the system goes down but most GPIO drivers > do allow unbind over sysfs, many are loadable modules that can be (force) > unloaded and there are also GPIO devices that can be dynamically detached, > for instance CP2112 which is a USB GPIO expender. > > Bugs can be triggered both from user-space as well as by in-kernel users. > We have the means of testing it from user-space via the character device > but the issues manifest themselves differently in the kernel. > > This is a proposition of adding a new virtual driver - a configurable > GPIO consumer that can be configured over configfs (similarly to > gpio-sim). > > The configfs interface allows users to create dynamic GPIO lookup tables > that are registered with the GPIO subsystem. Every config group > represents a consumer device. Every sub-group represents a single GPIO > lookup. The device can work in three modes: just keeping the line > active, toggling it every second or requesting its interrupt and > reporting edges. Every lookup allows to specify the key, offset and > flags as per the lookup struct defined in linux/gpio/machine.h. > > The module together with gpio-sim allows to easily trigger kernel > hot-unplug errors. A simple use-case is to create a simulated chip, > setup the consumer to lookup one of its lines in 'monitor' mode, unbind > the simulator, unbind the consumer and observe the fireworks in dmesg. > > This driver is aimed as a helper in tackling the hot-unplug problem in > GPIO as well as basis for future regression testing once the fixes are > upstream. ... > @@ -1796,6 +1796,13 @@ config GPIO_SIM > This enables the GPIO simulator - a configfs-based GPIO testing > driver. > > +config GPIO_CONSUMER > + tristate "GPIO Consumer Testing Module" > + select CONFIGFS_FS > + help > + This enables the configurable, configfs-based virtual GPIO consumer > + testing driver. I believe the agreement (as mentioned in this file) is to keep sorted by config names, so CONSUMER is definitely should be earlier than SIM. ... > +#include <linux/completion.h> > +#include <linux/configfs.h> > +#include <linux/device.h> > +#include <linux/gpio/consumer.h> > +#include <linux/gpio/machine.h> > +#include <linux/idr.h> > +#include <linux/interrupt.h> > +#include <linux/jiffies.h> > +#include <linux/kernel.h> > +#include <linux/limits.h> > +#include <linux/list.h> > +#include <linux/module.h> > +#include <linux/mutex.h> > +#include <linux/notifier.h> > +#include <linux/of_platform.h> Wrong header. Use mod_devicetable.h. > +#include <linux/platform_device.h> > +#include <linux/printk.h> > +#include <linux/property.h> > +#include <linux/slab.h> > +#include <linux/string.h> > +#include <linux/timer.h> And general recommendation is to revisit this block and refine it accordingly. ... > +enum gpio_consumer_function { > + GPIO_CONSUMER_FUNCTION_ACTIVE = 0, = 0 is guaranteed by C standard, why do you need it be explicit? > + GPIO_CONSUMER_FUNCTION_TOGGLE, > + GPIO_CONSUMER_FUNCTION_MONITOR, > + __GPIO_CONSUMER_FUNCTION_LAST, No comma in terminator line. > +}; ... > +static void gpio_consumer_on_timer(struct timer_list *timer) > +{ > + struct gpio_consumer_timer_data *timer_data = to_timer_data(timer); > + timer_data->val = timer_data->val == 0 ? 1 : 0; timer_data->val = timer_data->val ? 0 : 1; is shorter, but what is showing better the intention is timer_data->val ^= 1; > + gpiod_set_value_cansleep(timer_data->desc, timer_data->val); > + mod_timer(&timer_data->timer, jiffies + msecs_to_jiffies(1000)); > +} ... > + ret = match_string(gpio_consumer_function_strings, -1, function_prop); > + if (ret < 0) > + return dev_err_probe(dev, -EINVAL, Why not return dev_err_probe(dev, ret, ? > + "Invalid consumer function: '%s'\n", > + function_prop); ... > + flags = function == GPIO_CONSUMER_FUNCTION_MONITOR ? > + GPIOD_IN : GPIOD_OUT_HIGH; > + for (i = 0; i < num_lines; i++) { > + desc = devm_gpiod_get(dev, lines[i], flags); > + if (IS_ERR(desc)) > + return dev_err_probe(dev, PTR_ERR(desc), > + "Failed to get GPIO '%s'\n", > + lines[i]); Would it make sense to request GPIOs via devm_gpiod_get_array() and then try the rest on them in a loop? > + } ... > +static int gpio_consumer_bus_notifier_call(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + struct gpio_consumer_device *consumer; > + struct device *dev = data; > + char devname[32]; > + > + consumer = container_of(nb, struct gpio_consumer_device, bus_notifier); > + snprintf(devname, sizeof(devname), "gpio-virtual-consumer.%d", > + consumer->id); > + if (strcmp(dev_name(dev), devname) == 0) { if (strcmp(dev_name(dev), devname)) return NOTIFY_DONE; ? > + switch (action) { > + case BUS_NOTIFY_BOUND_DRIVER: > + consumer->driver_bound = true; > + break; > + case BUS_NOTIFY_DRIVER_NOT_BOUND: > + consumer->driver_bound = false; > + break; > + default: > + return NOTIFY_DONE; > + } > + > + complete(&consumer->probe_completion); > + return NOTIFY_OK; > + } > + > + return NOTIFY_DONE; > +} ... > +static ssize_t > +gpio_consumer_lookup_config_offset_store(struct config_item *item, > + const char *page, size_t count) > +{ > + struct gpio_consumer_lookup *lookup = to_gpio_consumer_lookup(item); > + struct gpio_consumer_device *dev = lookup->parent; > + int offset, ret; > + > + ret = kstrtoint(page, 0, &offset); > + if (ret) > + return ret; > + > + /* Use -1 to indicate lookup by name. */ > + if (offset > (U16_MAX - 1)) > + return -EINVAL; So, offset here may be negative. Is it okay? > + mutex_lock(&dev->lock); > + > + if (gpio_consumer_device_is_live_unlocked(dev)) { > + mutex_unlock(&dev->lock); > + return -EBUSY; > + } > + > + lookup->offset = offset; > + > + mutex_unlock(&dev->lock); > + > + return count; > +} ... > + if (flags & GPIO_OPEN_DRAIN) > + repr = "open-drain"; > + else if (flags & GPIO_OPEN_SOURCE) > + repr = "open-source"; Can it be both flags set? > + else > + repr = "push-pull"; ... > + if (sysfs_streq(page, "push-pull")) { > + lookup->flags &= ~(GPIO_OPEN_DRAIN | GPIO_OPEN_SOURCE); > + } else if (sysfs_streq(page, "open-drain")) { > + lookup->flags &= ~GPIO_OPEN_SOURCE; > + lookup->flags |= GPIO_OPEN_DRAIN; > + } else if (sysfs_streq(page, "open-source")) { > + lookup->flags &= ~GPIO_OPEN_DRAIN; > + lookup->flags |= GPIO_OPEN_SOURCE; > + } else { > + count = -EINVAL; > + } I prefer to see some kind of the array of constant string literals and do sysfs_match_string() here lookup->flags &= ~(GPIO_OPEN_DRAIN | GPIO_OPEN_SOURCE); flag = sysfs_match_string(...); if (flag < 0) count = flag else lookup->flags |= flag; (or something similar). And respectively indexed access above. ... > + flags = gpio_consumer_lookup_get_flags(item); > + > + if (flags & GPIO_PULL_UP) > + repr = "pull-up"; > + else if (flags & GPIO_PULL_DOWN) > + repr = "pull-down"; > + else if (flags & GPIO_PULL_DISABLE) > + repr = "pull-disabled"; > + else > + repr = "as-is"; ... > + if (sysfs_streq(page, "pull-up")) { > + lookup->flags &= ~(GPIO_PULL_DOWN | GPIO_PULL_DISABLE); > + lookup->flags |= GPIO_PULL_UP; > + } else if (sysfs_streq(page, "pull-down")) { > + lookup->flags &= ~(GPIO_PULL_UP | GPIO_PULL_DISABLE); > + lookup->flags |= GPIO_PULL_DOWN; > + } else if (sysfs_streq(page, "pull-disabled")) { > + lookup->flags &= ~(GPIO_PULL_UP | GPIO_PULL_DOWN); > + lookup->flags |= GPIO_PULL_DISABLE; > + } else if (sysfs_streq(page, "as-is")) { > + lookup->flags &= ~(GPIO_PULL_UP | GPIO_PULL_DOWN | > + GPIO_PULL_DISABLE); > + } else { > + count = -EINVAL; > + } The above is more tricky, but still consider similar approach. ... > + num_entries = list_count_nodes(&dev->lookup_list); > + table = kzalloc(sizeof(*table) + num_entries * sizeof(*table->table), struct_size() ? > + GFP_KERNEL); > + if (!table) > + return -ENOMEM; ... > + if (list_empty(&dev->lookup_list)) > + return -ENODATA; Instead you may count nodes here and if 0, return an error, otherwise pass it to the callee. > + swnode = gpio_consumer_make_device_swnode(dev); > + if (IS_ERR(swnode)) > + return PTR_ERR(swnode); ... > +static ssize_t > +gpio_consumer_device_config_live_store(struct config_item *item, > + const char *page, size_t count) > +{ > + struct gpio_consumer_device *dev = to_gpio_consumer_device(item); > + bool live; > + int ret; > + > + ret = kstrtobool(page, &live); > + if (ret) > + return ret; > + > + mutex_lock(&dev->lock); > + > + if ((!live && !gpio_consumer_device_is_live_unlocked(dev)) || > + (live && gpio_consumer_device_is_live_unlocked(dev))) if (live ^ gpio_consumer_device_is_live_unlocked(dev)) ? > + ret = -EPERM; > + else if (live) > + ret = gpio_consumer_device_activate_unlocked(dev); > + else > + gpio_consumer_device_deactivate_unlocked(dev); > + > + mutex_unlock(&dev->lock); > + > + return ret ?: count; > +} ... > +static int __init gpio_consumer_init(void) > +{ > + int ret; > + > + ret = platform_driver_register(&gpio_consumer_driver); > + if (ret) { > + pr_err("Failed to register the gpio-consumer platform driver: %d\n", Isn't name will be duplicated via pr_fmt()? > + ret); If yes, can be shortened to pr_err("Failed to register the driver: %d\n", ret); > + return ret; > + } > + > + config_group_init(&gpio_consumer_config_subsys.su_group); > + mutex_init(&gpio_consumer_config_subsys.su_mutex); > + ret = configfs_register_subsystem(&gpio_consumer_config_subsys); > + if (ret) { > + pr_err("Failed to register the '%s' configfs subsystem: %d\n", > + gpio_consumer_config_subsys.su_group.cg_item.ci_namebuf, > + ret); > + mutex_destroy(&gpio_consumer_config_subsys.su_mutex); > + platform_driver_unregister(&gpio_consumer_driver); > + return ret; > + } > + > + return 0; > +} -- With Best Regards, Andy Shevchenko