On 09/14/2017 04:03 AM, Kenjiro Nakayama wrote: > This patch adds a timeout for the completion of netlink command reply. > > Current code waits for the netlink reply from userspace and the status > change, but it hangs forever when userspace failed to reply. To fix > this issue, this patch replace wait_for_completion with > wait_for_completion_timeout. > > Default timeout is 300 sec that gives enough time, and this is > configurable via configfs. > > Changes in v2: > > Makes default timeout enough long as 300 sec. > Makes timeout configurable via configfs. > Suggested by Mike Christie. > > Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@xxxxxxxxx> > --- > drivers/target/target_core_user.c | 59 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 56 insertions(+), 3 deletions(-) > > diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c > index 942d094269fb..ef8570cb68a8 100644 > --- a/drivers/target/target_core_user.c > +++ b/drivers/target/target_core_user.c > @@ -150,6 +150,9 @@ struct tcmu_dev { > wait_queue_head_t nl_cmd_wq; > > char dev_config[TCMU_CONFIG_LEN]; > + > + /* timeout for netlink reply from userspace */ > + unsigned int nl_timeout; > }; > > #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev) > @@ -1332,8 +1335,14 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) > if (!tcmu_kern_cmd_reply_supported) > return 0; > > - pr_debug("sleeping for nl reply\n"); > - wait_for_completion(&nl_cmd->complete); > + pr_debug("sleeping for nl reply with timeout %lu\n", > + msecs_to_jiffies(udev->nl_timeout)); > + ret = wait_for_completion_timeout(&nl_cmd->complete, > + msecs_to_jiffies(udev->nl_timeout)); > + if (!ret) { > + printk(KERN_ERR "timeout waiting for nl reply from userspace\n"); > + return -ETIME; If you timeout here then you cannot execute new sync commands until we eventually get a response, so you only avoid the hang for this initial command. New commands will still hang. You need to do the bottom half of the function to reinit the udev->curr_nl_cmd and wake up other calls waiting. > + } > > spin_lock(&udev->nl_cmd_lock); > nl_cmd->cmd = TCMU_CMD_UNSPEC; > @@ -1506,6 +1515,10 @@ static int tcmu_configure_device(struct se_device *dev) > dev->dev_attrib.emulate_write_cache = 0; > dev->dev_attrib.hw_queue_depth = 128; > > + /* Set default timeout 300 sec */ > + if (!udev->nl_timeout) > + udev->nl_timeout = 300000; > + > /* > * Get a ref incase userspace does a close on the uio device before > * LIO has initiated tcmu_free_device. > @@ -1610,7 +1623,7 @@ static void tcmu_destroy_device(struct se_device *dev) > > enum { > Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, > - Opt_err, > + Opt_nl_timeout, Opt_err, > }; > > static match_table_t tokens = { > @@ -1618,6 +1631,7 @@ static match_table_t tokens = { > {Opt_dev_size, "dev_size=%u"}, > {Opt_hw_block_size, "hw_block_size=%u"}, > {Opt_hw_max_sectors, "hw_max_sectors=%u"}, > + {Opt_nl_timeout, "nl_timeout=%u"}, > {Opt_err, NULL} > }; > > @@ -1692,6 +1706,17 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, > ret = tcmu_set_dev_attrib(&args[0], > &(dev->dev_attrib.hw_max_sectors)); > break; > + case Opt_nl_timeout: > + arg_p = match_strdup(&args[0]); > + if (!arg_p) { > + ret = -ENOMEM; > + break; > + } > + ret = kstrtou32(arg_p, 0, &udev->nl_timeout); Why have the user pass it in as msecs? I think just passing it in seconds is better because we do not need it that precise and it matches the other timer's units so it is consistent for the user. > + kfree(arg_p); > + if (ret < 0) > + pr_err("kstrtoul() failed for nl_timeout=\n"); > + break; > default: > break; > } > @@ -1879,11 +1904,39 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item, > } > CONFIGFS_ATTR(tcmu_, emulate_write_cache); > > +static ssize_t tcmu_nl_timeout_show(struct config_item *item, char *page) > +{ > + struct se_dev_attrib *da = container_of(to_config_group(item), > + struct se_dev_attrib, da_group); > + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); > + > + return snprintf(page, PAGE_SIZE, "%u\n", udev->nl_timeout); > +} > + > +static ssize_t tcmu_nl_timeout_store(struct config_item *item, > + const char *page, size_t count) > +{ > + struct se_dev_attrib *da = container_of(to_config_group(item), > + struct se_dev_attrib, da_group); > + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); > + u32 val; > + int ret; > + > + ret = kstrtou32(page, 0, &val); > + if (ret < 0) > + return ret; > + > + udev->nl_timeout = val; > + return count; > +} > +CONFIGFS_ATTR(tcmu_, nl_timeout); > + > static struct configfs_attribute *tcmu_attrib_attrs[] = { > &tcmu_attr_cmd_time_out, > &tcmu_attr_dev_config, > &tcmu_attr_dev_size, > &tcmu_attr_emulate_write_cache, > + &tcmu_attr_nl_timeout, > NULL, > }; > > -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html