This patch introduces the memalloc_noio_resume flag in 'struct dev_pm_info' so that the PM core can tell mm not to allocate memory with the GFP_KERNEL flag, avoiding a possible deadlock. As explained in the comment, any GFP_KERNEL allocation inside runtime_resume on any device in the path from a block or network device to the root device in the device tree may cause a deadlock, so the newly introduced pm_runtime_set_memalloc_noio() sets or clears the flag on every device along that path, walking up from the device toward the root. This patch also introduces pm_runtime_get_memalloc_noio() because the flag may be accessed in a block device's error handling path (for example, USB device reset). Cc: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx> Cc: "Rafael J. Wysocki" <rjw@xxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxxxxx> --- v3: - introduce pm_runtime_get_memalloc_noio() - hold one global lock on pm_runtime_set_memalloc_noio - hold device power lock when accessing memalloc_noio_resume flag suggested by Alan Stern - implement pm_runtime_set_memalloc_noio without recursion suggested by Alan Stern v2: - introduce pm_runtime_set_memalloc_noio() --- drivers/base/power/runtime.c | 72 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 1 + include/linux/pm_runtime.h | 5 +++ 3 files changed, 78 insertions(+) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 3148b10..9fa6ea7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -124,6 +124,78 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); +/* + * pm_runtime_get_memalloc_noio - Get a device's memalloc_noio flag. + * @dev: Device to handle. + * + * Return the device's memalloc_noio flag. + * + * The device power lock is held because bitfield is not SMP-safe. 
+ */ +bool pm_runtime_get_memalloc_noio(struct device *dev) +{ + bool ret; + spin_lock_irq(&dev->power.lock); + ret = dev->power.memalloc_noio_resume; + spin_unlock_irq(&dev->power.lock); + return ret; +} +EXPORT_SYMBOL_GPL(pm_runtime_get_memalloc_noio); + +static int dev_memalloc_noio(struct device *dev, void *data) +{ + return pm_runtime_get_memalloc_noio(dev); +} + +/* + * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. + * @dev: Device to handle. + * @enable: True for setting the flag and False for clearing the flag. + * + * Set the flag for all devices in the path from the device to the + * root device in the device tree if @enable is true, otherwise clear + * the flag for devices in the path whose siblings don't set the flag. + * + * The function should only be called by block device, or network + * device driver for solving the deadlock problem during runtime + * resume: + * if memory allocation with GFP_KERNEL is called inside runtime + * resume callback of any one of its ancestors (or the block device + * itself), the deadlock may be triggered inside the memory + * allocation since it might not complete until the block device + * becomes active and the involved page I/O finishes. The situation + * is pointed out first by Alan Stern. Network devices are involved + * in iSCSI kind of situation. + * + * The lock of dev_hotplug_mutex is held in the function for handling + * hotplug race because pm_runtime_set_memalloc_noio() may be called + * in async probe(). + */ +void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) +{ + static DEFINE_MUTEX(dev_hotplug_mutex); + + mutex_lock(&dev_hotplug_mutex); + while (dev) { + /* hold power lock since bitfield is not SMP-safe. */ + spin_lock_irq(&dev->power.lock); + dev->power.memalloc_noio_resume = enable; + spin_unlock_irq(&dev->power.lock); + + dev = dev->parent; + + /* only clear the flag for one device if all + * children of the device don't set the flag. 
+ */ + if (!dev || (!enable && + device_for_each_child(dev, NULL, + dev_memalloc_noio))) + break; + } + mutex_unlock(&dev_hotplug_mutex); +} +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio); + /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. diff --git a/include/linux/pm.h b/include/linux/pm.h index 03d7bb1..d104579 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -538,6 +538,7 @@ struct dev_pm_info { unsigned int irq_safe:1; unsigned int use_autosuspend:1; unsigned int timer_autosuspends:1; + unsigned int memalloc_noio_resume:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index f271860..b522b09 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -47,6 +47,8 @@ extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); +extern bool pm_runtime_get_memalloc_noio(struct device *dev); +extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); static inline bool pm_children_suspended(struct device *dev) { @@ -149,6 +151,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } +static inline bool pm_runtime_get_memalloc_noio(struct device *dev) { return false; } +static inline void pm_runtime_set_memalloc_noio(struct device *dev, + bool enable){} #endif /* !CONFIG_PM_RUNTIME */ -- 1.7.9.5 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>