Async suspend-resume patch w/ rwsems (was: Re: [GIT PULL] PM updates for 2.6.33)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tuesday 08 December 2009, Rafael J. Wysocki wrote:
> On Tuesday 08 December 2009, Rafael J. Wysocki wrote:
> > On Tuesday 08 December 2009, Linus Torvalds wrote:
> > > 
> > > On Tue, 8 Dec 2009, Rafael J. Wysocki wrote:
> > > > 
> > > > Anyway, if we use an rwsem, it won't be checkable from interrupt context just
> > > > as well.
> > > 
> > > You can't do a lock() from an interrupt, but the unlocks should be 
> > > irq-safe. 
> > > 
> > > > Suppose we use rwsem and during suspend each child uses a down_read() on a
> > > > parent and then the parent uses down_write() on itself.  What if, whatever the
> > > > reason, the parent is a bit early and does the down_write() before one of the
> > > > children has a chance to do the down_read()?  Aren't we toast?
> > > 
> > > We're toast, but we're toast for a totally unrealted reason: it means that 
> > > you tried to resume a child before a parent, which would be a major bug to 
> > > begin with.
> > > 
> > > Look, I even wrote out the comments, so let me repeat the code one more 
> > > time.
> > > 
> > >  - suspend time calling:
> > >         // This won't block, because we suspend nodes before parents
> > >         down_read(node->parent->lock);
> > >         // Do the part that may block asynchronously
> > >         async_schedule(do_usb_node_suspend, node);
> > > 
> > >  - resume time calling:
> > >         // This won't block, because we resume parents before children,
> > >         // and the children will take the read lock. 
> > >         down_write(leaf->lock);
> > >         // Do the blocking part asynchronously
> > >         async_schedule(usb_node_resume, leaf);
> > > 
> > > See? So when we take the parent lock for suspend, we are guaranteed to do 
> > > so _before_ the parent node itself suspends. And conversely, when we take 
> > > the parent lock (asynchronously) for resume, we're guaranteed to do that 
> > > _after_ the parent node has done its own down_write.
> > > 
> > > And that all depends on just one trivial thing; that the suspend and 
> > > resume is called in the right order (children first vs parent first 
> > > respectively). And that is such a _major_ correctness issue that if that 
> > > isn't correct, your suspend isn't going to work _anyway_.
> > 
> > Understood (I think).
> > 
> > Let's try it, then.  Below is the resume patch based on my previous one in this
> > thread (I have only verified that it builds).
> 
> Ah, I need to check if dev->parent is not NULL before trying to lock it, but
> apart from this it doesn't break things at least.

For completeness, below is the full async suspend/resume patch with rwsems,
which has been (very slightly) tested and doesn't seem to break things.

[Note to Alan: lockdep doesn't seem to complain about the unannotated nested
locks.]

Thanks,
Rafael


---
 drivers/base/power/main.c    |  195 +++++++++++++++++++++++++++++++++++++++----
 include/linux/device.h       |    6 +
 include/linux/pm.h           |    3 
 include/linux/resume-trace.h |    7 +
 4 files changed, 194 insertions(+), 17 deletions(-)

Index: linux-2.6/include/linux/pm.h
===================================================================
--- linux-2.6.orig/include/linux/pm.h
+++ linux-2.6/include/linux/pm.h
@@ -26,6 +26,7 @@
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/timer.h>
+#include <linux/rwsem.h>
 
 /*
  * Callbacks for platform drivers to implement.
@@ -412,9 +413,11 @@ struct dev_pm_info {
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
 	unsigned int		should_wakeup:1;
+	unsigned		async_suspend:1;
 	enum dpm_state		status;		/* Owned by the PM core */
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
+	struct rw_semaphore	rwsem;
 #endif
 #ifdef CONFIG_PM_RUNTIME
 	struct timer_list	suspend_timer;
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -472,6 +472,12 @@ static inline int device_is_registered(s
 	return dev->kobj.state_in_sysfs;
 }
 
+static inline void device_enable_async_suspend(struct device *dev, bool enable)
+{
+	if (dev->power.status == DPM_ON)
+		dev->power.async_suspend = enable;
+}
+
 void driver_init(void);
 
 /*
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -25,6 +25,7 @@
 #include <linux/resume-trace.h>
 #include <linux/rwsem.h>
 #include <linux/interrupt.h>
+#include <linux/async.h>
 
 #include "../base.h"
 #include "power.h"
@@ -42,6 +43,7 @@
 LIST_HEAD(dpm_list);
 
 static DEFINE_MUTEX(dpm_list_mtx);
+static pm_message_t pm_transition;
 
 /*
  * Set once the preparation of devices for a PM transition has started, reset
@@ -56,6 +58,7 @@ static bool transition_started;
 void device_pm_init(struct device *dev)
 {
 	dev->power.status = DPM_ON;
+	init_rwsem(&dev->power.rwsem);
 	pm_runtime_init(dev);
 }
 
@@ -334,25 +337,53 @@ static void pm_dev_err(struct device *de
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int device_resume_noirq(struct device *dev, pm_message_t state)
+static int __device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (!dev->bus)
-		goto End;
+	if (dev->parent)
+		down_read(&dev->parent->power.rwsem);
 
-	if (dev->bus->pm) {
+	if (dev->bus && dev->bus->pm) {
 		pm_dev_dbg(dev, state, "EARLY ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
 	}
- End:
+
+	if (dev->parent)
+		up_read(&dev->parent->power.rwsem);
+	up_write(&dev->power.rwsem);
+
 	TRACE_RESUME(error);
 	return error;
 }
 
+static void async_resume_noirq(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_resume_noirq(dev, pm_transition);
+	if (error)
+		pm_dev_err(dev, pm_transition, " async EARLY", error);
+	put_device(dev);
+}
+
+static int device_resume_noirq(struct device *dev)
+{
+	down_write(&dev->power.rwsem);
+
+	if (dev->power.async_suspend && !pm_trace_is_enabled()) {
+		get_device(dev);
+		async_schedule(async_resume_noirq, dev);
+		return 0;
+	}
+
+	return __device_resume_noirq(dev, pm_transition);
+}
+
 /**
  * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -366,32 +397,36 @@ void dpm_resume_noirq(pm_message_t state
 
 	mutex_lock(&dpm_list_mtx);
 	transition_started = false;
+	pm_transition = state;
 	list_for_each_entry(dev, &dpm_list, power.entry)
 		if (dev->power.status > DPM_OFF) {
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = device_resume_noirq(dev, state);
+			error = device_resume_noirq(dev);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
 	resume_device_irqs();
 }
 EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- * device_resume - Execute "resume" callbacks for given device.
+ * __device_resume - Execute "resume" callbacks for given device.
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
  */
-static int device_resume(struct device *dev, pm_message_t state)
+static int __device_resume(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
+	if (dev->parent)
+		down_read(&dev->parent->power.rwsem);
 	down(&dev->sem);
 
 	if (dev->bus) {
@@ -426,11 +461,38 @@ static int device_resume(struct device *
 	}
  End:
 	up(&dev->sem);
+	if (dev->parent)
+		up_read(&dev->parent->power.rwsem);
+	up_write(&dev->power.rwsem);
 
 	TRACE_RESUME(error);
 	return error;
 }
 
+static void async_resume(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_resume(dev, pm_transition);
+	if (error)
+		pm_dev_err(dev, pm_transition, " async", error);
+	put_device(dev);
+}
+
+static int device_resume(struct device *dev)
+{
+	down_write(&dev->power.rwsem);
+
+	if (dev->power.async_suspend && !pm_trace_is_enabled()) {
+		get_device(dev);
+		async_schedule(async_resume, dev);
+		return 0;
+	}
+
+	return __device_resume(dev, pm_transition);
+}
+
 /**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -444,6 +506,7 @@ static void dpm_resume(pm_message_t stat
 
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.next);
 
@@ -454,7 +517,7 @@ static void dpm_resume(pm_message_t stat
 			dev->power.status = DPM_RESUMING;
 			mutex_unlock(&dpm_list_mtx);
 
-			error = device_resume(dev, state);
+			error = device_resume(dev);
 
 			mutex_lock(&dpm_list_mtx);
 			if (error)
@@ -469,6 +532,7 @@ static void dpm_resume(pm_message_t stat
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
 }
 
 /**
@@ -533,6 +597,8 @@ static void dpm_complete(pm_message_t st
 	mutex_unlock(&dpm_list_mtx);
 }
 
+static atomic_t async_error;
+
 /**
  * dpm_resume_end - Execute "resume" callbacks and complete system transition.
  * @state: PM transition of the system being carried out.
@@ -580,20 +646,59 @@ static pm_message_t resume_event(pm_mess
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int device_suspend_noirq(struct device *dev, pm_message_t state)
+static int __device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	if (!dev->bus)
-		return 0;
+	down_write(&dev->power.rwsem);
 
-	if (dev->bus->pm) {
+	if (dev->bus && dev->bus->pm) {
 		pm_dev_dbg(dev, state, "LATE ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
 	}
+
+	up_write(&dev->power.rwsem);
+	if (dev->parent)
+		up_read(&dev->parent->power.rwsem);
+
 	return error;
 }
 
+static void async_suspend_noirq(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error = atomic_read(&async_error);
+
+	if (error) {
+		if (dev->parent)
+			up_read(&dev->parent->power.rwsem);
+		dev->power.status = DPM_OFF;
+		return;
+	}
+
+	error = __device_suspend_noirq(dev, pm_transition);
+	if (error) {
+		pm_dev_err(dev, pm_transition, " async LATE", error);
+		dev->power.status = DPM_OFF;
+		atomic_set(&async_error, error);
+	}
+	put_device(dev);
+}
+
+static int device_suspend_noirq(struct device *dev)
+{
+	if (dev->parent)
+		down_read(&dev->parent->power.rwsem);
+
+	if (dev->power.async_suspend) {
+		get_device(dev);
+		async_schedule(async_suspend_noirq, dev);
+		return 0;
+	}
+
+	return __device_suspend_noirq(dev, pm_transition);
+}
+
 /**
  * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -608,15 +713,21 @@ int dpm_suspend_noirq(pm_message_t state
 
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = device_suspend_noirq(dev, state);
+		dev->power.status = DPM_OFF_IRQ;
+		error = device_suspend_noirq(dev);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
+			dev->power.status = DPM_OFF;
 			break;
 		}
-		dev->power.status = DPM_OFF_IRQ;
+		error = atomic_read(&async_error);
+		if (error)
+			break;
 	}
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
 	if (error)
 		dpm_resume_noirq(resume_event(state));
 	return error;
@@ -628,10 +739,11 @@ EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
  */
-static int device_suspend(struct device *dev, pm_message_t state)
+static int __device_suspend(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
+	down_write(&dev->power.rwsem);
 	down(&dev->sem);
 
 	if (dev->class) {
@@ -668,10 +780,50 @@ static int device_suspend(struct device 
 	}
  End:
 	up(&dev->sem);
+	up_write(&dev->power.rwsem);
+	if (dev->parent)
+		up_read(&dev->parent->power.rwsem);
 
 	return error;
 }
 
+static void async_suspend(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error = atomic_read(&async_error);
+
+	if (error) {
+		if (dev->parent)
+			up_read(&dev->parent->power.rwsem);
+		dev->power.status = DPM_SUSPENDING;
+		goto End;
+	}
+
+	error = __device_suspend(dev, pm_transition);
+	if (error) {
+		pm_dev_err(dev, pm_transition, " async", error);
+		dev->power.status = DPM_SUSPENDING;
+		atomic_set(&async_error, error);
+	}
+
+ End:
+	put_device(dev);
+}
+
+static int device_suspend(struct device *dev, pm_message_t state)
+{
+	if (dev->parent)
+		down_read(&dev->parent->power.rwsem);
+
+	if (dev->power.async_suspend) {
+		get_device(dev);
+		async_schedule(async_suspend, dev);
+		return 0;
+	}
+
+	return __device_suspend(dev, pm_transition);
+}
+
 /**
  * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -683,10 +835,12 @@ static int dpm_suspend(pm_message_t stat
 
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.prev);
 
 		get_device(dev);
+		dev->power.status = DPM_OFF;
 		mutex_unlock(&dpm_list_mtx);
 
 		error = device_suspend(dev, state);
@@ -694,16 +848,22 @@ static int dpm_suspend(pm_message_t stat
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
 			pm_dev_err(dev, state, "", error);
+			dev->power.status = DPM_SUSPENDING;
 			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_OFF;
 		if (!list_empty(&dev->power.entry))
 			list_move(&dev->power.entry, &list);
 		put_device(dev);
+		error = atomic_read(&async_error);
+		if (error)
+			break;
 	}
 	list_splice(&list, dpm_list.prev);
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
+	if (!error)
+		error = atomic_read(&async_error);
 	return error;
 }
 
@@ -762,6 +922,7 @@ static int dpm_prepare(pm_message_t stat
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
 	transition_started = true;
+	atomic_set(&async_error, 0);
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.next);
 
Index: linux-2.6/include/linux/resume-trace.h
===================================================================
--- linux-2.6.orig/include/linux/resume-trace.h
+++ linux-2.6/include/linux/resume-trace.h
@@ -6,6 +6,11 @@
 
 extern int pm_trace_enabled;
 
+static inline int pm_trace_is_enabled(void)
+{
+       return pm_trace_enabled;
+}
+
 struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(const void *tracedata, unsigned int user);
@@ -17,6 +22,8 @@ extern void generate_resume_trace(const 
 
 #else
 
+static inline int pm_trace_is_enabled(void) { return 0; }
+
 #define TRACE_DEVICE(dev) do { } while (0)
 #define TRACE_RESUME(dev) do { } while (0)
 
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux