[PATCH RT 6/6] read lock Priority Inheritance implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds priority inheritance (PI) to the read/write locks.
When a task blocks on a lock whose PI chain eventually reaches a lock
owned by readers, every one of those readers that has a lower priority
than the blocked task is boosted.

Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
---
 include/linux/init_task.h |    8 +++
 include/linux/rt_lock.h   |    4 +
 kernel/fork.c             |    1 
 kernel/rtmutex.c          |  115 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 118 insertions(+), 10 deletions(-)

Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h	2008-03-25 23:14:47.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/rtmutex.h>
 #include <asm/atomic.h>
 #include <linux/spinlock_types.h>
+#include <linux/sched_prio.h>
 
 #ifdef CONFIG_PREEMPT_RT
 /*
@@ -66,6 +67,7 @@ struct rw_mutex {
 	atomic_t		count;	/* number of times held for read */
 	atomic_t		owners; /* number of owners as readers */
 	struct list_head	readers;
+	int prio;
 };
 
 /*
@@ -98,6 +100,7 @@ typedef struct {
 
 #define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
 	{ .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex),	\
+	  .owners.prio = MAX_PRIO,					\
 	  RW_DEP_MAP_INIT(name) }
 #else /* !PREEMPT_RT */
 
@@ -196,6 +199,7 @@ extern int __bad_func_type(void);
 
 #define __RWSEM_INITIALIZER(name) \
 	{ .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex),	\
+	  .owners.prio = MAX_PRIO,					\
 	  RW_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(lockname) \
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c	2008-03-25 23:14:47.000000000 -0400
@@ -133,6 +133,8 @@ static inline void init_lists(struct rt_
 	}
 }
 
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio);
+
 /*
  * Calculate task priority from the waiter list priority
  *
@@ -143,6 +145,8 @@ int rt_mutex_getprio(struct task_struct 
 {
 	int prio = min(task->normal_prio, get_rcu_prio(task));
 
+	prio = rt_mutex_get_readers_prio(task, prio);
+
 	if (likely(!task_has_pi_waiters(task)))
 		return prio;
 
@@ -185,6 +189,11 @@ static void rt_mutex_adjust_prio(struct 
  */
 int max_lock_depth = 1024;
 
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth);
 /*
  * Adjust the priority chain. Also used for deadlock detection.
  * Decreases task's usage by one - may thus free the task.
@@ -194,7 +203,8 @@ static int rt_mutex_adjust_prio_chain(st
 				      int deadlock_detect,
 				      struct rt_mutex *orig_lock,
 				      struct rt_mutex_waiter *orig_waiter,
-				      struct task_struct *top_task)
+				      struct task_struct *top_task,
+				      int recursion_depth)
 {
 	struct rt_mutex *lock;
 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -296,8 +306,13 @@ static int rt_mutex_adjust_prio_chain(st
 	/* Grab the next task */
 	task = rt_mutex_owner(lock);
 
-	/* Writers do not boost their readers. */
+	/*
+	 * Readers are special. We may need to boost more than one owner.
+	 */
 	if (task == RT_RW_READER) {
+		ret = rt_mutex_adjust_readers(orig_lock, orig_waiter,
+					      top_task, lock,
+					      recursion_depth);
 		spin_unlock_irqrestore(&lock->wait_lock, flags);
 		goto out;
 	}
@@ -479,9 +494,12 @@ static int task_blocks_on_rt_mutex(struc
 	spin_unlock(&current->pi_lock);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
-		/* readers are not handled */
-		if (owner == RT_RW_READER)
-			return 0;
+		/* readers are handled differently */
+		if (owner == RT_RW_READER) {
+			res = rt_mutex_adjust_readers(lock, waiter,
+						      current, lock, 0);
+			return res;
+		}
 
 		spin_lock(&owner->pi_lock);
 		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -508,7 +526,7 @@ static int task_blocks_on_rt_mutex(struc
 	spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 current);
+					 current, 0);
 
 	spin_lock_irq(&lock->wait_lock);
 
@@ -625,7 +643,7 @@ static void remove_waiter(struct rt_mute
 
 	spin_unlock_irqrestore(&lock->wait_lock, flags);
 
-	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0);
 
 	spin_lock_irq(&lock->wait_lock);
 }
@@ -652,7 +670,7 @@ void rt_mutex_adjust_pi(struct task_stru
 	get_task_struct(task);
 	spin_unlock_irqrestore(&task->pi_lock, flags);
 
-	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0);
 }
 
 /*
@@ -1088,7 +1106,6 @@ static int try_to_take_rw_read(struct rw
 			if (rt_rwlock_pending_writer(rwm))
 				return 0;
 			if (rt_mutex_has_waiters(mutex)) {
-				/* readers don't do PI */
 				waiter = rt_mutex_top_waiter(mutex);
 				if (current->prio >= waiter->task->prio)
 					return 0;
@@ -1102,7 +1119,7 @@ static int try_to_take_rw_read(struct rw
 				spin_unlock(&mtxowner->pi_lock);
 			}
 		} else if (rt_mutex_has_waiters(mutex)) {
-			/* Readers don't do PI */
+			/* Readers do things differently with respect to PI */
 			waiter = rt_mutex_top_waiter(mutex);
 			spin_lock(&current->pi_lock);
 			plist_del(&waiter->pi_list_entry, &current->pi_waiters);
@@ -1608,6 +1625,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
 
 	/* If no one is blocked, then clear all ownership */
 	if (!rt_mutex_has_waiters(mutex)) {
+		rwm->prio = MAX_PRIO;
 		/*
 		 * If count is not zero, we are under the limit with
 		 * no other readers.
@@ -1838,11 +1856,88 @@ void rt_mutex_rwsem_init(struct rw_mutex
 	rwm->owner = NULL;
 	atomic_set(&rwm->count, 0);
 	atomic_set(&rwm->owners, 0);
+	rwm->prio = MAX_PRIO;
 	INIT_LIST_HEAD(&rwm->readers);
 
 	__rt_mutex_init(mutex, name);
 }
 
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+	struct reader_lock_struct *rls;
+	struct rw_mutex *rwm;
+	int lock_prio;
+	int i;
+
+	for (i = 0; i < task->reader_lock_count; i++) {
+		rls = &task->owned_read_locks[i];
+		rwm = rls->lock;
+		if (rwm) {
+			lock_prio = rwm->prio;
+			if (prio > lock_prio)
+				prio = lock_prio;
+		}
+	}
+
+	return prio;
+}
+/* Publish the top waiter's prio in rwm->prio and re-walk each reader's chain. */
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth)
+{
+	struct reader_lock_struct *rls;
+	struct rt_mutex_waiter *waiter;
+	struct task_struct *task;
+	struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex);
+
+	if (rt_mutex_has_waiters(lock)) {
+		waiter = rt_mutex_top_waiter(lock);
+		/*
+		 * Do we need to grab the task->pi_lock?
+		 * Really, we are only reading it. If it
+		 * changes, then that should follow this chain
+		 * too.
+		 */
+		rwm->prio = waiter->task->prio;
+	} else
+		rwm->prio = MAX_PRIO;	/* no waiters: same value the initializers use */
+
+	if (recursion_depth >= MAX_RWLOCK_DEPTH) {	/* rwm->prio was already updated above */
+		WARN_ON(1);
+		return 1;	/* nonzero: depth limit hit, readers are not walked */
+	}
+
+	list_for_each_entry(rls, &rwm->readers, list) {	/* NOTE(review): the chain-walk caller has not yet dropped lock->wait_lock - confirm */
+		task = rls->task;
+		get_task_struct(task);
+		/*
+		 * rt_mutex_adjust_prio_chain will do
+		 * the put_task_struct
+		 */
+		rt_mutex_adjust_prio_chain(task, 0, orig_lock,
+					   orig_waiter, top_task,
+					   recursion_depth+1);	/* NOTE(review): result is discarded - confirm intended */
+	}
+
+	return 0;
+}
+#else
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth)
+{
+	return 0;	/* #else stub: does nothing, all arguments unused */
+}
+/* #else stub: @prio is returned unchanged; @task is not examined. */
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+	return prio;
+}
 #endif /* CONFIG_PREEMPT_RT */
 
 #ifdef CONFIG_PREEMPT_BKL
Index: linux-2.6.24.4-rt4/include/linux/init_task.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/init_task.h	2008-03-25 16:41:47.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/init_task.h	2008-03-25 23:14:47.000000000 -0400
@@ -99,6 +99,13 @@ extern struct nsproxy init_nsproxy;
 #define INIT_PREEMPT_RCU_BOOST(tsk)
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
+#ifdef CONFIG_PREEMPT_RT /* point the .task of every owned_read_locks[] slot at tsk */
+# define INIT_RW_OWNERS(tsk) .owned_read_locks = {			\
+		[0 ... (MAX_RWLOCK_DEPTH - 1) ] = { .task = &tsk } },
+#else /* !CONFIG_PREEMPT_RT: expands to nothing */
+# define INIT_RW_OWNERS(tsk)
+#endif /* CONFIG_PREEMPT_RT */
+
 extern struct group_info init_groups;
 
 #define INIT_STRUCT_PID {						\
@@ -189,6 +196,7 @@ extern struct group_info init_groups;
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_PREEMPT_RCU_BOOST(tsk)					\
+	INIT_RW_OWNERS(tsk)						\
 }
 
 
Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c	2008-03-25 23:14:47.000000000 -0400
@@ -1214,6 +1214,7 @@ static struct task_struct *copy_process(
 			INIT_LIST_HEAD(&p->owned_read_locks[i].list);
 			p->owned_read_locks[i].count = 0;
 			p->owned_read_locks[i].lock = NULL;
+			p->owned_read_locks[i].task = p;
 		}
 	}
 #endif

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux