[PATCH RT] kernel/time/posix-timer: avoid schedule() while holding the RCU lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On -RT it is possible that an invocation of timer_set() or
timer_delete() preempts an itimer which results in TIMER_RETRY. We can't
retry immediately because if the process preempted the softirq then it
has a higher priority and will loop and retry forever.

As a workaround for -RT the "retry" thread waited for the hrtimer to
complete (hrtimer_wait_for_timer()) or left the CPU via
schedule_timeout() in case of CPU-timer.
During hrtimer_wait_for_timer() the timer could be removed (and memory
freed) which would result in use-after-free for the waiter (in
hrtimer_wait_for_timer()). Therefore the RCU read section has been
extended to cover the whole period while the thread is scheduled out.
This change is part of 8df8ef7b2b9b ("hrtimers: Prepare full
preemption") in the RT patch.

This behaviour ("sleeping" while holding the RCU lock) is reported since
commit 5b72f9643b52 ("rcu: Complain if blocking in preemptible RCU
read-side critical section") and both Daniel Wagner and Grygorii
Strashko noticed [0] it.

I revert the RCU read section to what we had before. I am adding a
`kref' object to avoid removal of the itimer (incl. the hrtimer) while
there is someone waiting on it. This looks simple in timer_settime() and
timer_delete(). We had one lookup, hold the lock, increment ref-count.
If timer is deleted the waiter does the final put and the following RCU
lookup will fail.

I don't understand why the RCU lock is also held in itimer_delete(). At
this point, the process is dead (even exit_signals() was invoked) and
there is nothing that can remove that timer. timer_delete() and
timer_settime() always lookups the timer via its id during the retry.
This is not the case for itimer_delete() which gets a struct k_itimer
handed over. The comment says that "on RT it might race against
deletion" but I have no idea where it might come from.
While I understand why timer_delete() sets
	timer->it_signal = NULL
I don't know why itimer_delete() does the same. At this point there
should be no lookup or removal happen via RCU. Also ->list is protected
via sighand->siglock which is not held at this point. I *think* this was
just copied from the above (timer_delete()) while the timer were
accidently per-thread and fixed in commit 0e568881178f ("fix
posix-timers to have proper per-process scope").

[0] https://lkml.kernel.org/r/ec8b4367-ef5b-5446-2cd0-9f8f7fd6954f@xxxxxxxxx
[1] https://git.kernel.org/history/history/c/0e568881178ff0e0aceeafdb51f9fecab39e1923

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
 include/linux/posix-timers.h |  1 +
 kernel/time/posix-timers.c   | 41 +++++++++++++++++++++++++----------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 672c4f32311e..b4cac3d8da1b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -78,6 +78,7 @@ struct k_itimer {
 	struct list_head	list;
 	struct hlist_node	t_hash;
 	spinlock_t		it_lock;
+	struct kref		kref;
 	const struct k_clock	*kclock;
 	clockid_t		it_clock;
 	timer_t			it_id;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0b568087bd64..77fdd050016d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -465,6 +465,7 @@ static struct k_itimer * alloc_posix_timer(void)
 		return NULL;
 	}
 	memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
+	kref_init(&tmr->kref);
 	return tmr;
 }
 
@@ -475,6 +476,23 @@ static void k_itimer_rcu_free(struct rcu_head *head)
 	kmem_cache_free(posix_timers_cache, tmr);
 }
 
+static void kitimer_get(struct k_itimer *timer)
+{
+	kref_get(&timer->kref);
+}
+
+static void kitimer_release(struct kref *kref)
+{
+	struct k_itimer *tmr = container_of(kref, struct k_itimer, kref);
+
+	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+}
+
+static void kitimer_put(struct k_itimer *timer)
+{
+	kref_put(&timer->kref, kitimer_release);
+}
+
 #define IT_ID_SET	1
 #define IT_ID_NOT_SET	0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -487,7 +505,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
-	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+	kitimer_put(tmr);
 }
 
 static int common_timer_create(struct k_itimer *new_timer)
@@ -904,7 +922,7 @@ static int do_timer_settime(timer_t timer_id, int flags,
 	if (!timr)
 		return -EINVAL;
 
-	rcu_read_lock();
+	kitimer_get(timr);
 	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
@@ -914,12 +932,11 @@ static int do_timer_settime(timer_t timer_id, int flags,
 	unlock_timer(timr, flag);
 	if (error == TIMER_RETRY) {
 		timer_wait_for_callback(kc, timr);
+		kitimer_put(timr);
 		old_spec64 = NULL;	// We already got the old time...
-		rcu_read_unlock();
 		goto retry;
 	}
-	rcu_read_unlock();
-
+	kitimer_put(timr);
 	return error;
 }
 
@@ -1000,15 +1017,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
 	if (!timer)
 		return -EINVAL;
 
-	rcu_read_lock();
 	if (timer_delete_hook(timer) == TIMER_RETRY) {
+		kitimer_get(timer);
 		unlock_timer(timer, flags);
+
 		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
 					timer);
-		rcu_read_unlock();
+		kitimer_put(timer);
 		goto retry_delete;
 	}
-	rcu_read_unlock();
 
 	spin_lock(&current->sighand->siglock);
 	list_del(&timer->list);
@@ -1034,18 +1051,10 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
 	spin_lock_irqsave(&timer->it_lock, flags);
 
-	/* On RT we can race with a deletion */
-	if (!timer->it_signal) {
-		unlock_timer(timer, flags);
-		return;
-	}
-
 	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		rcu_read_lock();
 		unlock_timer(timer, flags);
 		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
 					timer);
-		rcu_read_unlock();
 		goto retry_delete;
 	}
 	list_del(&timer->list);
-- 
2.16.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux