Hi Junchang, Please see inline comments below: On Mon, 29 Apr 2019 22:38:43 +0800, Junchang Wang wrote: > Replace the plain accesses with READ_ONCE()/WRITE_ONCE(). And then update How about: s/the plain/racy plain/ ? > the tex file accordingly. A new scheme, which directly extracts snippet > from rcu.[hc], is applied. > > Signed-off-by: Junchang Wang <junchangwang@xxxxxxxxx> > Reviewed-by: Akira Yokosawa <akiyks@xxxxxxxxx> > --- > CodeSamples/defer/rcu.c | 30 ++++++++------- > CodeSamples/defer/rcu.h | 21 ++++++---- > appendix/toyrcu/toyrcu.tex | 95 +++++++++++++++++++--------------------------- > 3 files changed, 68 insertions(+), 78 deletions(-) > > diff --git a/CodeSamples/defer/rcu.c b/CodeSamples/defer/rcu.c > index 4b868cc..4ca2739 100644 > --- a/CodeSamples/defer/rcu.c > +++ b/CodeSamples/defer/rcu.c > @@ -21,45 +21,47 @@ > #include "../api.h" > #include "rcu.h" > > -void synchronize_rcu(void) > +//\begin{snippet}[labelbase=ln:defer:rcu:synchronize,gobbleblank=yes,commandchars=\%\@\$] > +void synchronize_rcu(void) //\lnlbl{syn:b} > { > int t; > We need a "//\fcvblank" here. > /* Memory barrier ensures mutation seen before grace period. */ > > - smp_mb(); > + smp_mb(); //\lnlbl{syn:mb1} > > /* Only one synchronize_rcu() at a time. */ > > - spin_lock(&rcu_gp_lock); > + spin_lock(&rcu_gp_lock); //\lnlbl{syn:spinlock} > > /* Advance to a new grace-period number, enforce ordering. */ > > - rcu_gp_ctr += 2; > - smp_mb(); > + WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + 2); //\lnlbl{syn:increasegp} > + smp_mb(); //\lnlbl{syn:mb2} > > /* > * Wait until all threads are either out of their RCU read-side > * critical sections or are aware of the new grace period. > */ > > - for_each_thread(t) { > - while ((per_thread(rcu_reader_gp, t) & 0x1) && > - ((per_thread(rcu_reader_gp, t) - rcu_gp_ctr) < 0)) { > + for_each_thread(t) { //\lnlbl{syn:scan:b} > + while ((per_thread(rcu_reader_gp, t) & 0x1) &&//\lnlbl{syn:even} > + ((per_thread(rcu_reader_gp, t) - //\lnlbl{syn:gt1} > + rcu_gp_ctr) < 0)) { //\lnlbl{syn:gt2} > /*@@@ poll(NULL, 0, 10); */ The original snippet in toyrcu.tex corresponds to this poll() call. > - barrier(); > + barrier(); //\lnlbl{syn:barrier} So one way to keep the snippet consistent with the text is to use the following construct. #ifndef FCV_SNIPPET /*@@@ poll(NULL, 0, 10); */ barrier(); #else poll(NULL, 0, 10); //\lnlbl{syn:poll} #endif > } > - } > + } //\lnlbl{syn:scan:e} > > /* Let other synchronize_rcu() instances move ahead. */ > > - spin_unlock(&rcu_gp_lock); > + spin_unlock(&rcu_gp_lock); //\lnlbl{syn:spinunlock} > > /* Ensure that any subsequent free()s happen -after- above checks. */ > > - smp_mb(); > -} > - > + smp_mb(); //\lnlbl{syn:mb3} > +} //\lnlbl{syn:e} > +//\end{snippet} > #ifdef TEST > #include "rcutorture.h" > #endif /* #ifdef TEST */ > diff --git a/CodeSamples/defer/rcu.h b/CodeSamples/defer/rcu.h > index f493f8b..3641c33 100644 > --- a/CodeSamples/defer/rcu.h > +++ b/CodeSamples/defer/rcu.h > @@ -31,7 +31,8 @@ static inline void rcu_init(void) > init_per_thread(rcu_reader_gp_snap, 0); > } > > -static inline void rcu_read_lock(void) > +//\begin{snippet}[labelbase=ln:defer:rcu:read_lock_unlock,gobbleblank=yes,commandchars=\%\@\$] > +static inline void rcu_read_lock(void) //\lnlbl{lock:b} > { > /* > * Copy the current GP counter to this thread's counter, setting > @@ -41,11 +42,12 @@ static inline void rcu_read_lock(void) > * periodic per-thread processing.) > */ > > - __get_thread_var(rcu_reader_gp) = rcu_gp_ctr + 1; > - smp_mb(); > -} > - > -static inline void rcu_read_unlock(void) > + __get_thread_var(rcu_reader_gp) = //\lnlbl{lock:gp1} > + READ_ONCE(rcu_gp_ctr) + 1; //\lnlbl{lock:gp2} > + smp_mb(); //\lnlbl{lock:mb} > +} //\lnlbl{lock:e} > + //\fcvblank > +static inline void rcu_read_unlock(void) //\lnlbl{unlock:b} > { > /* > * Copy the current GP counter to this thread's counter, but > @@ -54,8 +56,11 @@ static inline void rcu_read_unlock(void) > * the memory barrier requires periodic per-thread processing.) > */ > > - smp_mb(); > - __get_thread_var(rcu_reader_gp) = rcu_gp_ctr; > + smp_mb(); //\lnlbl{unlock:mb} > + __get_thread_var(rcu_reader_gp) = //\lnlbl{unlock:gp1} > + READ_ONCE(rcu_gp_ctr); //\lnlbl{unlock:gp2} > } > + //\fcvblank > +//\end{snippet} > > extern void synchronize_rcu(void); > diff --git a/appendix/toyrcu/toyrcu.tex b/appendix/toyrcu/toyrcu.tex > index c0a45a8..f7fc07b 100644 > --- a/appendix/toyrcu/toyrcu.tex > +++ b/appendix/toyrcu/toyrcu.tex > @@ -1218,44 +1218,8 @@ thread-local accesses to one, as is done in the next section. > \end{listing} > > \begin{listing}[tbp] > -{ \scriptsize > -\begin{verbbox} > - 1 static void rcu_read_lock(void) > - 2 { > - 3 __get_thread_var(rcu_reader_gp) = > - 4 ACCESS_ONCE(rcu_gp_ctr) + 1; > - 5 smp_mb(); > - 6 } > - 7 > - 8 static void rcu_read_unlock(void) > - 9 { > -10 smp_mb(); > -11 __get_thread_var(rcu_reader_gp) = > -12 ACCESS_ONCE(rcu_gp_ctr); > -13 } > -14 > -15 void synchronize_rcu(void) > -16 { > -17 int t; > -18 > -19 smp_mb(); > -20 spin_lock(&rcu_gp_lock); > -21 ACCESS_ONCE(rcu_gp_ctr) += 2; > -22 smp_mb(); > -23 for_each_thread(t) { > -24 while ((per_thread(rcu_reader_gp, t) & 0x1) && > -25 ((per_thread(rcu_reader_gp, t) - > -26 ACCESS_ONCE(rcu_gp_ctr)) < 0)) { > -27 poll(NULL, 0, 10); > -28 } > -29 } > -30 spin_unlock(&rcu_gp_lock); > -31 smp_mb(); > -32 } > -\end{verbbox} > -} > -\centering > -\theverbbox > +\input{CodeSamples/defer/rcu@read_lock_unlock.fcv}\vspace*{-11pt}\fvset{firstnumber=last} > +\input{CodeSamples/defer/rcu@xxxxxxxxxxxxxxx}\fvset{firstnumber=auto} > \caption{Free-Running Counter Using RCU} > \label{lst:app:toyrcu:Free-Running Counter Using RCU} > \end{listing} > @@ -1267,25 +1231,34 @@ that takes on only even-numbered values, with data shown in > Listing~\ref{lst:app:toyrcu:Data for Free-Running Counter Using RCU}. > The resulting \co{rcu_read_lock()} implementation is extremely > straightforward. > -Lines~3 and~4 simply add one to the global free-running \co{rcu_gp_ctr} > +\begin{lineref}[ln:defer:rcu:read_lock_unlock:lock] > +Lines~\lnref{gp1} and~\lnref{gp2} simply > +add one to the global free-running \co{rcu_gp_ctr} > variable and stores the resulting odd-numbered value into the > \co{rcu_reader_gp} per-thread variable. > -Line~5 executes a memory barrier to prevent the content of the > +Line~\lnref{mb} executes a memory barrier to prevent the content of the > subsequent RCU read-side critical section from ``leaking out''. > +\end{lineref} > > +\begin{lineref}[ln:defer:rcu:read_lock_unlock:unlock] > The \co{rcu_read_unlock()} implementation is similar. > -Line~10 executes a memory barrier, again to prevent the prior RCU > +Line~\lnref{mb} executes a memory barrier, again to prevent the prior RCU > read-side critical section from ``leaking out''. > -Lines~11 and~12 then copy the \co{rcu_gp_ctr} global variable to the > +Lines~\lnref{gp1} and~\lnref{gp2} then > +copy the \co{rcu_gp_ctr} global variable to the > \co{rcu_reader_gp} per-thread variable, leaving this per-thread > variable with an even-numbered value so that a concurrent instance > of \co{synchronize_rcu()} will know to ignore it. > +\end{lineref} > > \QuickQuiz{} > + \begin{lineref}[ln:defer:rcu:read_lock_unlock:unlock] > If any even value is sufficient to tell \co{synchronize_rcu()} > - to ignore a given task, why don't lines~10 and~11 of > + to ignore a given task, why don't lines~\lnref{gp1} > + and~\lnref{gp2} of > Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU} > simply assign zero to \co{rcu_reader_gp}? > + \end{lineref} > \QuickQuizAnswer{ > Assigning zero (or any other even-numbered constant) > would in fact work, but assigning the value of > @@ -1294,39 +1267,46 @@ of \co{synchronize_rcu()} will know to ignore it. > thread last exited an RCU read-side critical section. > } \QuickQuizEnd > > +\begin{lineref}[ln:defer:rcu:synchronize:syn] > Thus, \co{synchronize_rcu()} could wait for all of the per-thread > \co{rcu_reader_gp} variables to take on even-numbered values. > However, it is possible to do much better than that because > \co{synchronize_rcu()} need only wait on \emph{pre-existing} > RCU read-side critical sections. > -Line~19 executes a memory barrier to prevent prior manipulations > +Line~\lnref{mb1} executes a memory barrier to prevent prior manipulations > of RCU-protected data structures from being reordered (by either > -the CPU or the compiler) to follow the increment on line~21. > -Line~20 acquires the \co{rcu_gp_lock} (and line~30 releases it) > +the CPU or the compiler) to follow the increment on > +line~\lnref{increasegp}. > +Line~\lnref{spinlock} acquires the \co{rcu_gp_lock} > +(and line~\lnref{spinunlock} releases it) > in order to prevent multiple > \co{synchronize_rcu()} instances from running concurrently. > -Line~21 then increments the global \co{rcu_gp_ctr} variable by > +Line~\lnref{increasegp} then increments the global \co{rcu_gp_ctr} variable by > two, so that all pre-existing RCU read-side critical sections will > have corresponding per-thread \co{rcu_reader_gp} variables with > values less than that of \co{rcu_gp_ctr}, modulo the machine's > word size. > Recall also that threads with even-numbered values of \co{rcu_reader_gp} > are not in an RCU read-side critical section, > -so that lines~23-29 scan the \co{rcu_reader_gp} values until they > -all are either even (line~24) or are greater than the global > -\co{rcu_gp_ctr} (lines~25-26). > -Line~27 blocks for a short period of time to wait for a > +so that lines~\lnref{scan:b}-\lnref{scan:e} > +scan the \co{rcu_reader_gp} values until they > +all are either even (line~\lnref{even}) or are greater than the global > +\co{rcu_gp_ctr} (lines~\lnref{gt1}-\lnref{gt2}). > +Line~\lnref{barrier} blocks for a short period of time to wait for a s/\lnref{barrier}/\lnref{poll}/ "poll" is the label used in the "#ifndef FCV_SNIPPET" construct suggested above. > pre-existing RCU read-side critical section, but this can be replaced with > a spin-loop if grace-period latency is of the essence. > -Finally, the memory barrier at line~31 ensures that any subsequent > +Finally, the memory barrier at line~\lnref{mb3} ensures that any subsequent > destruction will not be reordered into the preceding loop. > +\end{lineref} > > \QuickQuiz{} > - Why are the memory barriers on lines~19 and~31 of > + \begin{lineref}[ln:defer:rcu:synchronize:syn] > + Why are the memory barriers on lines~\lnref{mb1} and~\lnref{mb3} of > Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU} > needed? > Aren't the memory barriers inherent in the locking > - primitives on lines~20 and~30 sufficient? > + primitives on lines~\lnref{spinlock} and~\lnref{spinunlock} sufficient? > + \end{lineref} > \QuickQuizAnswer{ > These memory barriers are required because the locking > primitives are only guaranteed to confine the critical > @@ -1355,11 +1335,12 @@ such CPUs. > This work is left as an exercise for the reader. > } \QuickQuizEnd > > +\begin{lineref}[ln:defer:rcu:read_lock_unlock:lock] > This implementation suffers from some serious shortcomings in > addition to the high update-side overhead noted earlier. > First, it is no longer permissible to nest RCU read-side critical > sections, a topic that is taken up in the next section. > -Second, if a reader is preempted at line~3 of > +Second, if a reader is preempted at line~\lnref{gp1} of > Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU} after fetching from > \co{rcu_gp_ctr} but before storing to \co{rcu_reader_gp}, > and if the \co{rcu_gp_ctr} counter then runs through more than half > @@ -1371,7 +1352,8 @@ variables. Please put \end{lineref} ahead of \QuickQuiz{}. \QuickQuiz{} should have its own pair of \begin{lineref} and \end{lineref}. It looks like you have figured this out and taken care of the paring in 2/2 of this patch set, but it is desirable to make sure each commit builds properly. > > \QuickQuiz{} > Is the possibility of readers being preempted in > - lines~3-4 of Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU} > + lines~\lnref{gp1}-\lnref{gp2} of > + Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU} > a real problem, in other words, is there a real sequence > of events that could lead to failure? > If not, why not? > @@ -1388,6 +1370,7 @@ variables. > added in that section greatly reduces the time required to > overflow the counter. > } \QuickQuizEnd > +\end{lineref} Ditto. With those fixed, Reviewed-by: Akira Yokosawa <akiyks@xxxxxxxxx> Thanks, Akira > > \section{Nestable RCU Based on Free-Running Counter} > \label{sec:app:toyrcu:Nestable RCU Based on Free-Running Counter} >