Re: [PATCH v3 2/2] rcu_qs: Use READ_ONCE() AND WRITE_ONCE() for shared variable rcu_gp_ctr

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Junchang,

Please find inline comments below.

On Mon, 29 Apr 2019 22:38:44 +0800, Junchang Wang wrote:
> Replace the plain accesses with READ_ONCE()/WRITE_ONCE(). And then update

s/the plain/racy plain/

> the tex file accordingly. A new scheme, which directly extracts snippet
> from rcu_qs.[hc], is applied.
> 
> Signed-off-by: Junchang Wang <junchangwang@xxxxxxxxx>
> Reviewed-by: Akira Yokosawa <akiyks@xxxxxxxxx>
> ---
>  CodeSamples/defer/rcu_qs.c |  29 ++++++------
>  CodeSamples/defer/rcu_qs.h |  42 +++++++++--------
>  appendix/toyrcu/toyrcu.tex | 114 ++++++++++++++++-----------------------------
>  3 files changed, 79 insertions(+), 106 deletions(-)
> 
> diff --git a/CodeSamples/defer/rcu_qs.c b/CodeSamples/defer/rcu_qs.c
> index 27e45f7..8dc174f 100644
> --- a/CodeSamples/defer/rcu_qs.c
> +++ b/CodeSamples/defer/rcu_qs.c
> @@ -27,44 +27,47 @@
>  #include "../api.h"
>  #include "rcu_qs.h"
>  
> -void synchronize_rcu(void)
> +//\begin{snippet}[labelbase=ln:defer:rcu_qs:synchronize,gobbleblank=yes,commandchars=\%\@\$]
> +void synchronize_rcu(void)			//\lnlbl{syn:b}
>  {
>  	int t;
> -
> +							//\fcvblank
>  	/* Memory barrier ensures mutation seen before grace period. */
>  
> -	smp_mb();
> +	smp_mb();				//\lnlbl{syn:mb1}
>  
>  	/* Only one synchronize_rcu() at a time. */
>  
> -	spin_lock(&rcu_gp_lock);
> +	spin_lock(&rcu_gp_lock);		//\lnlbl{syn:spinlock}
>  
>  	/* Advance to a new grace-period number, enforce ordering. */
>  
> -	rcu_gp_ctr += 2;
> -	smp_mb();
> +	WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + 2);	//\lnlbl{syn:increasegp}
> +	smp_mb();				//\lnlbl{syn:mb2}
>  
>  	/*
>  	 * Wait until all threads are either out of their RCU read-side
>  	 * critical sections or are aware of the new grace period.
>  	 */
>  
> -	for_each_thread(t) {
> -		while (rcu_gp_ongoing(t) &&
> -		       ((per_thread(rcu_reader_qs_gp, t) - rcu_gp_ctr) < 0)) {
> +	for_each_thread(t) {			//\lnlbl{syn:scan:b}
> +		while (rcu_gp_ongoing(t) &&	//\lnlbl{syn:ongoing}
> +		       ((per_thread(rcu_reader_qs_gp, t)//\lnlbl{syn:gp1}
> +		         - rcu_gp_ctr) < 0)) {	//\lnlbl{syn:gp2}
>  			/* @@@ poll(NULL, 0, 10); */
> -			barrier();
> +			barrier();		//\lnlbl{syn:barrier}

So we need another "#ifndef FCV_SNIPPET" construct here...

>  		}
> -	}
> +	}					//\lnlbl{syn:scan:e}
>  
>  	/* Let other synchronize_rcu() instances move ahead. */
>  
> -	spin_unlock(&rcu_gp_lock);
> +	spin_unlock(&rcu_gp_lock);		//\lnlbl{syn:spinunlock}
>  
>  	/* Ensure that any subsequent free()s happen -after- above checks. */
>  
> -	smp_mb();
> +	smp_mb();				//\lnlbl{syn:mb3}
>  }
> +//\end{snippet}
>  
>  #ifdef TEST
>  #include "rcutorture.h"
> diff --git a/CodeSamples/defer/rcu_qs.h b/CodeSamples/defer/rcu_qs.h
> index 3bb45a4..26c3fb5 100644
> --- a/CodeSamples/defer/rcu_qs.h
> +++ b/CodeSamples/defer/rcu_qs.h
> @@ -48,31 +48,35 @@ static void rcu_init(void)
>  	init_per_thread(rcu_reader_qs_gp, rcu_gp_ctr);
>  }
>  
> -static void rcu_read_lock(void)
> +//\begin{snippet}[labelbase=ln:defer:rcu_qs:read_lock_unlock,gobbleblank=yes,commandchars=\%\@\$]
> +static void rcu_read_lock(void)			//\lnlbl{lock:b}
>  {
>  }
> -
> +							//\fcvblank
>  static void rcu_read_unlock(void)
>  {
> -}
> -
> -static void rcu_quiescent_state(void)
> +}						//\lnlbl{unlock:e}
> +							//\fcvblank
> +static void rcu_quiescent_state(void)		//\lnlbl{qs:b}
>  {
> -	smp_mb();
> -	__get_thread_var(rcu_reader_qs_gp) = ACCESS_ONCE(rcu_gp_ctr) + 1;
> -	smp_mb();
> -}
> -
> -static void rcu_thread_offline(void)
> +	smp_mb();				//\lnlbl{qs:mb1}
> +	__get_thread_var(rcu_reader_qs_gp) =	//\lnlbl{qs:gp1}
> +		READ_ONCE(rcu_gp_ctr) + 1;	//\lnlbl{qs:gp2}
> +	smp_mb();				//\lnlbl{qs:mb2}
> +}						//\lnlbl{qs:e}
> +							//\fcvblank
> +static void rcu_thread_offline(void)		//\lnlbl{offline:b}
>  {
> -	smp_mb();
> -	__get_thread_var(rcu_reader_qs_gp) = ACCESS_ONCE(rcu_gp_ctr);
> -	smp_mb();
> -}
> -
> -static void rcu_thread_online(void)
> +	smp_mb();				//\lnlbl{offline:mb1}
> +	__get_thread_var(rcu_reader_qs_gp) =	//\lnlbl{offline:gp1}
> +		READ_ONCE(rcu_gp_ctr);		//\lnlbl{offline:gp2}
> +	smp_mb();				//\lnlbl{offline:mb2}
> +}						//\lnlbl{offline:e}
> +							//\fcvblank
> +static void rcu_thread_online(void)		//\lnlbl{online:b}
>  {
> -	rcu_quiescent_state();
> -}
> +	rcu_quiescent_state();			//\lnlbl{online:qs}
> +}						//\lnlbl{online:e}
> +//\end{snippet}
>  
>  extern void synchronize_rcu(void);
> diff --git a/appendix/toyrcu/toyrcu.tex b/appendix/toyrcu/toyrcu.tex
> index f7fc07b..a7ce98a 100644
> --- a/appendix/toyrcu/toyrcu.tex
> +++ b/appendix/toyrcu/toyrcu.tex
> @@ -1349,8 +1349,10 @@ will ignore the subsequent RCU read-side critical section.
>  Third and finally, this implementation requires that the enclosing software
>  environment be able to enumerate threads and maintain per-thread
>  variables.
> +\end{lineref}
>  
>  \QuickQuiz{}
> +	\begin{lineref}[ln:defer:rcu:read_lock_unlock:lock]
>  	Is the possibility of readers being preempted in
>  	lines~\lnref{gp1}-\lnref{gp2} of
>  	Listing~\ref{lst:app:toyrcu:Free-Running Counter Using RCU}
> @@ -1359,6 +1361,7 @@ variables.
>  	If not, why not?
>  	If so, what is the sequence of events, and how can the
>  	failure be addressed?
> +	\end{lineref}
>  \QuickQuizAnswer{
>  	It is a real problem, there is a sequence of events leading to
>  	failure, and there are a number of possible ways of
> @@ -1370,7 +1373,6 @@ variables.
>  	added in that section greatly reduces the time required to
>  	overflow the counter.
>  } \QuickQuizEnd
> -\end{lineref}

These changes should be moved to 1/2.

>  
>  \section{Nestable RCU Based on Free-Running Counter}
>  \label{sec:app:toyrcu:Nestable RCU Based on Free-Running Counter}
> @@ -1673,50 +1675,19 @@ overhead.
>  \end{listing}
>  
>  \begin{listing}[tbp]
> -{ \scriptsize
> -\begin{verbbox}
> -  1 static void rcu_read_lock(void)
> -  2 {
> -  3 }
> -  4
> -  5 static void rcu_read_unlock(void)
> -  6 {
> -  7 }
> -  8
> -  9 rcu_quiescent_state(void)
> - 10 {
> - 11   smp_mb();
> - 12   __get_thread_var(rcu_reader_qs_gp) =
> - 13     ACCESS_ONCE(rcu_gp_ctr) + 1;
> - 14   smp_mb();
> - 15 }
> - 16
> - 17 static void rcu_thread_offline(void)
> - 18 {
> - 19   smp_mb();
> - 20   __get_thread_var(rcu_reader_qs_gp) =
> - 21     ACCESS_ONCE(rcu_gp_ctr);
> - 22   smp_mb();
> - 23 }
> - 24
> - 25 static void rcu_thread_online(void)
> - 26 {
> - 27   rcu_quiescent_state();
> - 28 }
> -\end{verbbox}
> -}
> -\centering
> -\theverbbox
> +\input{CodeSamples/defer/rcu_qs@read_lock_unlock.fcv}
>  \caption{Quiescent-State-Based RCU Read Side}
>  \label{lst:app:toyrcu:Quiescent-State-Based RCU Read Side}
>  \end{listing}
>  
> +\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock]
>  Listing~\ref{lst:app:toyrcu:Quiescent-State-Based RCU Read Side}
>  (\path{rcu_qs.h})
>  shows the read-side primitives used to construct a user-level
>  implementation of RCU based on quiescent states, with the data shown in
>  Listing~\ref{lst:app:toyrcu:Data for Quiescent-State-Based RCU}.
> -As can be seen from lines~1-7 in the listing, the \co{rcu_read_lock()}
> +As can be seen from lines~\lnref{lock:b}-\lnref{unlock:e} in the listing,
> +the \co{rcu_read_lock()}
>  and \co{rcu_read_unlock()} primitives do nothing, and can in fact
>  be expected to be inlined and optimized away, as they are in
>  server builds of the Linux kernel.
> @@ -1724,11 +1695,14 @@ This is due to the fact that quiescent-state-based RCU implementations
>  \emph{approximate} the extents of RCU read-side critical sections
>  using the aforementioned quiescent states.
>  Each of these quiescent states contains a call to
> -\co{rcu_quiescent_state()}, which is shown from lines~9-15 in the listing.
> +\co{rcu_quiescent_state()}, which is shown from
> +lines~\lnref{qs:b}-\lnref{qs:e} in the listing.
>  Threads entering extended quiescent states (for example, when blocking)
> -may instead call \co{rcu_thread_offline()} (lines~17-23) when entering
> +may instead call \co{rcu_thread_offline()}
> +(lines~\lnref{offline:b}-\lnref{offline:e}) when entering
>  an extended quiescent state and then call
> -\co{rcu_thread_online()} (lines~25-28) when leaving it.
> +\co{rcu_thread_online()}
> +(lines~\lnref{online:b}-\lnref{online:e}) when leaving it.
>  As such, \co{rcu_thread_online()} is analogous to \co{rcu_read_lock()}
>  and \co{rcu_thread_offline()} is analogous to \co{rcu_read_unlock()}.
>  In addition, \co{rcu_quiescent_state()} can be thought of as a
> @@ -1741,13 +1715,16 @@ In addition, \co{rcu_quiescent_state()} can be thought of as a
>  	performance optimizations.}
>  It is illegal to invoke \co{rcu_quiescent_state()}, \co{rcu_thread_offline()},
>  or \co{rcu_thread_online()} from an RCU read-side critical section.
> +\end{lineref}
>  
> -In \co{rcu_quiescent_state()}, line~11 executes a memory barrier
> +\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock:qs]
> +In \co{rcu_quiescent_state()}, line~\lnref{mb1} executes a memory barrier
>  to prevent any code prior to the quiescent state (including possible
>  RCU read-side critical sections) from being reordered
>  into the quiescent state.
> -Lines~12-13 pick up a copy of the global \co{rcu_gp_ctr}, using
> -\co{ACCESS_ONCE()} to ensure that the compiler does not employ any
> +Lines~\lnref{gp1}-\lnref{gp2} pick up
> +a copy of the global \co{rcu_gp_ctr}, using
> +\co{READ_ONCE()} to ensure that the compiler does not employ any
>  optimizations that would result in \co{rcu_gp_ctr} being fetched
>  more than once,
>  and then adds one to the value fetched and stores it into
> @@ -1756,15 +1733,20 @@ instance of \co{synchronize_rcu()} will see an odd-numbered value,
>  thus becoming aware that a new RCU read-side critical section has started.
>  Instances of \co{synchronize_rcu()} that are waiting on older
>  RCU read-side critical sections will thus know to ignore this new one.
> -Finally, line~14 executes a memory barrier, which prevents subsequent
> +Finally, line~\lnref{mb2} executes a memory barrier, which prevents subsequent
>  code (including a possible RCU read-side critical section) from being
> -re-ordered with the lines~12-13.
> +re-ordered with the lines~\lnref{gp1}-\lnref{gp2}.
> +\end{lineref}
>  
>  \QuickQuiz{}
> -	Doesn't the additional memory barrier shown on line~14 of
> +	\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock:qs]
> +	Doesn't the additional memory barrier shown on line~\lnref{mb2} of
>  	Listing~\ref{lst:app:toyrcu:Quiescent-State-Based RCU Read Side}
>  	greatly increase the overhead of \co{rcu_quiescent_state}?
> +	\end{lineref}
>  \QuickQuizAnswer{
> +	\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock:qs]
> +	Doesn't the additional memory barrier shown on line~\lnref{mb2} of

??? This line shouldn't have been pasted.

With these fixed,

Reviewed-by: Akira Yokosawa <akiyks@xxxxxxxxx>

        Thanks, Akira

>  	Indeed it does!
>  	An application using this implementation of RCU should therefore
>  	invoke \co{rcu_quiescent_state} sparingly, instead using
> @@ -1772,9 +1754,11 @@ re-ordered with the lines~12-13.
>  	time.
>  
>  	However, this memory barrier is absolutely required so that
> -	other threads will see the store on lines~12-13 before any
> +	other threads will see the store on
> +	lines~\lnref{gp1}-\lnref{gp2} before any
>  	subsequent RCU read-side critical sections executed by the
>  	caller.
> +	\end{lineref}
>  } \QuickQuizEnd
>  
>  Some applications might use RCU only occasionally, but use it very heavily
> @@ -1794,17 +1778,22 @@ Any concurrent instances of \co{synchronize_rcu()} will thus know to
>  ignore this thread.
>  
>  \QuickQuiz{}
> -	Why are the two memory barriers on lines~19 and 22 of
> +	\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock:qs]
> +	Why are the two memory barriers on lines~\lnref{mb1} and \lnref{mb2} of
>  	Listing~\ref{lst:app:toyrcu:Quiescent-State-Based RCU Read Side}
>  	needed?
> +	\end{lineref}
>  \QuickQuizAnswer{
> -	The memory barrier on line~19 prevents any RCU read-side
> +	\begin{lineref}[ln:defer:rcu_qs:read_lock_unlock:qs]
> +	The memory barrier on line~\lnref{mb1} prevents any RCU read-side
>  	critical sections that might precede the
>  	call to \co{rcu_thread_offline()} won't be reordered by either
> -	the compiler or the CPU to follow the assignment on lines~20-21.
> -	The memory barrier on line~22 is, strictly speaking, unnecessary,
> +	the compiler or the CPU to follow the assignment on
> +	lines~\lnref{gp1}-\lnref{gp2}.
> +	The memory barrier on line~\lnref{mb2} is, strictly speaking, unnecessary,
>  	as it is illegal to have any RCU read-side critical sections
>  	following the call to \co{rcu_thread_offline()}.
> +	\end{lineref}
>  } \QuickQuizEnd
>  
>  The \co{rcu_thread_online()} function simply invokes
> @@ -1812,30 +1801,7 @@ The \co{rcu_thread_online()} function simply invokes
>  quiescent state.
>  
>  \begin{listing}[tbp]
> -{ \scriptsize
> -\begin{verbbox}
> -  1 void synchronize_rcu(void)
> -  2 {
> -  3   int t;
> -  4
> -  5   smp_mb();
> -  6   spin_lock(&rcu_gp_lock);
> -  7   rcu_gp_ctr += 2;
> -  8   smp_mb();
> -  9   for_each_thread(t) {
> - 10     while (rcu_gp_ongoing(t) &&
> - 11            ((per_thread(rcu_reader_qs_gp, t) -
> - 12              rcu_gp_ctr) < 0)) {
> - 13       poll(NULL, 0, 10);
> - 14     }
> - 15   }
> - 16   spin_unlock(&rcu_gp_lock);
> - 17   smp_mb();
> - 18 }
> -\end{verbbox}
> -}
> -\centering
> -\theverbbox
> +\input{CodeSamples/defer/rcu_qs@xxxxxxxxxxxxxxx}
>  \caption{RCU Update Side Using Quiescent States}
>  \label{lst:app:toyrcu:RCU Update Side Using Quiescent States}
>  \end{listing}
> 




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux