Re: [PATCH rcu 13/17] srcu: Add SRCU-fast readers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 16, 2025 at 12:21 PM Paul E. McKenney <paulmck@xxxxxxxxxx> wrote:
>
> This commit adds srcu_read_{,un}lock_fast(), which is similar
> to srcu_read_{,un}lock_lite(), but avoids the array-indexing and
> pointer-following overhead.  On a microbenchmark featuring tight
> loops around empty readers, this results in about a 20% speedup
> compared to RCU Tasks Trace on my x86 laptop.
>
> Please note that SRCU-fast has drawbacks compared to RCU Tasks
> Trace, including:
>
> o       Lack of CPU stall warnings.
> o       SRCU-fast readers permitted only where rcu_is_watching().
> o       A pointer-sized return value from srcu_read_lock_fast() must
>         be passed to the corresponding srcu_read_unlock_fast().
> o       In the absence of readers, a synchronize_srcu() having _fast()
>         readers will incur the latency of at least two normal RCU grace
>         periods.
> o       RCU Tasks Trace priority boosting could be easily added.
>         Boosting SRCU readers is more difficult.
>
> SRCU-fast also has a drawback compared to SRCU-lite, namely that the
> return value from srcu_read_lock_fast() is a 64-bit pointer and
> that from srcu_read_lock_lite() is only a 32-bit int.
>
> [ paulmck: Apply feedback from Akira Yokosawa. ]
>
> Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
> Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
> Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> Cc: <bpf@xxxxxxxxxxxxxxx>
> ---
>  include/linux/srcu.h     | 47 ++++++++++++++++++++++++++++++++++++++--
>  include/linux/srcutiny.h | 22 +++++++++++++++++++
>  include/linux/srcutree.h | 38 ++++++++++++++++++++++++++++++++
>  3 files changed, 105 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> index 2bd0e24e9b554..63bddc3014238 100644
> --- a/include/linux/srcu.h
> +++ b/include/linux/srcu.h
> @@ -47,9 +47,10 @@ int init_srcu_struct(struct srcu_struct *ssp);
>  #define SRCU_READ_FLAVOR_NORMAL        0x1             // srcu_read_lock().
>  #define SRCU_READ_FLAVOR_NMI   0x2             // srcu_read_lock_nmisafe().
>  #define SRCU_READ_FLAVOR_LITE  0x4             // srcu_read_lock_lite().
> +#define SRCU_READ_FLAVOR_FAST  0x8             // srcu_read_lock_fast().
>  #define SRCU_READ_FLAVOR_ALL   (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \
> -                               SRCU_READ_FLAVOR_LITE) // All of the above.
> -#define SRCU_READ_FLAVOR_SLOWGP        SRCU_READ_FLAVOR_LITE
> +                               SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above.
> +#define SRCU_READ_FLAVOR_SLOWGP        (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST)
>                                                 // Flavors requiring synchronize_rcu()
>                                                 // instead of smp_mb().
>  void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
> @@ -253,6 +254,33 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
>         return retval;
>  }
>
> +/**
> + * srcu_read_lock_fast - register a new reader for an SRCU-protected structure.
> + * @ssp: srcu_struct in which to register the new reader.
> + *
> + * Enter an SRCU read-side critical section, but for a light-weight
> + * smp_mb()-free reader.  See srcu_read_lock() for more information.
> + *
> + * If srcu_read_lock_fast() is ever used on an srcu_struct structure,
> + * then none of the other flavors may be used, whether before, during,
> + * or after.  Note that grace-period auto-expediting is disabled for _fast
> + * srcu_struct structures because auto-expedited grace periods invoke
> + * synchronize_rcu_expedited(), IPIs and all.
> + *
> + * Note that srcu_read_lock_fast() can be invoked only from those contexts
> + * where RCU is watching, that is, from contexts where it would be legal
> + * to invoke rcu_read_lock().  Otherwise, lockdep will complain.
> + */
> +static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp)
> +{
> +       struct srcu_ctr __percpu *retval;
> +
> +       srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
> +       retval = __srcu_read_lock_fast(ssp);
> +       rcu_try_lock_acquire(&ssp->dep_map);
> +       return retval;
> +}
> +
>  /**
>   * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
>   * @ssp: srcu_struct in which to register the new reader.
> @@ -356,6 +384,21 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
>         __srcu_read_unlock(ssp, idx);
>  }
>
> +/**
> + * srcu_read_unlock_fast - unregister an old reader from an SRCU-protected structure.
> + * @ssp: srcu_struct in which to unregister the old reader.
> + * @scp: return value from corresponding srcu_read_lock_fast().
> + *
> + * Exit a light-weight SRCU read-side critical section.
> + */
> +static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
> +       __releases(ssp)
> +{
> +       srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
> +       srcu_lock_release(&ssp->dep_map);
> +       __srcu_read_unlock_fast(ssp, scp);
> +}
> +
>  /**
>   * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
>   * @ssp: srcu_struct in which to unregister the old reader.
> diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
> index 07a0c4489ea2f..380260317d98b 100644
> --- a/include/linux/srcutiny.h
> +++ b/include/linux/srcutiny.h
> @@ -71,6 +71,28 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
>         return idx;
>  }
>
> +struct srcu_ctr;
> +
> +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
> +{
> +       return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp;
> +}
> +
> +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
> +{
> +       return (struct srcu_ctr __percpu *)(intptr_t)idx;
> +}
> +
> +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
> +{
> +       return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp));
> +}
> +
> +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
> +{
> +       __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp));
> +}
> +
>  #define __srcu_read_lock_lite __srcu_read_lock
>  #define __srcu_read_unlock_lite __srcu_read_unlock
>
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index ef3065c0cadcd..bdc467efce3a2 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -226,6 +226,44 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss
>         return &ssp->sda->srcu_ctrs[idx];
>  }
>
> +/*
> + * Counts the new reader in the appropriate per-CPU element of the
> + * srcu_struct.  Returns a pointer that must be passed to the matching
> + * srcu_read_unlock_fast().
> + *
> + * Note that this_cpu_inc() is an RCU read-side critical section either
> + * because it disables interrupts, because it is a single instruction,
> + * or because it is a read-modify-write atomic operation, depending on
> + * the whims of the architecture.
> + */
> +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
> +{
> +       struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
> +
> +       RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
> +       this_cpu_inc(scp->srcu_locks.counter); /* Y */
> +       barrier(); /* Avoid leaking the critical section. */
> +       return scp;
> +}
> +
> +/*
> + * Removes the count for the old reader from the appropriate
> + * per-CPU element of the srcu_struct.  Note that this may well be a
> + * different CPU than that which was incremented by the corresponding
> + * srcu_read_lock_fast(), but it must be within the same task.

Hm... why the "same task" restriction? With uretprobes we take
srcu_read_lock() in the context of the traced task, but we may "release"
the lock from a timer interrupt, which could run in the context of any task.

> + *
> + * Note that this_cpu_inc() is an RCU read-side critical section either
> + * because it disables interrupts, because it is a single instruction,
> + * or because it is a read-modify-write atomic operation, depending on
> + * the whims of the architecture.
> + */
> +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
> +{
> +       barrier();  /* Avoid leaking the critical section. */
> +       this_cpu_inc(scp->srcu_unlocks.counter);  /* Z */
> +       RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
> +}
> +
>  /*
>   * Counts the new reader in the appropriate per-CPU element of the
>   * srcu_struct.  Returns an index that must be passed to the matching
> --
> 2.40.1
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux