Hi Dave,
On 2023/8/8 10:24, Dave Chinner wrote:
On Mon, Aug 07, 2023 at 07:09:33PM +0800, Qi Zheng wrote:
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index eb342994675a..f06225f18531 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,6 +4,8 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
#define SHRINKER_UNIT_BITS BITS_PER_LONG
@@ -87,6 +89,10 @@ struct shrinker {
int seeks; /* seeks to recreate an obj */
unsigned flags;
+ refcount_t refcount;
+ struct completion done;
+ struct rcu_head rcu;
Documentation, please. What does the refcount protect, what does the
completion provide, etc.
How about the following:
/*
* reference count of this shrinker, holding this can guarantee
* that the shrinker will not be released.
*/
refcount_t refcount;
/*
* Wait for shrinker::refcount to reach 0, that is, no shrinker
* is running or will run again.
*/
struct completion done;
+
void *private_data;
/* These are for internal use */
@@ -120,6 +126,17 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
void shrinker_register(struct shrinker *shrinker);
void shrinker_free(struct shrinker *shrinker);
+static inline bool shrinker_try_get(struct shrinker *shrinker)
+{
+ return refcount_inc_not_zero(&shrinker->refcount);
+}
+
+static inline void shrinker_put(struct shrinker *shrinker)
+{
+ if (refcount_dec_and_test(&shrinker->refcount))
+ complete(&shrinker->done);
+}
+
#ifdef CONFIG_SHRINKER_DEBUG
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
diff --git a/mm/shrinker.c b/mm/shrinker.c
index 1911c06b8af5..d318f5621862 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -2,6 +2,7 @@
#include <linux/memcontrol.h>
#include <linux/rwsem.h>
#include <linux/shrinker.h>
+#include <linux/rculist.h>
#include <trace/events/vmscan.h>
#include "internal.h"
@@ -577,33 +578,42 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
- if (!down_read_trylock(&shrinker_rwsem))
- goto out;
-
- list_for_each_entry(shrinker, &shrinker_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
.memcg = memcg,
};
+ if (!shrinker_try_get(shrinker))
+ continue;
+
+ /*
+ * We can safely unlock the RCU lock here since we already
+ * hold the refcount of the shrinker.
+ */
+ rcu_read_unlock();
+
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
+
/*
- * Bail out if someone want to register a new shrinker to
- * prevent the registration from being stalled for long periods
- * by parallel ongoing shrinking.
+ * This shrinker may be deleted from shrinker_list and freed
+ * after the shrinker_put() below, but this shrinker is still
+ * used for the next traversal. So it is necessary to hold the
+ * RCU lock first to prevent this shrinker from being freed,
+ * which also ensures that the next shrinker that is traversed
+ * will not be freed (even if it is deleted from shrinker_list
+ * at the same time).
*/
This needs to be moved to the head of the function, and document
the whole list walk, get, put and completion parts of the algorithm
that make it safe. There's more to this than "we hold a reference
count", especially the tricky "we might see the shrinker before it
is fully initialised" case....
How about moving these documents to before list_for_each_entry_rcu(),
and then go to the head of shrink_slab_memcg() to explain the memcg
slab shrink case.
.....
void shrinker_free(struct shrinker *shrinker)
{
struct dentry *debugfs_entry = NULL;
@@ -686,9 +712,18 @@ void shrinker_free(struct shrinker *shrinker)
if (!shrinker)
return;
+ if (shrinker->flags & SHRINKER_REGISTERED) {
+ shrinker_put(shrinker);
+ wait_for_completion(&shrinker->done);
+ }
Needs a comment explaining why we need to wait here...
/*
* Wait for all lookups of the shrinker to complete, after that, no
* shrinker is running or will run again, then we can safely free
* the structure where the shrinker is located, such as super_block
* etc.
*/
+
down_write(&shrinker_rwsem);
if (shrinker->flags & SHRINKER_REGISTERED) {
- list_del(&shrinker->list);
+ /*
+ * Lookups on the shrinker are over and will fail in the future,
+ * so we can now remove it from the lists and free it.
+ */
.... rather than here after the wait has been done and provided the
guarantee that no shrinker is running or will run again...
With the above comment, how about simplifying the comment here to the
following:
/*
* Now we can safely remove it from the shrinker_list and free it.
*/
Thanks,
Qi
-Dave.