Re: [PATCH][RFC] make take_dentry_name_snapshot() lockless

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Dec 09, 2024 at 09:17:08PM +0000, Al Viro wrote:

> And yes, they are aligned - d_iname follows a pointer, inline_name follows
> struct qstr, i.e. u64 + pointer.  How about we add struct inlined_name {
> unsigned char name[DNAME_INLINE_LEN];}; and turn d_iname and inline_name
> into anon unions with that?  Hell, might even make it an array of unsigned
> long and use that to deal with this
>                 } else {
>                         /*
>                          * Both are internal.
>                          */
>                         unsigned int i;
>                         BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
>                         for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
>                                 swap(((long *) &dentry->d_iname)[i],
>                                      ((long *) &target->d_iname)[i]);
>                         }
>                 }
> in swap_names().  With struct assignment in the corresponding case in
> copy_name() and in take_dentry_name_snapshot() - that does generate sane
> code...

Do you have any objections to the diff below?  Completely untested and needs
to be split in two...  It does seem to generate decent code; it obviously
will need to be profiled - copy_name() in the common (short-to-short) case
copies the entire thing, all 5 words of it on 64bit, instead of memcpy()
of just the amount that needs to be copied.  That thing may cross the
cacheline boundary, but both cachelines had been freshly brought into
cache and dirtied, so...

diff --git a/fs/dcache.c b/fs/dcache.c
index b4d5e9e1e43d..687558622acf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,10 +296,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
 }
 
 struct external_name {
-	union {
-		atomic_t count;
-		struct rcu_head head;
-	} u;
+	atomic_t count;		// ->count and ->head can't be combined
+	struct rcu_head head;	// see take_dentry_name_snapshot()
 	unsigned char name[];
 };
 
@@ -329,16 +327,33 @@ static inline int dname_external(const struct dentry *dentry)
 
 void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
 {
-	spin_lock(&dentry->d_lock);
-	name->name = dentry->d_name;
-	if (unlikely(dname_external(dentry))) {
-		atomic_inc(&external_name(dentry)->u.count);
-	} else {
-		memcpy(name->inline_name, dentry->d_iname,
-		       dentry->d_name.len + 1);
+	unsigned seq;
+	const unsigned char *s;
+
+	rcu_read_lock();
+retry:
+	seq = read_seqcount_begin(&dentry->d_seq);
+	s = READ_ONCE(dentry->d_name.name);
+	name->name.hash_len = dentry->d_name.hash_len;
+	if (likely(s == dentry->d_iname)) {
 		name->name.name = name->inline_name;
+		name->__inline_name = dentry->__d_iname;
+		if (read_seqcount_retry(&dentry->d_seq, seq))
+			goto retry;
+	} else {
+		struct external_name *p;
+		p = container_of(s, struct external_name, name[0]);
+		name->name.name = s;
+		// get a valid reference
+		if (unlikely(!atomic_inc_not_zero(&p->count)))
+			goto retry;
+		if (read_seqcount_retry(&dentry->d_seq, seq)) {
+			if (unlikely(atomic_dec_and_test(&p->count)))
+				kfree_rcu(p, head);
+			goto retry;
+		}
 	}
-	spin_unlock(&dentry->d_lock);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(take_dentry_name_snapshot);
 
@@ -347,8 +362,8 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 	if (unlikely(name->name.name != name->inline_name)) {
 		struct external_name *p;
 		p = container_of(name->name.name, struct external_name, name[0]);
-		if (unlikely(atomic_dec_and_test(&p->u.count)))
-			kfree_rcu(p, u.head);
+		if (unlikely(atomic_dec_and_test(&p->count)))
+			kfree_rcu(p, head);
 	}
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -386,7 +401,7 @@ static void dentry_free(struct dentry *dentry)
 	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
 	if (unlikely(dname_external(dentry))) {
 		struct external_name *p = external_name(dentry);
-		if (likely(atomic_dec_and_test(&p->u.count))) {
+		if (likely(atomic_dec_and_test(&p->count))) {
 			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
 			return;
 		}
@@ -1667,7 +1682,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 		}
-		atomic_set(&p->u.count, 1);
+		atomic_set(&p->count, 1);
 		dname = p->name;
 	} else  {
 		dname = dentry->d_iname;
@@ -2749,11 +2764,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
 			 * Both are internal.
 			 */
 			unsigned int i;
-			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
-			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
-				swap(((long *) &dentry->d_iname)[i],
-				     ((long *) &target->d_iname)[i]);
-			}
+			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++)
+				swap(dentry->__d_iname.name[i],
+				     target->__d_iname.name[i]);
 		}
 	}
 	swap(dentry->d_name.hash_len, target->d_name.hash_len);
@@ -2765,16 +2778,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 	if (unlikely(dname_external(dentry)))
 		old_name = external_name(dentry);
 	if (unlikely(dname_external(target))) {
-		atomic_inc(&external_name(target)->u.count);
+		atomic_inc(&external_name(target)->count);
 		dentry->d_name = target->d_name;
 	} else {
-		memcpy(dentry->d_iname, target->d_name.name,
-				target->d_name.len + 1);
+		dentry->__d_iname = target->__d_iname;
 		dentry->d_name.name = dentry->d_iname;
 		dentry->d_name.hash_len = target->d_name.hash_len;
 	}
-	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-		kfree_rcu(old_name, u.head);
+	if (old_name && likely(atomic_dec_and_test(&old_name->count)))
+		kfree_rcu(old_name, head);
 }
 
 /*
@@ -3189,6 +3201,7 @@ static void __init dcache_init_early(void)
 
 static void __init dcache_init(void)
 {
+	BUILD_BUG_ON(DNAME_INLINE_LEN != sizeof(struct short_name_store));
 	/*
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1f28f56fd406..d604a4826765 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -77,6 +77,10 @@ extern const struct qstr dotdot_name;
 # endif
 #endif
 
+struct short_name_store {
+	unsigned long name[DNAME_INLINE_LEN / sizeof(unsigned long)];
+};
+
 #define d_lock	d_lockref.lock
 
 struct dentry {
@@ -88,7 +92,10 @@ struct dentry {
 	struct qstr d_name;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
-	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+	union {
+		unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+		struct short_name_store __d_iname;
+	};
 	/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
 
 	/* Ref lookup also touches following */
@@ -590,7 +597,10 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 
 struct name_snapshot {
 	struct qstr name;
-	unsigned char inline_name[DNAME_INLINE_LEN];
+	union {
+		unsigned char inline_name[DNAME_INLINE_LEN];
+		struct short_name_store __inline_name;
+	};
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux