Re: [PATCH 04/19] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry

Christian Brauner <brauner@xxxxxxxxxx> · Thu, 6 Feb 2025 14:09:04 +0100

On Thu, Feb 06, 2025 at 01:31:56PM +0100, Christian Brauner wrote:
> On Thu, Feb 06, 2025 at 04:42:41PM +1100, NeilBrown wrote:
> > No callers of kern_path_locked() or user_path_locked_at() want a
> > negative dentry.  So change them to return -ENOENT instead.  This
> > simplifies callers.
> > 
> > This results in a subtle change to bcachefs in that an ioctl will now
> > return -ENOENT in preference to -EXDEV.  I believe this restores the
> > behaviour to what it was prior to
> >  Commit bbe6a7c899e7 ("bch2_ioctl_subvolume_destroy(): fix locking")
> > 
> > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > ---
> 
> It would be nice if you could send this as a separate cleanup patch.
> It seems unrelated to the series.
> 
> >  drivers/base/devtmpfs.c | 65 +++++++++++++++++++----------------------
> >  fs/bcachefs/fs-ioctl.c  |  4 ---
> >  fs/namei.c              |  4 +++
> >  kernel/audit_watch.c    | 12 ++++----
> >  4 files changed, 40 insertions(+), 45 deletions(-)
> > 
> > diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
> > index b848764ef018..c9e34842139f 100644
> > --- a/drivers/base/devtmpfs.c
> > +++ b/drivers/base/devtmpfs.c
> > @@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
> >  	dentry = kern_path_locked(name, &parent);
> >  	if (IS_ERR(dentry))
> >  		return PTR_ERR(dentry);
> > -	if (d_really_is_positive(dentry)) {
> > -		if (d_inode(dentry)->i_private == &thread)
> > -			err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
> > -					dentry);
> > -		else
> > -			err = -EPERM;
> > -	} else {
> > -		err = -ENOENT;
> > -	}
> > +	if (d_inode(dentry)->i_private == &thread)
> > +		err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
> > +				dentry);
> > +	else
> > +		err = -EPERM;
> > +
> >  	dput(dentry);
> >  	inode_unlock(d_inode(parent.dentry));
> >  	path_put(&parent);
> > @@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev)
> >  {
> >  	struct path parent;
> >  	struct dentry *dentry;
> > +	struct kstat stat;
> > +	struct path p;
> >  	int deleted = 0;
> >  	int err;
> >  
> > @@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev)
> >  	if (IS_ERR(dentry))
> >  		return PTR_ERR(dentry);
> >  
> > -	if (d_really_is_positive(dentry)) {
> > -		struct kstat stat;
> > -		struct path p = {.mnt = parent.mnt, .dentry = dentry};
> > -		err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
> > -				  AT_STATX_SYNC_AS_STAT);
> > -		if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
> > -			struct iattr newattrs;
> > -			/*
> > -			 * before unlinking this node, reset permissions
> > -			 * of possible references like hardlinks
> > -			 */
> > -			newattrs.ia_uid = GLOBAL_ROOT_UID;
> > -			newattrs.ia_gid = GLOBAL_ROOT_GID;
> > -			newattrs.ia_mode = stat.mode & ~0777;
> > -			newattrs.ia_valid =
> > -				ATTR_UID|ATTR_GID|ATTR_MODE;
> > -			inode_lock(d_inode(dentry));
> > -			notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
> > -			inode_unlock(d_inode(dentry));
> > -			err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
> > -					 dentry, NULL);
> > -			if (!err || err == -ENOENT)
> > -				deleted = 1;
> > -		}
> > -	} else {
> > -		err = -ENOENT;
> > +	p.mnt = parent.mnt;
> > +	p.dentry = dentry;
> > +	err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
> > +			  AT_STATX_SYNC_AS_STAT);
> > +	if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
> > +		struct iattr newattrs;
> > +		/*
> > +		 * before unlinking this node, reset permissions
> > +		 * of possible references like hardlinks
> > +		 */
> > +		newattrs.ia_uid = GLOBAL_ROOT_UID;
> > +		newattrs.ia_gid = GLOBAL_ROOT_GID;
> > +		newattrs.ia_mode = stat.mode & ~0777;
> > +		newattrs.ia_valid =
> > +			ATTR_UID|ATTR_GID|ATTR_MODE;
> > +		inode_lock(d_inode(dentry));
> > +		notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
> > +		inode_unlock(d_inode(dentry));
> > +		err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
> > +				 dentry, NULL);
> > +		if (!err || err == -ENOENT)
> > +			deleted = 1;
> >  	}
> >  	dput(dentry);
> >  	inode_unlock(d_inode(parent.dentry));
> > diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
> > index 15725b4ce393..595b57fabc9a 100644
> > --- a/fs/bcachefs/fs-ioctl.c
> > +++ b/fs/bcachefs/fs-ioctl.c
> > @@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
> >  		ret = -EXDEV;
> >  		goto err;
> >  	}
> > -	if (!d_is_positive(victim)) {
> > -		ret = -ENOENT;
> > -		goto err;
> > -	}
> >  	ret = __bch2_unlink(dir, victim, true);
> >  	if (!ret) {
> >  		fsnotify_rmdir(dir, victim);
> > diff --git a/fs/namei.c b/fs/namei.c
> > index d684102d873d..1901120bcbb8 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -2745,6 +2745,10 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
> >  	}
> >  	inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> >  	d = lookup_one_qstr(&last, path->dentry, 0);
> > +	if (!IS_ERR(d) && d_is_negative(d)) {
> > +		dput(d);
> > +		d = ERR_PTR(-ENOENT);

This doesn't unlock which afaict does cause issue with your devtmpfs
changes:

--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
 	dentry = kern_path_locked(name, &parent);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);

Here you fail to unlock which means dev_rmdir() will return with inode
lock held even though it returned an error?

-	if (d_really_is_positive(dentry)) {
-		if (d_inode(dentry)->i_private == &thread)
-			err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
-					dentry);
-		else
-			err = -EPERM;
-	} else {
-		err = -ENOENT;
-	}
+	if (d_inode(dentry)->i_private == &thread)
+		err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
+				dentry);
+	else
+		err = -EPERM;
+

> > +	}
> >  	if (IS_ERR(d)) {
> >  		inode_unlock(path->dentry->d_inode);
> >  		path_put(path);
> > diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> > index 7f358740e958..e3130675ee6b 100644
> > --- a/kernel/audit_watch.c
> > +++ b/kernel/audit_watch.c
> > @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
> >  	struct dentry *d = kern_path_locked(watch->path, parent);
> >  	if (IS_ERR(d))
> >  		return PTR_ERR(d);
> > -	if (d_is_positive(d)) {
> > -		/* update watch filter fields */
> > -		watch->dev = d->d_sb->s_dev;
> > -		watch->ino = d_backing_inode(d)->i_ino;
> > -	}
> > +	/* update watch filter fields */
> > +	watch->dev = d->d_sb->s_dev;
> > +	watch->ino = d_backing_inode(d)->i_ino;
> > +
> >  	inode_unlock(d_backing_inode(parent->dentry));
> >  	dput(d);
> >  	return 0;
> > @@ -419,7 +418,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
> >  	/* caller expects mutex locked */
> >  	mutex_lock(&audit_filter_mutex);
> >  
> > -	if (ret) {
> > +	if (ret && ret != -ENOENT) {
> >  		audit_put_watch(watch);
> >  		return ret;
> >  	}
> > @@ -438,6 +437,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
> >  
> >  	h = audit_hash_ino((u32)watch->ino);
> >  	*list = &audit_inode_hash[h];
> > +	ret = 0;
> >  error:
> >  	path_put(&parent_path);
> >  	audit_put_watch(watch);
> > -- 
> > 2.47.1
> >