Re: [RFC][PATCH] do d_instantiate/unlock_new_inode combinations safely

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On May 10, 2018, at 12:20 PM, Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
> 
> [in the spirit of "don't put 'em in without posting for review";
> this is present in vfs.git#for-linus, if you prefer to look in git.
> 
> Background: a bunch of nfsd race fixes from back in 2008 had
> problems with lockdep enabled; in 2012 that got "fixed", unfortunately
> reopening a narrow race window.  The patch below does *NOT* fix
> all filesystems, but it does fix most of the exported local ones
> and it is easy to backport, so it makes for a sane starting point.
> 
> If anyone has objections, this is your chance to yell.
> ]
> 
> For anything NFS-exported we do _not_ want to unlock new inode
> before it has grown an alias; original set of fixes got the
> ordering right, but missed the nasty complication in case of
> lockdep being enabled - unlock_new_inode() does
>    lockdep_annotate_inode_mutex_key(inode)
> which can only be done before anyone gets a chance to touch
> ->i_mutex.  Unfortunately, flipping the order and doing
> unlock_new_inode() before d_instantiate() opens a window when
> mkdir can race with open-by-fhandle on a guessed fhandle, leading
> to multiple aliases for a directory inode and all the breakage
> that follows from that.
> 
>    Correct solution: a new primitive (d_instantiate_new())
> combining these two in the right order - lockdep annotate, then
> d_instantiate(), then the rest of unlock_new_inode().  All
> combinations of d_instantiate() with unlock_new_inode() should
> be converted to that.
> 
> Cc: stable@xxxxxxxxxx   # 2.6.29 and later
> Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 86d2de63461e..6da095fef440 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1899,6 +1899,22 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
> }
> EXPORT_SYMBOL(d_instantiate);
> 
> +void d_instantiate_new(struct dentry *entry, struct inode *inode)

Is it worthwhile to add a comment here that this is a merger of
d_instantiate() and unlock_new_inode() (and possibly at those
functions as well) so that any future changes are added to both
sets of functions?

Alternately, this could be refactored a bit, but not much:

/*
 * __unlock_new_inode - clear I_NEW on an inode and wake up its waiters
 * @inode: inode whose I_NEW state bit is to be dropped
 *
 * Shared tail of unlock_new_inode() and d_instantiate_new().  Both of
 * those callers invoke this with inode->i_lock held and have already
 * done the lockdep annotation themselves.
 */
static void __unlock_new_inode(struct inode *inode)
{
        /* Warn (but carry on) if the inode was not marked I_NEW. */
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
        /*
         * Barrier between clearing the bit and issuing the wakeup;
         * presumably so that woken waiters observe I_NEW as cleared
         * -- NOTE(review): confirm against wake_up_bit() requirements.
         */
        smp_mb();
        wake_up_bit(&inode->i_state, __I_NEW);
}

/*
 * unlock_new_inode - drop the I_NEW state of a freshly set up inode
 * @inode: new inode to unlock
 *
 * Applies the lockdep annotation first and only then clears I_NEW
 * (under inode->i_lock) via __unlock_new_inode().  Per the patch
 * description, the annotation can only be done before anyone gets a
 * chance to touch ->i_mutex, hence it precedes everything else.
 *
 * When the inode is also being attached to a dentry, the patch
 * description asks for d_instantiate_new() to be used instead of
 * pairing this with d_instantiate().
 */
void unlock_new_inode(struct inode *inode)
{
        lockdep_annotate_inode_mutex_key(inode);
        spin_lock(&inode->i_lock);
	__unlock_new_inode(inode);
        spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);

/*
 * d_instantiate_new - attach a new inode to a dentry and drop I_NEW
 * @entry: dentry to complete; must not already be on an alias list
 * @inode: new inode to attach; must not be NULL
 *
 * This is a merger of d_instantiate() and unlock_new_inode(): lockdep
 * annotate, then instantiate the dentry, then the rest of
 * unlock_new_inode().  Per the patch description, for anything
 * NFS-exported the new inode must not be unlocked before it has grown
 * an alias, while the lockdep annotation can only be done before anyone
 * gets a chance to touch ->i_mutex; this ordering satisfies both.
 *
 * NOTE(review): future changes to d_instantiate() probably need to be
 * mirrored here -- confirm when touching either function.
 */
void d_instantiate_new(struct dentry *entry, struct inode *inode)
{
	/* The dentry must not be attached to any inode's alias list yet. */
	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
	BUG_ON(!inode);
	/* Must come before the inode is made reachable through the dentry. */
	lockdep_annotate_inode_mutex_key(inode);
	security_d_instantiate(entry, inode);
	/* Attach the alias and clear I_NEW within one i_lock critical section. */
	spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	__unlock_new_inode(inode);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);

Cheers, Andreas

> +{
> +	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
> +	BUG_ON(!inode);
> +	lockdep_annotate_inode_mutex_key(inode);
> +	security_d_instantiate(entry, inode);
> +	spin_lock(&inode->i_lock);
> +	__d_instantiate(entry, inode);
> +	WARN_ON(!(inode->i_state & I_NEW));
> +	inode->i_state &= ~I_NEW;
> +	smp_mb();
> +	wake_up_bit(&inode->i_state, __I_NEW);
> +	spin_unlock(&inode->i_lock);
> +}
> +EXPORT_SYMBOL(d_instantiate_new);
> +
> /**
>  * d_instantiate_no_diralias - instantiate a non-aliased dentry
>  * @entry: dentry to complete
> diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
> index 847904aa63a9..7bba8f2693b2 100644
> --- a/fs/ecryptfs/inode.c
> +++ b/fs/ecryptfs/inode.c
> @@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
> 		iget_failed(ecryptfs_inode);
> 		goto out;
> 	}
> -	unlock_new_inode(ecryptfs_inode);
> -	d_instantiate(ecryptfs_dentry, ecryptfs_inode);
> +	d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
> out:
> 	return rc;
> }
> diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
> index 55f7caadb093..152453a91877 100644
> --- a/fs/ext2/namei.c
> +++ b/fs/ext2/namei.c
> @@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
> {
> 	int err = ext2_add_link(dentry, inode);
> 	if (!err) {
> -		unlock_new_inode(inode);
> -		d_instantiate(dentry, inode);
> +		d_instantiate_new(dentry, inode);
> 		return 0;
> 	}
> 	inode_dec_link_count(inode);
> @@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
> 	if (err)
> 		goto out_fail;
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> out:
> 	return err;
> 
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index b1f21e3a0763..4a09063ce1d2 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle,
> 	int err = ext4_add_entry(handle, dentry, inode);
> 	if (!err) {
> 		ext4_mark_inode_dirty(handle, inode);
> -		unlock_new_inode(inode);
> -		d_instantiate(dentry, inode);
> +		d_instantiate_new(dentry, inode);
> 		return 0;
> 	}
> 	drop_nlink(inode);
> @@ -2651,8 +2650,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
> 	err = ext4_mark_inode_dirty(handle, dir);
> 	if (err)
> 		goto out_clear_inode;
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	if (IS_DIRSYNC(dir))
> 		ext4_handle_sync(handle);
> 
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index d5098efe577c..75e37fd720b2 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
> 
> 	alloc_nid_done(sbi, ino);
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 
> 	if (IS_DIRSYNC(dir))
> 		f2fs_sync_fs(sbi->sb, 1);
> @@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
> 	err = page_symlink(inode, disk_link.name, disk_link.len);
> 
> err_out:
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 
> 	/*
> 	 * Let's flush symlink data in order to avoid broken symlink as much as
> @@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
> 
> 	alloc_nid_done(sbi, inode->i_ino);
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 
> 	if (IS_DIRSYNC(dir))
> 		f2fs_sync_fs(sbi->sb, 1);
> @@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
> 
> 	alloc_nid_done(sbi, inode->i_ino);
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 
> 	if (IS_DIRSYNC(dir))
> 		f2fs_sync_fs(sbi->sb, 1);
> diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
> index 0a754f38462e..e5a6deb38e1e 100644
> --- a/fs/jffs2/dir.c
> +++ b/fs/jffs2/dir.c
> @@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
> 		  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
> 		  f->inocache->pino_nlink, inode->i_mapping->nrpages);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	return 0;
> 
>  fail:
> @@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
> 	mutex_unlock(&dir_f->sem);
> 	jffs2_complete_reservation(c);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	return 0;
> 
>  fail:
> @@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
> 	mutex_unlock(&dir_f->sem);
> 	jffs2_complete_reservation(c);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	return 0;
> 
>  fail:
> @@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
> 	mutex_unlock(&dir_f->sem);
> 	jffs2_complete_reservation(c);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	return 0;
> 
>  fail:
> diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
> index b41596d71858..56c3fcbfe80e 100644
> --- a/fs/jfs/namei.c
> +++ b/fs/jfs/namei.c
> @@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
> 		unlock_new_inode(ip);
> 		iput(ip);
> 	} else {
> -		unlock_new_inode(ip);
> -		d_instantiate(dentry, ip);
> +		d_instantiate_new(dentry, ip);
> 	}
> 
>       out2:
> @@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
> 		unlock_new_inode(ip);
> 		iput(ip);
> 	} else {
> -		unlock_new_inode(ip);
> -		d_instantiate(dentry, ip);
> +		d_instantiate_new(dentry, ip);
> 	}
> 
>       out2:
> @@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
> 		unlock_new_inode(ip);
> 		iput(ip);
> 	} else {
> -		unlock_new_inode(ip);
> -		d_instantiate(dentry, ip);
> +		d_instantiate_new(dentry, ip);
> 	}
> 
>       out2:
> @@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
> 		unlock_new_inode(ip);
> 		iput(ip);
> 	} else {
> -		unlock_new_inode(ip);
> -		d_instantiate(dentry, ip);
> +		d_instantiate_new(dentry, ip);
> 	}
> 
>       out1:
> diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
> index 1a2894aa0194..dd52d3f82e8d 100644
> --- a/fs/nilfs2/namei.c
> +++ b/fs/nilfs2/namei.c
> @@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
> 	int err = nilfs_add_link(dentry, inode);
> 
> 	if (!err) {
> -		d_instantiate(dentry, inode);
> -		unlock_new_inode(inode);
> +		d_instantiate_new(dentry, inode);
> 		return 0;
> 	}
> 	inode_dec_link_count(inode);
> @@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
> 		goto out_fail;
> 
> 	nilfs_mark_inode_dirty(inode);
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> out:
> 	if (!err)
> 		err = nilfs_transaction_commit(dir->i_sb);
> diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
> index 6e3134e6d98a..1b5707c44c3f 100644
> --- a/fs/orangefs/namei.c
> +++ b/fs/orangefs/namei.c
> @@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir,
> 		     get_khandle_from_ino(inode),
> 		     dentry);
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 	orangefs_set_timeout(dentry);
> 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
> 	ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
> @@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir,
> 		     "Assigned symlink inode new number of %pU\n",
> 		     get_khandle_from_ino(inode));
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 	orangefs_set_timeout(dentry);
> 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
> 	ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
> @@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
> 		     "Assigned dir inode new number of %pU\n",
> 		     get_khandle_from_ino(inode));
> 
> -	d_instantiate(dentry, inode);
> -	unlock_new_inode(inode);
> +	d_instantiate_new(dentry, inode);
> 	orangefs_set_timeout(dentry);
> 	ORANGEFS_I(inode)->getattr_time = jiffies - 1;
> 	ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
> diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
> index bd39a998843d..5089dac02660 100644
> --- a/fs/reiserfs/namei.c
> +++ b/fs/reiserfs/namei.c
> @@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
> 	reiserfs_update_inode_transaction(inode);
> 	reiserfs_update_inode_transaction(dir);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	retval = journal_end(&th);
> 
> out_failed:
> @@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
> 		goto out_failed;
> 	}
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	retval = journal_end(&th);
> 
> out_failed:
> @@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
> 	/* the above add_entry did not update dir's stat data */
> 	reiserfs_update_sd(&th, dir);
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	retval = journal_end(&th);
> out_failed:
> 	reiserfs_write_unlock(dir->i_sb);
> @@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
> 		goto out_failed;
> 	}
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	retval = journal_end(&th);
> out_failed:
> 	reiserfs_write_unlock(parent_dir->i_sb);
> diff --git a/fs/udf/namei.c b/fs/udf/namei.c
> index 0458dd47e105..c586026508db 100644
> --- a/fs/udf/namei.c
> +++ b/fs/udf/namei.c
> @@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
> 	if (fibh.sbh != fibh.ebh)
> 		brelse(fibh.ebh);
> 	brelse(fibh.sbh);
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 
> 	return 0;
> }
> @@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
> 	inc_nlink(dir);
> 	dir->i_ctime = dir->i_mtime = current_time(dir);
> 	mark_inode_dirty(dir);
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	if (fibh.sbh != fibh.ebh)
> 		brelse(fibh.ebh);
> 	brelse(fibh.sbh);
> diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
> index 32545cd00ceb..d5f43ba76c59 100644
> --- a/fs/ufs/namei.c
> +++ b/fs/ufs/namei.c
> @@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
> {
> 	int err = ufs_add_link(dentry, inode);
> 	if (!err) {
> -		unlock_new_inode(inode);
> -		d_instantiate(dentry, inode);
> +		d_instantiate_new(dentry, inode);
> 		return 0;
> 	}
> 	inode_dec_link_count(inode);
> @@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
> 	if (err)
> 		goto out_fail;
> 
> -	unlock_new_inode(inode);
> -	d_instantiate(dentry, inode);
> +	d_instantiate_new(dentry, inode);
> 	return 0;
> 
> out_fail:
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index 94acbde17bb1..66c6e17e61e5 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -224,6 +224,7 @@ extern seqlock_t rename_lock;
>  * These are the low-level FS interfaces to the dcache..
>  */
> extern void d_instantiate(struct dentry *, struct inode *);
> +extern void d_instantiate_new(struct dentry *, struct inode *);
> extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
> extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
> extern int d_instantiate_no_diralias(struct dentry *, struct inode *);


Cheers, Andreas





Attachment: signature.asc
Description: Message signed with OpenPGP


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux