I applied your patch to Linux v4.17-rc3 and ran xfstests and saw no Orangefs regressions.... you can add tested-by: Mike Marshall if you dare <g> ... -Mike On Thu, May 10, 2018 at 2:20 PM, Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote: > [in the spirit of "don't put 'em in without posting for review"; > this is present in vfs.git#for-linus, if you prefer to look in git. > > Background: a bunch of nfsd race fixes from back in 2008 had > problems with lockdep enabled; in 2012 that got "fixed", unfortunately > reopening a narrow race window. The patch below does *NOT* fix > all filesystems, but it does fix most of the exported local ones > and it is easy to backport, so it makes for a sane starting point. > > If anyone has objections, this is your chance to yell. > ] > > For anything NFS-exported we do _not_ want to unlock new inode > before it has grown an alias; original set of fixes got the > ordering right, but missed the nasty complication in case of > lockdep being enabled - unlock_new_inode() does > lockdep_annotate_inode_mutex_key(inode) > which can only be done before anyone gets a chance to touch > ->i_mutex. Unfortunately, flipping the order and doing > unlock_new_inode() before d_instantiate() opens a window when > mkdir can race with open-by-fhandle on a guessed fhandle, leading > to multiple aliases for a directory inode and all the breakage > that follows from that. > > Correct solution: a new primitive (d_instantiate_new()) > combining these two in the right order - lockdep annotate, then > d_instantiate(), then the rest of unlock_new_inode(). All > combinations of d_instantiate() with unlock_new_inode() should > be converted to that. 
> > Cc: stable@xxxxxxxxxx # 2.6.29 and later > Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index e064c49c9a9a..9e97cbb4f006 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -6575,8 +6575,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, > goto out_unlock_inode; > } else { > btrfs_update_inode(trans, root, inode); > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > } > > out_unlock: > @@ -6652,8 +6651,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, > goto out_unlock_inode; > > BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > > out_unlock: > btrfs_end_transaction(trans); > @@ -6798,12 +6796,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) > if (err) > goto out_fail_inode; > > - d_instantiate(dentry, inode); > - /* > - * mkdir is special. We're unlocking after we call d_instantiate > - * to avoid a race with nfsd calling d_instantiate. 
> - */ > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > drop_on_err = 0; > > out_fail: > @@ -10246,8 +10239,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, > goto out_unlock_inode; > } > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > > out_unlock: > btrfs_end_transaction(trans); > diff --git a/fs/dcache.c b/fs/dcache.c > index 86d2de63461e..6da095fef440 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -1899,6 +1899,22 @@ void d_instantiate(struct dentry *entry, struct inode * inode) > } > EXPORT_SYMBOL(d_instantiate); > > +void d_instantiate_new(struct dentry *entry, struct inode *inode) > +{ > + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); > + BUG_ON(!inode); > + lockdep_annotate_inode_mutex_key(inode); > + security_d_instantiate(entry, inode); > + spin_lock(&inode->i_lock); > + __d_instantiate(entry, inode); > + WARN_ON(!(inode->i_state & I_NEW)); > + inode->i_state &= ~I_NEW; > + smp_mb(); > + wake_up_bit(&inode->i_state, __I_NEW); > + spin_unlock(&inode->i_lock); > +} > +EXPORT_SYMBOL(d_instantiate_new); > + > /** > * d_instantiate_no_diralias - instantiate a non-aliased dentry > * @entry: dentry to complete > diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c > index 847904aa63a9..7bba8f2693b2 100644 > --- a/fs/ecryptfs/inode.c > +++ b/fs/ecryptfs/inode.c > @@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, > iget_failed(ecryptfs_inode); > goto out; > } > - unlock_new_inode(ecryptfs_inode); > - d_instantiate(ecryptfs_dentry, ecryptfs_inode); > + d_instantiate_new(ecryptfs_dentry, ecryptfs_inode); > out: > return rc; > } > diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c > index 55f7caadb093..152453a91877 100644 > --- a/fs/ext2/namei.c > +++ b/fs/ext2/namei.c > @@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) > { > int err = ext2_add_link(dentry, inode); > if 
(!err) { > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > } > inode_dec_link_count(inode); > @@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) > if (err) > goto out_fail; > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > out: > return err; > > diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c > index b1f21e3a0763..4a09063ce1d2 100644 > --- a/fs/ext4/namei.c > +++ b/fs/ext4/namei.c > @@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle, > int err = ext4_add_entry(handle, dentry, inode); > if (!err) { > ext4_mark_inode_dirty(handle, inode); > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > } > drop_nlink(inode); > @@ -2651,8 +2650,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) > err = ext4_mark_inode_dirty(handle, dir); > if (err) > goto out_clear_inode; > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > if (IS_DIRSYNC(dir)) > ext4_handle_sync(handle); > > diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c > index d5098efe577c..75e37fd720b2 100644 > --- a/fs/f2fs/namei.c > +++ b/fs/f2fs/namei.c > @@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, > > alloc_nid_done(sbi, ino); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > > if (IS_DIRSYNC(dir)) > f2fs_sync_fs(sbi->sb, 1); > @@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, > err = page_symlink(inode, disk_link.name, disk_link.len); > > err_out: > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > > /* > * Let's flush symlink data in order to avoid broken symlink as much as > @@ -661,8 +659,7 @@ static int 
f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) > > alloc_nid_done(sbi, inode->i_ino); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > > if (IS_DIRSYNC(dir)) > f2fs_sync_fs(sbi->sb, 1); > @@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, > > alloc_nid_done(sbi, inode->i_ino); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > > if (IS_DIRSYNC(dir)) > f2fs_sync_fs(sbi->sb, 1); > diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c > index 0a754f38462e..e5a6deb38e1e 100644 > --- a/fs/jffs2/dir.c > +++ b/fs/jffs2/dir.c > @@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, > __func__, inode->i_ino, inode->i_mode, inode->i_nlink, > f->inocache->pino_nlink, inode->i_mapping->nrpages); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > > fail: > @@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char > mutex_unlock(&dir_f->sem); > jffs2_complete_reservation(c); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > > fail: > @@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode > mutex_unlock(&dir_f->sem); > jffs2_complete_reservation(c); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > > fail: > @@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode > mutex_unlock(&dir_f->sem); > jffs2_complete_reservation(c); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > > fail: > diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c > index b41596d71858..56c3fcbfe80e 100644 > --- a/fs/jfs/namei.c > +++ 
b/fs/jfs/namei.c > @@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, > unlock_new_inode(ip); > iput(ip); > } else { > - unlock_new_inode(ip); > - d_instantiate(dentry, ip); > + d_instantiate_new(dentry, ip); > } > > out2: > @@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) > unlock_new_inode(ip); > iput(ip); > } else { > - unlock_new_inode(ip); > - d_instantiate(dentry, ip); > + d_instantiate_new(dentry, ip); > } > > out2: > @@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, > unlock_new_inode(ip); > iput(ip); > } else { > - unlock_new_inode(ip); > - d_instantiate(dentry, ip); > + d_instantiate_new(dentry, ip); > } > > out2: > @@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, > unlock_new_inode(ip); > iput(ip); > } else { > - unlock_new_inode(ip); > - d_instantiate(dentry, ip); > + d_instantiate_new(dentry, ip); > } > > out1: > diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c > index 1a2894aa0194..dd52d3f82e8d 100644 > --- a/fs/nilfs2/namei.c > +++ b/fs/nilfs2/namei.c > @@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) > int err = nilfs_add_link(dentry, inode); > > if (!err) { > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > return 0; > } > inode_dec_link_count(inode); > @@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) > goto out_fail; > > nilfs_mark_inode_dirty(inode); > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > out: > if (!err) > err = nilfs_transaction_commit(dir->i_sb); > diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c > index 6e3134e6d98a..1b5707c44c3f 100644 > --- a/fs/orangefs/namei.c > +++ b/fs/orangefs/namei.c > @@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir, > 
get_khandle_from_ino(inode), > dentry); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > orangefs_set_timeout(dentry); > ORANGEFS_I(inode)->getattr_time = jiffies - 1; > ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; > @@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir, > "Assigned symlink inode new number of %pU\n", > get_khandle_from_ino(inode)); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > orangefs_set_timeout(dentry); > ORANGEFS_I(inode)->getattr_time = jiffies - 1; > ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; > @@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode > "Assigned dir inode new number of %pU\n", > get_khandle_from_ino(inode)); > > - d_instantiate(dentry, inode); > - unlock_new_inode(inode); > + d_instantiate_new(dentry, inode); > orangefs_set_timeout(dentry); > ORANGEFS_I(inode)->getattr_time = jiffies - 1; > ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; > diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c > index bd39a998843d..5089dac02660 100644 > --- a/fs/reiserfs/namei.c > +++ b/fs/reiserfs/namei.c > @@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod > reiserfs_update_inode_transaction(inode); > reiserfs_update_inode_transaction(dir); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > retval = journal_end(&th); > > out_failed: > @@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode > goto out_failed; > } > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > retval = journal_end(&th); > > out_failed: > @@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode > /* the above add_entry did not update dir's 
stat data */ > reiserfs_update_sd(&th, dir); > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > retval = journal_end(&th); > out_failed: > reiserfs_write_unlock(dir->i_sb); > @@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir, > goto out_failed; > } > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > retval = journal_end(&th); > out_failed: > reiserfs_write_unlock(parent_dir->i_sb); > diff --git a/fs/udf/namei.c b/fs/udf/namei.c > index 0458dd47e105..c586026508db 100644 > --- a/fs/udf/namei.c > +++ b/fs/udf/namei.c > @@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) > if (fibh.sbh != fibh.ebh) > brelse(fibh.ebh); > brelse(fibh.sbh); > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > > return 0; > } > @@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) > inc_nlink(dir); > dir->i_ctime = dir->i_mtime = current_time(dir); > mark_inode_dirty(dir); > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > if (fibh.sbh != fibh.ebh) > brelse(fibh.ebh); > brelse(fibh.sbh); > diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c > index 32545cd00ceb..d5f43ba76c59 100644 > --- a/fs/ufs/namei.c > +++ b/fs/ufs/namei.c > @@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) > { > int err = ufs_add_link(dentry, inode); > if (!err) { > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > } > inode_dec_link_count(inode); > @@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) > if (err) > goto out_fail; > > - unlock_new_inode(inode); > - d_instantiate(dentry, inode); > + d_instantiate_new(dentry, inode); > return 0; > > out_fail: > diff 
--git a/include/linux/dcache.h b/include/linux/dcache.h > index 94acbde17bb1..66c6e17e61e5 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -224,6 +224,7 @@ extern seqlock_t rename_lock; > * These are the low-level FS interfaces to the dcache.. > */ > extern void d_instantiate(struct dentry *, struct inode *); > +extern void d_instantiate_new(struct dentry *, struct inode *); > extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); > extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); > extern int d_instantiate_no_diralias(struct dentry *, struct inode *);