On Thu, 14 Nov 2024 at 08:12, Zhang Tianci <zhangtianci.1997@xxxxxxxxxxxxx> wrote: > > Function fuse_direntplus_link() might call fuse_iget() to initialize a new > fuse_inode and change its attributes. If fi->attr_version is always > initialized with 0, even if the attributes returned by the FUSE_READDIR > request is staled, as the new fi->attr_version is 0, fuse_change_attributes > will still set the staled attributes to inode. This wrong behaviour may > cause file size inconsistency even when there is no changes from > server-side. Thanks for working on this. I have some comments, see below. > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -173,6 +173,7 @@ static void fuse_evict_inode(struct inode *inode) > fuse_cleanup_submount_lookup(fc, fi->submount_lookup); > fi->submount_lookup = NULL; > } > + atomic64_inc(&fc->evict_ctr); I think this should only be done if (inode->i_nlink > 0), because if the file/directory was removed, then the race between another lookup cannot happen. > @@ -426,7 +427,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) > > struct inode *fuse_iget(struct super_block *sb, u64 nodeid, > int generation, struct fuse_attr *attr, > - u64 attr_valid, u64 attr_version) > + u64 attr_valid, u64 attr_version, > + u64 evict_ctr) > { > struct inode *inode; > struct fuse_inode *fi; > @@ -488,6 +490,10 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, > spin_unlock(&fi->lock); > done: > fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); > + spin_lock(&fi->lock); > + if (evict_ctr < fuse_get_evict_ctr(fc)) > + fuse_invalidate_attr(inode); Similarly, this should not be done on creation (attr_version == 0), since in case of a brand new inode the previous inode state cannot have any effect on it. The other case that may be worth taking special care of is when the inode is already in the cache. This happens for example on lookup of hard links. This is the (fi->attr_version > 0) case where we can rely on the validity of attr_version. I.e. the attr_version comparison in fuse_change_attributes() will make sure the attributes are only updated if necessary, no need to check evict_ctr. So the only case that needs evict_ctr verification is (attr_version != 0 && fi->attr_version == 0). One other thing: I don't like the fact that the invalid mask is first cleared in fuse_change_attributes_common(), then reset in fuse_invalidate_attr(). It would be cleaner to not clear the mask in the first place. Attaching an untested incremental patch. Can you please review and test? Thanks, Miklos
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 7d0a0fab6920..59be2877786f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -692,8 +692,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, ff->nodeid = outentry.nodeid; ff->open_flags = outopenp->open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, - &outentry.attr, ATTR_TIMEOUT(&outentry), 0, - fuse_get_evict_ctr(fm->fc)); + &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); @@ -824,8 +823,7 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm, goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr, ATTR_TIMEOUT(&outarg), 0, - fuse_get_evict_ctr(fm->fc)); + &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); if (!inode) { fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); return -ENOMEM; @@ -2031,7 +2029,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, fuse_change_attributes_common(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), - fuse_get_cache_mask(inode)); + fuse_get_cache_mask(inode), 0); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f9ff0d0029ab..a98fb243b913 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1136,7 +1136,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, - u64 attr_valid, u32 cache_mask); + u64 attr_valid, u32 cache_mask, + u64 evict_ctr); u32 fuse_get_cache_mask(struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 872c61dd5661..c5109d5b806f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -173,7 +173,14 @@ static void fuse_evict_inode(struct inode *inode) fuse_cleanup_submount_lookup(fc, fi->submount_lookup); fi->submount_lookup = NULL; } - atomic64_inc(&fc->evict_ctr); + /* + * Evict of non-deleted inode may race with outstanding + * LOOKUP/READDIRPLUS requests and result in inconsistency when + * the request finishes. Deal with that here by bumping a + * counter that can be compared to the starting value. + */ + if (inode->i_nlink > 0) + atomic64_inc(&fc->evict_ctr); } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(fi->iocachectr != 0); @@ -207,7 +214,8 @@ static ino_t fuse_squash_ino(u64 ino64) void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, - u64 attr_valid, u32 cache_mask) + u64 attr_valid, u32 cache_mask, + u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -216,8 +224,20 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; - /* Clear basic stats from invalid mask */ - set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); + + /* + * Clear basic stats from invalid mask. + * + * Don't do this if this is coming from a fuse_iget() call and there + * might have been a racing evict which would've invalidated the result + * if the attr_version would've been preserved. + * + * !evict_ctr -> this is create + * fi->attr_version != 0 -> this is not a new inode + * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request + */ + if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -296,9 +316,9 @@ u32 fuse_get_cache_mask(struct inode *inode) return STATX_MTIME | STATX_CTIME | STATX_SIZE; } -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - struct fuse_statx *sx, - u64 attr_valid, u64 attr_version) +static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, + u64 attr_version, u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -332,7 +352,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, } old_mtime = inode_get_mtime(inode); - fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask); + fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask, + evict_ctr); oldsize = inode->i_size; /* @@ -373,6 +394,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, + u64 attr_version) +{ + fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0); +} + static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, u64 nodeid) { @@ -489,12 +517,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, fi->nlookup++; spin_unlock(&fi->lock); done: - fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); - spin_lock(&fi->lock); - if (evict_ctr < fuse_get_evict_ctr(fc)) - fuse_invalidate_attr(inode); - spin_unlock(&fi->lock); - + fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version, + evict_ctr); return inode; }