fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache() in such a case to avoid the A-A deadlock. However, we found another A-B-B-A deadlock related to the case above, which will cause the xfstests generic/464 testcase hung in our virtio-fs test environment. For example, consider two processes concurrently open one same file, one with O_TRUNC and another without O_TRUNC. The deadlock case is described below, if open(O_TRUNC) is already set_nowrite(acquired A), and is trying to lock a page (acquiring B), open() could have held the page lock (acquired B), and waiting on the page writeback (acquiring A). This would lead to deadlocks. open(O_TRUNC) ---------------------------------------------------------------- fuse_open_common inode_lock [C acquire] fuse_set_nowrite [A acquire] fuse_finish_open truncate_pagecache lock_page [B acquire] truncate_inode_page unlock_page [B release] fuse_release_nowrite [A release] inode_unlock [C release] ---------------------------------------------------------------- open() ---------------------------------------------------------------- fuse_open_common fuse_finish_open invalidate_inode_pages2 lock_page [B acquire] fuse_launder_page fuse_wait_on_page_writeback [A acquire & release] unlock_page [B release] ---------------------------------------------------------------- Besides this case, all calls of invalidate_inode_pages2() and invalidate_inode_pages2_range() in fuse code also can deadlock with open(O_TRUNC). This commit tries to fix it by adding a new lock, atomic_o_trunc, to protect the areas with the A-B-B-A deadlock risk. Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@xxxxxxxxxxxxx> --- fs/fuse/dax.c | 4 ++-- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 28 ++++++++++++++++++++++++++-- fs/fuse/fuse_i.h | 7 +++++++ fs/fuse/inode.c | 7 ++++--- 5 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 182b24a14804..e5203d61698c 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -878,11 +878,11 @@ static int dmap_writeback_invalidate(struct inode *inode, return ret; } - ret = invalidate_inode_pages2_range(inode->i_mapping, + ret = fuse_invalidate_inode_pages_range(inode, start_pos >> PAGE_SHIFT, end_pos >> PAGE_SHIFT); if (ret) - pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", + pr_debug("fuse: fuse_invalidate_inode_pages_range() failed err=%d\n", ret); return ret; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 656e921f3506..d6d5dcd3cf1e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1778,7 +1778,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if ((is_truncate || !is_wb) && S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, outarg.attr.size); - invalidate_inode_pages2(mapping); + fuse_invalidate_inode_pages(inode); } clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 829094451774..1dde21bad53c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -124,6 +124,28 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) } } +int fuse_invalidate_inode_pages_range(struct inode *inode, pgoff_t start, + pgoff_t end) +{ + int ret; + struct fuse_conn *fc = get_fuse_conn(inode); + bool may_truncate = fc->atomic_o_trunc && + (fc->writeback_cache || FUSE_IS_DAX(inode)); + + if (may_truncate) + mutex_lock(&get_fuse_inode(inode)->atomic_trunc_mutex); + ret = invalidate_inode_pages2_range(inode->i_mapping, start, end); + if (may_truncate) + mutex_unlock(&get_fuse_inode(inode)->atomic_trunc_mutex); + + return ret; +} + +int fuse_invalidate_inode_pages(struct inode *inode) +{ + return fuse_invalidate_inode_pages_range(inode, 0, -1); +} + struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, bool isdir) { @@ -214,7 +236,7 @@ void fuse_finish_open(struct inode *inode, struct file *file) file_update_time(file); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { - invalidate_inode_pages2(inode->i_mapping); + fuse_invalidate_inode_pages(inode); } if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) @@ -241,6 +263,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (is_wb_truncate || dax_truncate) { inode_lock(inode); + mutex_lock(&get_fuse_inode(inode)->atomic_trunc_mutex); fuse_set_nowrite(inode); } @@ -261,6 +284,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (is_wb_truncate | dax_truncate) { fuse_release_nowrite(inode); + mutex_unlock(&get_fuse_inode(inode)->atomic_trunc_mutex); inode_unlock(inode); } @@ -2408,7 +2432,7 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; - invalidate_inode_pages2(file->f_mapping); + fuse_invalidate_inode_pages(file_inode(file)); return generic_file_mmap(file, vma); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e8e59fbdefeb..ea293d0347a0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -149,6 +149,9 @@ struct fuse_inode { /** Lock to protect write related fields */ spinlock_t lock; + /** Lock for serializing page invalidation and atomic_o_trunc open */ + struct mutex atomic_trunc_mutex; + #ifdef CONFIG_FUSE_DAX /* * Dax specific inode data @@ -1315,4 +1318,8 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, void fuse_file_release(struct inode *inode, struct fuse_file *ff, unsigned int open_flags, fl_owner_t id, bool isdir); +int fuse_invalidate_inode_pages(struct inode *inode); +int fuse_invalidate_inode_pages_range(struct inode *inode, + pgoff_t start, pgoff_t end); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ee846ce371d8..997c620f25df 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -86,6 +86,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->state = 0; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); + mutex_init(&fi->atomic_trunc_mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) goto out_free; @@ -107,6 +108,7 @@ static void fuse_free_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); mutex_destroy(&fi->mutex); + mutex_destroy(&fi->atomic_trunc_mutex); kfree(fi->forget); #ifdef CONFIG_FUSE_DAX kfree(fi->dax); @@ -299,7 +301,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, } if (inval) - invalidate_inode_pages2(inode->i_mapping); + fuse_invalidate_inode_pages(inode); } if (IS_ENABLED(CONFIG_FUSE_DAX)) @@ -448,8 +450,7 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, pg_end = -1; else pg_end = (offset + len - 1) >> PAGE_SHIFT; - invalidate_inode_pages2_range(inode->i_mapping, - pg_start, pg_end); + fuse_invalidate_inode_pages_range(inode, pg_start, pg_end); } iput(inode); return 0; -- 2.20.1