On 2021/04/24 2:30, Jan Kara wrote: > Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended > purpose is exactly the same. By this conversion we also fix a race > between hole punching and read(2) / readahead(2) paths that can lead to > stale page cache contents. zonefs does not support hole punching since the blocks of a file are determined by the device zone configuration and cannot change, ever. So I think you can remove the last sentence above (the one about the race with hole punching), since it does not apply to zonefs. > > CC: Damien Le Moal <damien.lemoal@xxxxxxx> > CC: Johannes Thumshirn <jth@xxxxxxxxxx> > CC: <linux-fsdevel@xxxxxxxxxxxxxxx> > Signed-off-by: Jan Kara <jack@xxxxxxx> > --- > fs/zonefs/super.c | 23 +++++------------------ > fs/zonefs/zonefs.h | 7 +++---- > 2 files changed, 8 insertions(+), 22 deletions(-) > > diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c > index 049e36c69ed7..60ac5587c880 100644 > --- a/fs/zonefs/super.c > +++ b/fs/zonefs/super.c > @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > inode_dio_wait(inode); > > /* Serialize against page faults */ > - down_write(&zi->i_mmap_sem); > + down_write(&inode->i_mapping->invalidate_lock); > > /* Serialize against zonefs_iomap_begin() */ > mutex_lock(&zi->i_truncate_mutex); > @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > unlock: > mutex_unlock(&zi->i_truncate_mutex); > - up_write(&zi->i_mmap_sem); > + up_write(&inode->i_mapping->invalidate_lock); > > return ret; > } > @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, > return ret; > } > > -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) > -{ > - struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); > - vm_fault_t ret; > - > - down_read(&zi->i_mmap_sem); > - ret = filemap_fault(vmf); > - up_read(&zi->i_mmap_sem); > - > - return ret; > -} > - > static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > { > struct inode *inode 
= file_inode(vmf->vma->vm_file); > @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > file_update_time(vmf->vma->vm_file); > > /* Serialize against truncates */ > - down_read(&zi->i_mmap_sem); > + down_read(&inode->i_mapping->invalidate_lock); > ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); > - up_read(&zi->i_mmap_sem); > + up_read(&inode->i_mapping->invalidate_lock); > > sb_end_pagefault(inode->i_sb); > return ret; > } > > static const struct vm_operations_struct zonefs_file_vm_ops = { > - .fault = zonefs_filemap_fault, > + .fault = filemap_fault, > .map_pages = filemap_map_pages, > .page_mkwrite = zonefs_filemap_page_mkwrite, > }; > @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) > > inode_init_once(&zi->i_vnode); > mutex_init(&zi->i_truncate_mutex); > - init_rwsem(&zi->i_mmap_sem); > zi->i_wr_refcnt = 0; > > return &zi->i_vnode; > diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h > index 51141907097c..7b147907c328 100644 > --- a/fs/zonefs/zonefs.h > +++ b/fs/zonefs/zonefs.h > @@ -70,12 +70,11 @@ struct zonefs_inode_info { > * and changes to the inode private data, and in particular changes to > * a sequential file size on completion of direct IO writes. > * Serialization of mmap read IOs with truncate and syscall IO > - * operations is done with i_mmap_sem in addition to i_truncate_mutex. > - * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, > - * i_truncate_mutex second). > + * operations is done with invalidate_lock in addition to > + * i_truncate_mutex. Only zonefs_seq_file_truncate() takes both lock > + * (invalidate_lock first, i_truncate_mutex second). > */ > struct mutex i_truncate_mutex; > - struct rw_semaphore i_mmap_sem; > > /* guarded by i_truncate_mutex */ > unsigned int i_wr_refcnt; > -- Damien Le Moal Western Digital Research