On Mon 26-04-21 06:40:27, Damien Le Moal wrote: > On 2021/04/24 2:30, Jan Kara wrote: > > Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended > > purpose is exactly the same. By this conversion we also fix a race > > between hole punching and read(2) / readahead(2) paths that can lead to > > stale page cache contents. > > zonefs does not support hole punching since the blocks of a file are determined > by the device zone configuration and cannot change, ever. So I think you can > remove the second sentence above. Sure, thanks for correction. Updated. Honza > > > > > CC: Damien Le Moal <damien.lemoal@xxxxxxx> > > CC: Johannes Thumshirn <jth@xxxxxxxxxx> > > CC: <linux-fsdevel@xxxxxxxxxxxxxxx> > > Signed-off-by: Jan Kara <jack@xxxxxxx> > > --- > > fs/zonefs/super.c | 23 +++++------------------ > > fs/zonefs/zonefs.h | 7 +++---- > > 2 files changed, 8 insertions(+), 22 deletions(-) > > > > diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c > > index 049e36c69ed7..60ac5587c880 100644 > > --- a/fs/zonefs/super.c > > +++ b/fs/zonefs/super.c > > @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > inode_dio_wait(inode); > > > > /* Serialize against page faults */ > > - down_write(&zi->i_mmap_sem); > > + down_write(&inode->i_mapping->invalidate_lock); > > > > /* Serialize against zonefs_iomap_begin() */ > > mutex_lock(&zi->i_truncate_mutex); > > @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > > > unlock: > > mutex_unlock(&zi->i_truncate_mutex); > > - up_write(&zi->i_mmap_sem); > > + up_write(&inode->i_mapping->invalidate_lock); > > > > return ret; > > } > > @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, > > return ret; > > } > > > > -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) > > -{ > > - struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); > > - vm_fault_t ret; > > - > > - 
down_read(&zi->i_mmap_sem); > > - ret = filemap_fault(vmf); > > - up_read(&zi->i_mmap_sem); > > - > > - return ret; > > -} > > - > > static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > > { > > struct inode *inode = file_inode(vmf->vma->vm_file); > > @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > > file_update_time(vmf->vma->vm_file); > > > > /* Serialize against truncates */ > > - down_read(&zi->i_mmap_sem); > > + down_read(&inode->i_mapping->invalidate_lock); > > ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); > > - up_read(&zi->i_mmap_sem); > > + up_read(&inode->i_mapping->invalidate_lock); > > > > sb_end_pagefault(inode->i_sb); > > return ret; > > } > > > > static const struct vm_operations_struct zonefs_file_vm_ops = { > > - .fault = zonefs_filemap_fault, > > + .fault = filemap_fault, > > .map_pages = filemap_map_pages, > > .page_mkwrite = zonefs_filemap_page_mkwrite, > > }; > > @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) > > > > inode_init_once(&zi->i_vnode); > > mutex_init(&zi->i_truncate_mutex); > > - init_rwsem(&zi->i_mmap_sem); > > zi->i_wr_refcnt = 0; > > > > return &zi->i_vnode; > > diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h > > index 51141907097c..7b147907c328 100644 > > --- a/fs/zonefs/zonefs.h > > +++ b/fs/zonefs/zonefs.h > > @@ -70,12 +70,11 @@ struct zonefs_inode_info { > > * and changes to the inode private data, and in particular changes to > > * a sequential file size on completion of direct IO writes. > > * Serialization of mmap read IOs with truncate and syscall IO > > - * operations is done with i_mmap_sem in addition to i_truncate_mutex. > > - * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, > > - * i_truncate_mutex second). > > + * operations is done with invalidate_lock in addition to > > + * i_truncate_mutex. 
Only zonefs_seq_file_truncate() takes both locks > > + * (invalidate_lock first, i_truncate_mutex second). > > */ > > struct mutex i_truncate_mutex; > > - struct rw_semaphore i_mmap_sem; > > > > /* guarded by i_truncate_mutex */ > > unsigned int i_wr_refcnt; > > > > > -- > Damien Le Moal > Western Digital Research -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR