> > For implementation purpose, initially we tried to keep percpu usage counters > > inside struct inode just like there is struct sb_writers in super_block. > > But considering that it will significantly bloat up struct inode when actually > > the usage of file write freeze will be infrequent, we dropped this idea. > > Instead we have tried to use already present filesystem freezing infrastructure. > > Current approach makes it possible for implementing file write freeze without > > bloating any of struct super_block/inode. > > In FS_IOC_FWFREEZE, we wait for complete fs to be frozen, set I_WRITE_FREEZED to > > inode's state and unfreeze the fs. > Looks interesting. I have added some comments below. Hi Dmitry, First, Thanks for your opinion. > > > > @@ -40,6 +40,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, > > > > f2fs_balance_fs(sbi); > > > > + inode_start_write(inode); > > sb_start_pagefault(inode->i_sb); > IMHO it is reasonable to fold sb_start_{write,pagefault}, to inode_start_{write,pagefault} Agree. > > > > +void inode_start_write(struct inode *inode) > > +{ > > + struct super_block *sb = inode->i_sb; > > + > > +retry: > > + spin_lock(&inode->i_lock); > This means that i_lock will be acquired on each mkpage_write for all > users who do not care about fsfreeze which result smp performance drawback > It is reasonable to add lockless test first because flag is set while > whole fs is frozen so we can not enter this routine. Right, I will remove it. > > > + if (inode->i_state & I_WRITE_FREEZED) { > > + DEFINE_WAIT(wait); > > + > > + prepare_to_wait(&sb->s_writers.wait_unfrozen, &wait, > > + TASK_UNINTERRUPTIBLE); > > + spin_unlock(&inode->i_lock); > > + schedule(); > > + finish_wait(&sb->s_writers.wait_unfrozen, &wait); > > + goto retry; > > + } > > + spin_unlock(&inode->i_lock); > > +} > > diff --git a/fs/ioctl.c b/fs/ioctl.c > > index 214c3c1..c8e9ae3 100644 > > --- a/fs/ioctl.c > > +++ b/fs/ioctl.c > > @@ -540,6 +540,28 @@ static int ioctl_fsthaw(struct file *filp) > > return thaw_super(sb); > > } > > > > +static int ioctl_filefreeze(struct file *filp) > > +{ > > + struct inode *inode = file_inode(filp); > > + > > + if (!inode_owner_or_capable(inode)) > > + return -EPERM; > > + > > + /* Freeze */ > > + return file_write_freeze(inode); > > +} > > > + > > +static int ioctl_filethaw(struct file *filp) > > +{ > > + struct inode *inode = file_inode(filp); > > + > > + if (!inode_owner_or_capable(inode)) > > + return -EPERM; > > + > > + /* Thaw */ > > + return file_write_unfreeze(inode); > > +} > > + > > /* > > * When you add any new common ioctls to the switches above and below > > * please update compat_sys_ioctl() too. > > @@ -589,6 +611,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, > > error = ioctl_fsthaw(filp); > > break; > > > > + case FS_IOC_FWFREEZE: > > + error = ioctl_filefreeze(filp); > > + break; > > + > > + case FS_IOC_FWTHAW: > > + error = ioctl_filethaw(filp); > > + break; > > + > > case FS_IOC_FIEMAP: > > return ioctl_fiemap(filp, arg); > > > > diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c > > index 3a03e0a..5110d9d 100644 > > --- a/fs/nilfs2/file.c > > +++ b/fs/nilfs2/file.c > > @@ -66,6 +66,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) > > if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) > > return VM_FAULT_SIGBUS; /* -ENOSPC */ > > > > + inode_start_write(file_inode(vma->vm_file)); > > sb_start_pagefault(inode->i_sb); > > lock_page(page); > > if (page->mapping != inode->i_mapping || > > diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c > > index 10d66c7..d073fc2 100644 > > --- a/fs/ocfs2/mmap.c > > +++ b/fs/ocfs2/mmap.c > > @@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) > > sigset_t oldset; > > int ret; > > > > + inode_start_write(inode); > > sb_start_pagefault(inode->i_sb); > > ocfs2_block_signals(&oldset); > > > > diff --git a/fs/super.c b/fs/super.c > > index eae088f..5e44e42 100644 > > --- a/fs/super.c > > +++ b/fs/super.c > > @@ -1393,3 +1393,54 @@ out: > > return 0; > > } > > EXPORT_SYMBOL(thaw_super); > > + > IMHO it is reasonable to open code this procedure so user is responsible > for calling freeze_super(), thaw_super() . This allow to call for > several inodes in a row like follows: > > ioctl(sb,FIFREEZE) > while (f = pop(files_list)) > ioctl(f,FS_IOC_FWFREEZE) > ioctl(sb,FITHAW) > > This required for directory defragmentation(small files compacting) Good point, I will check your point on V2. Thanks! > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html