Re: [PATCH 09/15] fs: move file_start_write() into vfs_iter_write()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 15, 2023 at 1:42 AM Jan Harkes <jaharkes@xxxxxxxxxx> wrote:
>
> That is a NACK for me.
>
> Your change inverts lock ordering so that after your change we hold the
> inode lock on the coda inode before we call file_start_write.
>

Hmm. It is not OK that my patch changes the lock ordering as a side effect.
The new ordering is in fact the correct locking order,
but it should be changed in a separate patch.

> See the comments for sb_start_write in include/linux/fs.h
> (__sb_start_write is pretty much the only thing file_start_write calls).
>
>  * Since freeze protection behaves as a lock, users have to preserve
>  * ordering of freeze protection and other filesystem locks. Generally,
>  * freeze protection should be the outermost lock. In particular, we
>  * have:
>  *
>  * sb_start_write
>  *   -> i_mutex                 (write path, truncate, directory ops, ...)
>  *   -> s_umount                (freeze_super, thaw_super)
>

This describes the locking order within a specific fs.
host_file is not in the same fs as coda_inode.

IIUC, host_file is a sort of backing file for the coda inode.
In cases like this, as in cachefiles and overlayfs, it is best
to order all backing fs locks strictly after all the frontend fs locks.
See ovl_write_iter() for example.

IOW, the new lock ordering is preferred:
file_start_write(coda_file)
  inode_lock(coda_inode)
    file_start_write(host_file)
      inode_lock(host_inode)


Thanks,
Amir.

>
>
> On Tue, Nov 14, 2023 at 05:32:48PM +0200, Amir Goldstein wrote:
> ...
> > diff --git a/fs/coda/file.c b/fs/coda/file.c
> > index 16acc58311ea..7c84555c8923 100644
> > --- a/fs/coda/file.c
> > +++ b/fs/coda/file.c
> > @@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
> >       if (ret)
> >               goto finish_write;
> >
> > -     file_start_write(host_file);
> >       inode_lock(coda_inode);
> > -     ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
> > +     ret = vfs_iter_write(host_file, to, &iocb->ki_pos, 0);
> >       coda_inode->i_size = file_inode(host_file)->i_size;
> >       coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
> >       inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode));
> >       inode_unlock(coda_inode);
> > -     file_end_write(host_file);
> >
> >  finish_write:
> >       venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),
> > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> > index 5d704461e3b4..35c9546b3396 100644
> > --- a/fs/nfsd/vfs.c
> > +++ b/fs/nfsd/vfs.c
> > @@ -1186,9 +1186,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
> >       since = READ_ONCE(file->f_wb_err);
> >       if (verf)
> >               nfsd_copy_write_verifier(verf, nn);
> > -     file_start_write(file);
> >       host_err = vfs_iter_write(file, &iter, &pos, flags);
> > -     file_end_write(file);
> >       if (host_err < 0) {
> >               commit_reset_write_verifier(nn, rqstp, host_err);
> >               goto out_nfserr;
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 131621daeb13..690b173f34fc 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -436,9 +436,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
> >       if (is_sync_kiocb(iocb)) {
> >               rwf_t rwf = iocb_to_rw_flags(ifl);
> >
> > -             file_start_write(real.file);
> >               ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
> > -             file_end_write(real.file);
> >               /* Update size */
> >               ovl_file_modified(file);
> >       } else {
> > diff --git a/fs/read_write.c b/fs/read_write.c
> > index 590ab228fa98..8cdc6e6a9639 100644
> > --- a/fs/read_write.c
> > +++ b/fs/read_write.c
> > @@ -846,7 +846,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
> >  EXPORT_SYMBOL(vfs_iter_read);
> >
> >  static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
> > -             loff_t *pos, rwf_t flags)
> > +                          loff_t *pos, rwf_t flags)
> >  {
> >       size_t tot_len;
> >       ssize_t ret = 0;
> > @@ -901,11 +901,18 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
> >  EXPORT_SYMBOL(vfs_iocb_iter_write);
> >
> >  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
> > -             rwf_t flags)
> > +                    rwf_t flags)
> >  {
> > +     int ret;
> > +
> >       if (!file->f_op->write_iter)
> >               return -EINVAL;
> > -     return do_iter_write(file, iter, ppos, flags);
> > +
> > +     file_start_write(file);
> > +     ret = do_iter_write(file, iter, ppos, flags);
> > +     file_end_write(file);
> > +
> > +     return ret;
> >  }
> >  EXPORT_SYMBOL(vfs_iter_write);
> >
> > --
> > 2.34.1
> >
> >





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux