Re: [RFC PATCH 11/11] ceph: wait for async dir ops to complete before doing synchronous dir ops

Jeff Layton <jlayton@xxxxxxxxxxxxxxx> · Wed, 10 Apr 2019 08:16:41 -0400

On Wed, Apr 10, 2019 at 7:05 AM Luis Henriques <lhenriques@xxxxxxxx> wrote:
>
> Jeff Layton <jlayton@xxxxxxxxxx> writes:
>
> > Ensure that we wait on replies from any pending directory operations
> > involving children before we allow synchronous operations involving
> > that directory to proceed.
> >
> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > ---
> >  fs/ceph/dir.c   | 65 ++++++++++++++++++++++++++++++++++++++++++++++---
> >  fs/ceph/file.c  |  4 +++
> >  fs/ceph/super.h |  1 +
> >  3 files changed, 66 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 386c9439a020..0b8cee46e07c 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -998,11 +998,16 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
> >       struct ceph_mds_request *req;
> >       int err;
> >
> > +     dout("link in dir %p old_dentry %p dentry %p\n", dir,
> > +          old_dentry, dentry);
> > +
> >       if (ceph_snap(dir) != CEPH_NOSNAP)
> >               return -EROFS;
> >
> > -     dout("link in dir %p old_dentry %p dentry %p\n", dir,
> > -          old_dentry, dentry);
> > +     err = ceph_async_dirop_request_wait(dir);
> > +     if (err)
> > +             return err;
> > +
> >       req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
> >       if (IS_ERR(req)) {
> >               d_drop(dentry);
> > @@ -1041,6 +1046,43 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
> >       iput(req->r_old_inode);
> >  }
> >
> > +/*
> > + * Wait for the most recently queued entry on a directory's
> > + * i_unsafe_dirops list to receive a reply.
> > + *
> > + * Only directories carry such a list, so any other inode type returns 0
> > + * immediately. If the last queued request has neither the GOT_UNSAFE nor
> > + * the GOT_SAFE flag set, take a reference on it and block (killably) on
> > + * its r_completion.
> > + *
> > + * Returns 0 when there was nothing to wait for or the wait completed,
> > + * otherwise the nonzero result of wait_for_completion_killable().
> > + */
> > +int ceph_async_dirop_request_wait(struct inode *inode)
> > +{
> > +     struct ceph_inode_info *ci = ceph_inode(inode);
> > +     struct ceph_mds_request *req = NULL;
> > +     int ret = 0;
> > +
> > +     /* Only applicable for directories */
> > +     if (!S_ISDIR(inode->i_mode))
> > +             return 0;
> > +
> > +     spin_lock(&ci->i_unsafe_lock);
> > +     if (!list_empty(&ci->i_unsafe_dirops)) {
> > +             struct ceph_mds_request *last;
> > +             last = list_last_entry(&ci->i_unsafe_dirops,
> > +                                    struct ceph_mds_request,
> > +                                    r_unsafe_dir_item);
> > +             /*
> > +              * If last request hasn't gotten a reply, then wait
> > +              * for it. Grab a reference while still holding the
> > +              * lock so the request stays valid after we drop it.
> > +              */
> > +             if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &last->r_req_flags) &&
> > +                 !test_bit(CEPH_MDS_R_GOT_SAFE, &last->r_req_flags)) {
> > +                     req = last;
> > +                     ceph_mdsc_get_request(req);
> > +             }
> > +     }
> > +     spin_unlock(&ci->i_unsafe_lock);
> > +
> > +     if (req) {
> > +             dout("%s %p wait on tid %llu\n", __func__, inode,
> > +                  req->r_tid);
> > +             ret = wait_for_completion_killable(&req->r_completion);
> > +             ceph_mdsc_put_request(req);
> > +     }
> > +     return ret;
> > +}
> > +
> >  /*
> >   * rmdir and unlink are differ only by the metadata op code
> >   */
> > @@ -1064,6 +1106,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
> >                       CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
> >       } else
> >               goto out;
> > +
> > +     /* Wait for any requests involving children to get a reply */
> > +     err = ceph_async_dirop_request_wait(dir);
> > +     if (err)
> > +             goto out;
> > +
>
> In this case, couldn't we move this check into the 'else' branch added
> in the previous patch?  IOW, couldn't we have two (or more) asynchronous
> unlink operations at the same time?
>
> Cheers,

For this set, it won't matter. We're only doing async unlinks on
regular files, and rmdir is still done synchronously. So, if this is a
candidate for an async unlink it can't have any child dirops anyway.

It's a minor thing, but this has us blocking before the
ceph_mdsc_create_request call, which means we won't do any allocation
until we're ready to fire off the request, which I like marginally
better.

Longer term, I'd like to expand this so that we can do async rmdirs as
well, but that requires a different set of caps (FxLx on the parent).
Once I get there, I'll probably split off a separate ceph_rmdir
inode_operation.

>
> > >       req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> > >       if (IS_ERR(req)) {
> > >               err = PTR_ERR(req);
> > > @@ -1115,6 +1163,9 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> > >       int op = CEPH_MDS_OP_RENAME;
> > >       int err;
> >
> > +     dout("rename dir %p dentry %p to dir %p dentry %p\n",
> > +          old_dir, old_dentry, new_dir, new_dentry);
> > +
> >       if (flags)
> >               return -EINVAL;
> >
> > @@ -1131,8 +1182,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> >           (!ceph_quota_is_same_realm(old_dir, new_dir)))
> >               return -EXDEV;
> >
> > -     dout("rename dir %p dentry %p to dir %p dentry %p\n",
> > -          old_dir, old_dentry, new_dir, new_dentry);
> > +     err = ceph_async_dirop_request_wait(old_dir);
> > +     if (err)
> > +             return err;
> > +
> > +     err = ceph_async_dirop_request_wait(new_dir);
> > +     if (err)
> > +             return err;
> > +
> >       req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> >       if (IS_ERR(req))
> >               return PTR_ERR(req);
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index f24d18f46715..f7e49907514e 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -444,6 +444,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> >            dir, dentry, dentry,
> >            d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
> >
> > +     err = ceph_async_dirop_request_wait(dir);
> > +     if (err)
> > +             return err;
> > +
> >       if (dentry->d_name.len > NAME_MAX)
> >               return -ENAMETOOLONG;
> >
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index 5c361dc1f47f..e97a6ce31a4e 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -1070,6 +1070,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req,
> >                              struct dentry *dentry, int err);
> >  extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
> >                                        struct dentry *dentry, int err);
> > +extern int ceph_async_dirop_request_wait(struct inode *inode);
> >
> >  extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
> >  extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);

-- 
Jeff Layton <jlayton@xxxxxxxxxxxxxxx>