On Thu, 2022-05-19 at 08:22 +0800, Xiubo Li wrote: > On 5/19/22 4:28 AM, Jeff Layton wrote: > > On Wed, 2022-05-18 at 22:45 +0800, Xiubo Li wrote: > > > In async unlink case the kclient won't wait for the first reply > > > from MDS and just drop all the links and unhash the dentry and then > > > succeeds immediately. > > > > > > For any new create/link/rename,etc requests followed by using the > > > same file names we must wait for the first reply of the inflight > > > unlink request, or the MDS possibly will fail these following > > > requests with -EEXIST if the inflight async unlink request was > > > delayed for some reasons. > > > > > > And the worst case is that for the none async openc request it will > > > successfully open the file if the CDentry hasn't been unlinked yet, > > > but later the previous delayed async unlink request will remove the > > > CDenty. That means the just created file is possiblly deleted later > > > by accident. > > > > > > We need to wait for the inflight async unlink requests to finish > > > when creating new files/directories by using the same file names. > > > > > > URL: https://tracker.ceph.com/issues/55332 > > > Reported-by: kernel test robot <lkp@xxxxxxxxx> > > > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > > > --- > > > fs/ceph/dir.c | 70 +++++++++++++++++++++++++++++++++++++++--- > > > fs/ceph/file.c | 4 +++ > > > fs/ceph/mds_client.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ > > > fs/ceph/mds_client.h | 1 + > > > fs/ceph/super.c | 3 ++ > > > fs/ceph/super.h | 19 +++++++++--- > > > 6 files changed, 160 insertions(+), 10 deletions(-) > > > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > > > index eae417d71136..01e7facef9b2 100644 > > > --- a/fs/ceph/dir.c > > > +++ b/fs/ceph/dir.c > > > @@ -856,6 +856,10 @@ static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir, > > > if (ceph_snap(dir) != CEPH_NOSNAP) > > > return -EROFS; > > > > > > + err = ceph_wait_on_conflict_unlink(dentry); > > > + if (err) > > > + return err; > > > + > > > if (ceph_quota_is_max_files_exceeded(dir)) { > > > err = -EDQUOT; > > > goto out; > > > @@ -918,6 +922,10 @@ static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir, > > > if (ceph_snap(dir) != CEPH_NOSNAP) > > > return -EROFS; > > > > > > + err = ceph_wait_on_conflict_unlink(dentry); > > > + if (err) > > > + return err; > > > + > > > if (ceph_quota_is_max_files_exceeded(dir)) { > > > err = -EDQUOT; > > > goto out; > > > @@ -968,9 +976,13 @@ static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir, > > > struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); > > > struct ceph_mds_request *req; > > > struct ceph_acl_sec_ctx as_ctx = {}; > > > - int err = -EROFS; > > > + int err; > > > int op; > > > > > > + err = ceph_wait_on_conflict_unlink(dentry); > > > + if (err) > > > + return err; > > > + > > > if (ceph_snap(dir) == CEPH_SNAPDIR) { > > > /* mkdir .snap/foo is a MKSNAP */ > > > op = CEPH_MDS_OP_MKSNAP; > > > @@ -980,6 +992,7 @@ static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir, > > > dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); > > > op = CEPH_MDS_OP_MKDIR; > > > } else { > > > + err = -EROFS; > > > goto out; > > > } > > > > > > @@ -1037,6 +1050,10 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, > > > struct ceph_mds_request *req; > > > int err; > > > > > > + err = ceph_wait_on_conflict_unlink(dentry); > > > + if (err) > > > + return err; > > > + > > > if (ceph_snap(dir) != CEPH_NOSNAP) > > > return -EROFS; > > > > > > @@ -1071,9 +1088,27 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, > > > static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, > > > struct ceph_mds_request *req) > > > { > > > + struct dentry *dentry = req->r_dentry; > > > + struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); > > > + struct ceph_dentry_info *di = ceph_dentry(dentry); > > > int result = req->r_err ? req->r_err : > > > le32_to_cpu(req->r_reply_info.head->result); > > > > > > + if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) > > > + pr_warn("%s dentry %p:%pd async unlink bit is not set\n", > > > + __func__, dentry, dentry); > > > + > > This is a pr_warn so it won't have a "ceph: " prefix or anything > > prepending it like dout messages do. You should probably prepend this > > with something like that. > > The pr_fmt already help add the module name: > > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > Such as for one current log in ceph: > > 2022-05-02T17:28:51.006593+00:00 smithi040 kernel: ceph: ceph: async > create failure path=(1)client.0/tmp/ceph/src/os/ObjectStore.h result=-17! > > My mistake! Removing those is fine. > > > > > + spin_lock(&fsc->async_unlink_conflict_lock); > > > + hash_del_rcu(&di->hnode); > > > + spin_unlock(&fsc->async_unlink_conflict_lock); > > > + > > > + spin_lock(&dentry->d_lock); > > > + di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK; > > > + wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT); > > > + spin_unlock(&dentry->d_lock); > > > + > > > + synchronize_rcu(); > > > + > > > if (result == -EJUKEBOX) > > > goto out; > > > > > > @@ -1081,7 +1116,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, > > > if (result) { > > > int pathlen = 0; > > > u64 base = 0; > > > - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, > > > + char *path = ceph_mdsc_build_path(dentry, &pathlen, > > > &base, 0); > > > > > > /* mark error on parent + clear complete */ > > > @@ -1089,13 +1124,13 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, > > > ceph_dir_clear_complete(req->r_parent); > > > > > > /* drop the dentry -- we don't know its status */ > > > - if (!d_unhashed(req->r_dentry)) > > > - d_drop(req->r_dentry); > > > + if (!d_unhashed(dentry)) > > > + d_drop(dentry); > > > > > > /* mark inode itself for an error (since metadata is bogus) */ > > > mapping_set_error(req->r_old_inode->i_mapping, result); > > > > > > - pr_warn("ceph: async unlink failure path=(%llx)%s result=%d!\n", > > > + pr_warn("async unlink failure path=(%llx)%s result=%d!\n", > > Ditto here: why remove the prefix? > > The log will be: > > 2022-05-02T17:28:51.006593+00:00 smithi040 kernel: ceph: ceph: async > create failure path=(1)client.0/tmp/ceph/src/os/ObjectStore.h result=-17! > > And the prefix "ceph:" is really needed ? > Nope. Disregard my earlier comment. > > > base, IS_ERR(path) ? "<<bad>>" : path, result); > > > ceph_mdsc_free_path(path, pathlen); > > > } > > > @@ -1180,6 +1215,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > > > > > > if (try_async && op == CEPH_MDS_OP_UNLINK && > > > (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { > > > + struct ceph_dentry_info *di = ceph_dentry(dentry); > > > + > > > dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), > > > dentry->d_name.len, dentry->d_name.name, > > > ceph_cap_string(req->r_dir_caps)); > > > @@ -1187,6 +1224,16 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > > > req->r_callback = ceph_async_unlink_cb; > > > req->r_old_inode = d_inode(dentry); > > > ihold(req->r_old_inode); > > > + > > > + spin_lock(&dentry->d_lock); > > > + di->flags |= CEPH_DENTRY_ASYNC_UNLINK; > > > + spin_unlock(&dentry->d_lock); > > > + > > > + spin_lock(&fsc->async_unlink_conflict_lock); > > > + hash_add_rcu(fsc->async_unlink_conflict, &di->hnode, > > > + dentry->d_name.hash); > > > + spin_unlock(&fsc->async_unlink_conflict_lock); > > > + > > > err = ceph_mdsc_submit_request(mdsc, dir, req); > > > if (!err) { > > > /* > > > @@ -1198,6 +1245,15 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > > > } else if (err == -EJUKEBOX) { > > > try_async = false; > > > ceph_mdsc_put_request(req); > > > + > > > + spin_lock(&dentry->d_lock); > > > + di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK; > > > + spin_unlock(&dentry->d_lock); > > > + > > > + spin_lock(&fsc->async_unlink_conflict_lock); > > > + hash_del_rcu(&di->hnode); > > > + spin_unlock(&fsc->async_unlink_conflict_lock); > > > + > > You're clearing the flag here before removing it from the hash. That > > might end up causing a waiter to trip the pr_warn in > > ceph_wait_on_conflict_unlink. You probably want to reverse the order > > here. > > Sure. Will fix it. > > -- Xiubo > -- Jeff Layton <jlayton@xxxxxxxxxx>