On Thu, 2022-03-03 at 11:26 +0800, xiubli@xxxxxxxxxx wrote: > From: Xiubo Li <xiubli@xxxxxxxxxx> > > For the readdir request the dentries will be pasred and dencrypted > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just > get the dentry name from the dentry cache instead of parsing and > dencrypting them again. This could improve performance. > > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > --- > fs/ceph/crypto.h | 8 +++++ > fs/ceph/dir.c | 74 +++++++++++++++++++++++--------------------- > fs/ceph/inode.c | 15 +++++++++ > fs/ceph/mds_client.h | 1 + > 4 files changed, 63 insertions(+), 35 deletions(-) > > diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h > index 1e08f8a64ad6..9a00c60b8535 100644 > --- a/fs/ceph/crypto.h > +++ b/fs/ceph/crypto.h > @@ -83,6 +83,14 @@ static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa) > */ > #define CEPH_NOHASH_NAME_MAX (189 - SHA256_DIGEST_SIZE) > > +/* > + * The encrypted long snap name will be in format of > + * "_${ENCRYPTED-LONG-SNAP-NAME}_${INODE-NUM}". And will set the max longth > + * to sizeof('_') + NAME_MAX + sizeof('_') + max of sizeof(${INO}) + extra 7 > + * bytes to align the total size to 8 bytes. > + */ > +#define CEPH_ENCRPTED_LONG_SNAP_NAME_MAX (1 + 255 + 1 + 16 + 7) > + Maybe CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX instead? > void ceph_fscrypt_set_ops(struct super_block *sb); > > void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc); > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > index 4da59810b036..fa3da3b29130 100644 > --- a/fs/ceph/dir.c > +++ b/fs/ceph/dir.c > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > int err; > unsigned frag = -1; > struct ceph_mds_reply_info_parsed *rinfo; > - struct fscrypt_str tname = FSTR_INIT(NULL, 0); > - struct fscrypt_str oname = FSTR_INIT(NULL, 0); > + char *dentry_name = NULL; > > dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); > if (dfi->file_info.flags & CEPH_F_ATEND) > @@ -345,10 +344,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > ctx->pos = 2; > } > > - err = fscrypt_prepare_readdir(inode); > - if (err) > - goto out; > - > spin_lock(&ci->i_ceph_lock); > /* request Fx cap. if have Fx, we don't need to release Fs cap > * for later create/unlink. */ > @@ -369,14 +364,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > spin_unlock(&ci->i_ceph_lock); > } > > - err = ceph_fname_alloc_buffer(inode, &tname); > - if (err < 0) > - goto out; > - > - err = ceph_fname_alloc_buffer(inode, &oname); > - if (err < 0) > - goto out; > - > /* proceed with a normal readdir */ > more: > /* do we have the correct frag content buffered? */ > @@ -528,42 +515,59 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > } > } > } > + > + dentry_name = kmalloc(CEPH_ENCRPTED_LONG_SNAP_NAME_MAX, GFP_KERNEL); > + if (!dentry_name) { > + err = -ENOMEM; > + ceph_mdsc_put_request(dfi->last_readdir); > + dfi->last_readdir = NULL; > + goto out; > + } > + If this happens (or an earlier error), all of the dentry references will leak. I think you probably need to move the cleanup into destroy_reply_info(). > for (; i < rinfo->dir_nr; i++) { > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > - struct ceph_fname fname = { .dir = inode, > - .name = rde->name, > - .name_len = rde->name_len, > - .ctext = rde->altname, > - .ctext_len = rde->altname_len }; > - u32 olen = oname.len; > - > - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); > - if (err) { > - pr_err("%s unable to decode %.*s, got %d\n", __func__, > - rde->name_len, rde->name, err); > - goto out; > - } > + struct dentry *dn = rde->dentry; > + int name_len; > > BUG_ON(rde->offset < ctx->pos); > BUG_ON(!rde->inode.in); > + BUG_ON(!rde->dentry); > > ctx->pos = rde->offset; > - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", > - i, rinfo->dir_nr, ctx->pos, > - rde->name_len, rde->name, &rde->inode.in); > > - if (!dir_emit(ctx, oname.name, oname.len, > + spin_lock(&dn->d_lock); > + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); > + name_len = dn->d_name.len; > + spin_unlock(&dn->d_lock); > + > + dentry_name[name_len] = '\0'; > + dout("readdir (%d/%d) -> %llx '%s' %p\n", > + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); > + > + dput(dn); > + rde->dentry = NULL; > + > + if (!dir_emit(ctx, dentry_name, name_len, > ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), > le32_to_cpu(rde->inode.in->mode) >> 12)) { > dout("filldir stopping us...\n"); > + > + /* > + * dput the rest dentries. Must do this before > + * releasing the request. > + */ > + for (++i; i < rinfo->dir_nr; i++) { > + rde = rinfo->dir_entries + i; > + dput(rde->dentry); > + rde->dentry = NULL; > + } > + > err = 0; > ceph_mdsc_put_request(dfi->last_readdir); > dfi->last_readdir = NULL; > goto out; > } > > - /* Reset the lengths to their original allocated vals */ > - oname.len = olen; > ctx->pos++; > } > > @@ -621,8 +625,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > err = 0; > dout("readdir %p file %p done.\n", inode, file); > out: > - ceph_fname_free_buffer(inode, &tname); > - ceph_fname_free_buffer(inode, &oname); > + if (dentry_name) > + kfree(dentry_name); > return err; > } > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index e5a9838981ba..d0719feed792 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -1902,6 +1902,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > goto out; > } > > + rde->dentry = NULL; > dname.name = oname.name; > dname.len = oname.len; > dname.hash = full_name_hash(parent, dname.name, dname.len); > @@ -1962,6 +1963,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > goto retry_lookup; > } > > + /* > + * ceph_readdir will use the dentry to get the name > + * to avoid doing the dencrypt again there. > + */ > + rde->dentry = dget(dn); > + > /* inode */ > if (d_really_is_positive(dn)) { > in = d_inode(dn); > @@ -2024,6 +2031,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > dput(dn); > } > out: > + if (err) { > + for (; i >= 0; i--) { > + struct ceph_mds_reply_dir_entry *rde; > + > + rde = rinfo->dir_entries + i; > + dput(rde->dentry); > + } > + } > if (err == 0 && skipped == 0) { > set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags); > req->r_readdir_cache_idx = cache_ctl.index; > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > index 0dfe24f94567..663d7754d57d 100644 > --- a/fs/ceph/mds_client.h > +++ b/fs/ceph/mds_client.h > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { > }; > > struct ceph_mds_reply_dir_entry { > + struct dentry *dentry; > char *name; > u8 *altname; > u32 name_len; -- Jeff Layton <jlayton@xxxxxxxxxx>