On Thu, 2022-03-10 at 16:22 +0800, Xiubo Li wrote: > On 3/9/22 9:59 PM, xiubli@xxxxxxxxxx wrote: > > From: Xiubo Li <xiubli@xxxxxxxxxx> > > > > For the readdir request the dentries will be pasred and dencrypted > > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just > > get the dentry name from the dentry cache instead of parsing and > > dencrypting them again. This could improve performance. > > > > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > > --- > > > > V7: > > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL. > > > > V6: > > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX > > instead, since we are limiting the max length of snapshot name to > > 240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>). > > > > V5: > > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro > > - release the rde->dentry in destroy_reply_info > > > > > > > > fs/ceph/dir.c | 56 ++++++++++++++++++++------------------------ > > fs/ceph/inode.c | 7 ++++++ > > fs/ceph/mds_client.c | 1 + > > fs/ceph/mds_client.h | 1 + > > 4 files changed, 34 insertions(+), 31 deletions(-) > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > > index 6df2a91af236..2397c34e9173 100644 > > --- a/fs/ceph/dir.c > > +++ b/fs/ceph/dir.c > > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > > int err; > > unsigned frag = -1; > > struct ceph_mds_reply_info_parsed *rinfo; > > - struct fscrypt_str tname = FSTR_INIT(NULL, 0); > > - struct fscrypt_str oname = FSTR_INIT(NULL, 0); > > + char *dentry_name = NULL; > > > > dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); > > if (dfi->file_info.flags & CEPH_F_ATEND) > > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > > spin_unlock(&ci->i_ceph_lock); > > } > > > > - err = ceph_fname_alloc_buffer(inode, &tname); > > - if (err < 0) > > - goto out; > > - > > - err = ceph_fname_alloc_buffer(inode, &oname); > > - if (err < 0) > > - goto out; > > - > > /* proceed with a normal readdir */ > > more: > > /* do we have the correct frag content buffered? */ > > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > > } > > } > > } > > + > > + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); > > + if (!dentry_name) { > > + err = -ENOMEM; > > + ceph_mdsc_put_request(dfi->last_readdir); > > + dfi->last_readdir = NULL; > > + goto out; > > + } > > + > > This should move up before the 'more' tag. > Yep, good catch. > > > for (; i < rinfo->dir_nr; i++) { > > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > > - struct ceph_fname fname = { .dir = inode, > > - .name = rde->name, > > - .name_len = rde->name_len, > > - .ctext = rde->altname, > > - .ctext_len = rde->altname_len }; > > - u32 olen = oname.len; > > - > > - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); > > - if (err) { > > - pr_err("%s unable to decode %.*s, got %d\n", __func__, > > - rde->name_len, rde->name, err); > > - goto out; > > - } > > + struct dentry *dn = rde->dentry; > > + int name_len; > > > > BUG_ON(rde->offset < ctx->pos); > > BUG_ON(!rde->inode.in); > > + BUG_ON(!rde->dentry); > > > > ctx->pos = rde->offset; > > - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", > > - i, rinfo->dir_nr, ctx->pos, > > - rde->name_len, rde->name, &rde->inode.in); > > > > - if (!dir_emit(ctx, oname.name, oname.len, > > + spin_lock(&dn->d_lock); > > + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); > > + name_len = dn->d_name.len; > > + spin_unlock(&dn->d_lock); > > + > > + dentry_name[name_len] = '\0'; > > Possibly caused by this. Since it useless here and I will remove it. > > Seems plausible. If you send a v8 patch, I can give it another test and see if it fixes it. > > + dout("readdir (%d/%d) -> %llx '%s' %p\n", > > + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); > > + > > + if (!dir_emit(ctx, dentry_name, name_len, > > ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), > > le32_to_cpu(rde->inode.in->mode) >> 12)) { > > /* > > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > > goto out; > > } > > > > - /* Reset the lengths to their original allocated vals */ > > - oname.len = olen; > > ctx->pos++; > > } > > > > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > > err = 0; > > dout("readdir %p file %p done.\n", inode, file); > > out: > > - ceph_fname_free_buffer(inode, &tname); > > - ceph_fname_free_buffer(inode, &oname); > > + if (dentry_name) > > + kfree(dentry_name); > > return err; > > } > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > > index b573a0f33450..19e5275eae1c 100644 > > --- a/fs/ceph/inode.c > > +++ b/fs/ceph/inode.c > > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > goto out; > > } > > > > + rde->dentry = NULL; > > dname.name = oname.name; > > dname.len = oname.len; > > dname.hash = full_name_hash(parent, dname.name, dname.len); > > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > goto retry_lookup; > > } > > > > + /* > > + * ceph_readdir will use the dentry to get the name > > + * to avoid doing the dencrypt again there. > > + */ > > + rde->dentry = dget(dn); > > + > > /* inode */ > > if (d_really_is_positive(dn)) { > > in = d_inode(dn); > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > > index 8d704ddd7291..9e0a51ef1dfa 100644 > > --- a/fs/ceph/mds_client.c > > +++ b/fs/ceph/mds_client.c > > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) > > > > kfree(rde->inode.fscrypt_auth); > > kfree(rde->inode.fscrypt_file); > > + dput(rde->dentry); > > } > > free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); > > } > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > > index 0dfe24f94567..663d7754d57d 100644 > > --- a/fs/ceph/mds_client.h > > +++ b/fs/ceph/mds_client.h > > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { > > }; > > > > struct ceph_mds_reply_dir_entry { > > + struct dentry *dentry; > > char *name; > > u8 *altname; > > u32 name_len; > -- Jeff Layton <jlayton@xxxxxxxxxx>