On 3/10/22 6:36 PM, Jeff Layton wrote:
On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:On 3/9/22 9:59 PM, xiubli@xxxxxxxxxx wrote:From: Xiubo Li <xiubli@xxxxxxxxxx> For the readdir request the dentries will be pasred and dencrypted in ceph_readdir_prepopulate(). And in ceph_readdir() we could just get the dentry name from the dentry cache instead of parsing and dencrypting them again. This could improve performance. Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> --- V7: - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL. V6: - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX instead, since we are limiting the max length of snapshot name to 240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>). V5: - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro - release the rde->dentry in destroy_reply_info fs/ceph/dir.c | 56 ++++++++++++++++++++------------------------ fs/ceph/inode.c | 7 ++++++ fs/ceph/mds_client.c | 1 + fs/ceph/mds_client.h | 1 + 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6df2a91af236..2397c34e9173 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) int err; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; - struct fscrypt_str tname = FSTR_INIT(NULL, 0); - struct fscrypt_str oname = FSTR_INIT(NULL, 0); + char *dentry_name = NULL;dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);if (dfi->file_info.flags & CEPH_F_ATEND) @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&ci->i_ceph_lock); }- err = ceph_fname_alloc_buffer(inode, &tname);- if (err < 0) - goto out; - - err = ceph_fname_alloc_buffer(inode, &oname); - if (err < 0) - goto out; - /* proceed with a normal readdir */ more: /* do we have the correct frag content buffered? */ @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } } } + + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); + if (!dentry_name) { + err = -ENOMEM; + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; + goto out; + } + for (; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_fname fname = { .dir = inode, - .name = rde->name, - .name_len = rde->name_len, - .ctext = rde->altname, - .ctext_len = rde->altname_len }; - u32 olen = oname.len; - - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); - if (err) { - pr_err("%s unable to decode %.*s, got %d\n", __func__, - rde->name_len, rde->name, err); - goto out; - } + struct dentry *dn = rde->dentry; + int name_len;BUG_ON(rde->offset < ctx->pos);BUG_ON(!rde->inode.in); + BUG_ON(!rde->dentry);ctx->pos = rde->offset;- dout("readdir (%d/%d) -> %llx '%.*s' %p\n", - i, rinfo->dir_nr, ctx->pos, - rde->name_len, rde->name, &rde->inode.in);- if (!dir_emit(ctx, oname.name, oname.len,+ spin_lock(&dn->d_lock); + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); + name_len = dn->d_name.len; + spin_unlock(&dn->d_lock); +Hi Jeff, BTW, does the dn->d_lock is must here ? From the code comments, the d_lock intends to protect the 'd_flag' and 'd_lockref.count'. In ceph code I found some places are using the d_lock when accessing the d_name, some are not. And in none ceph code, they will almost never use the d_lock to protect the d_name.It's probably not needed. The d_name can only change if there are no other outstanding references to the dentry.
If so, the dentry_name = kmalloc() is not needed any more. - Xiubo
+ dentry_name[name_len] = '\0'; + dout("readdir (%d/%d) -> %llx '%s' %p\n", + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); + + if (!dir_emit(ctx, dentry_name, name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { /* @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) goto out; }- /* Reset the lengths to their original allocated vals */- oname.len = olen; ctx->pos++; }@@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)err = 0; dout("readdir %p file %p done.\n", inode, file); out: - ceph_fname_free_buffer(inode, &tname); - ceph_fname_free_buffer(inode, &oname); + if (dentry_name) + kfree(dentry_name); return err; }diff --git a/fs/ceph/inode.c b/fs/ceph/inode.cindex b573a0f33450..19e5275eae1c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, goto out; }+ rde->dentry = NULL;dname.name = oname.name; dname.len = oname.len; dname.hash = full_name_hash(parent, dname.name, dname.len); @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, goto retry_lookup; }+ /*+ * ceph_readdir will use the dentry to get the name + * to avoid doing the dencrypt again there. + */ + rde->dentry = dget(dn); + /* inode */ if (d_really_is_positive(dn)) { in = d_inode(dn); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8d704ddd7291..9e0a51ef1dfa 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)kfree(rde->inode.fscrypt_auth);kfree(rde->inode.fscrypt_file); + dput(rde->dentry); } free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 0dfe24f94567..663d7754d57d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { };struct ceph_mds_reply_dir_entry {+ struct dentry *dentry; char *name; u8 *altname; u32 name_len;