Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir

Jeff Layton <jlayton@xxxxxxxxxx> · Thu, 10 Mar 2022 05:36:47 -0500

On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
> On 3/9/22 9:59 PM, xiubli@xxxxxxxxxx wrote:
> > From: Xiubo Li <xiubli@xxxxxxxxxx>
> > 
> > For the readdir request the dentries will be pasred and dencrypted
> > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > get the dentry name from the dentry cache instead of parsing and
> > dencrypting them again. This could improve performance.
> > 
> > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> > ---
> > 
> > V7:
> > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > 
> > V6:
> > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> >    instead, since we are limiting the max length of snapshot name to
> >    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > 
> > V5:
> > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > - release the rde->dentry in destroy_reply_info
> > 
> > 
> > 
> >   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> >   fs/ceph/inode.c      |  7 ++++++
> >   fs/ceph/mds_client.c |  1 +
> >   fs/ceph/mds_client.h |  1 +
> >   4 files changed, 34 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 6df2a91af236..2397c34e9173 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	int err;
> >   	unsigned frag = -1;
> >   	struct ceph_mds_reply_info_parsed *rinfo;
> > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > +	char *dentry_name = NULL;
> >   
> >   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> >   	if (dfi->file_info.flags & CEPH_F_ATEND)
> > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   		spin_unlock(&ci->i_ceph_lock);
> >   	}
> >   
> > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > -	if (err < 0)
> > -		goto out;
> > -
> > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > -	if (err < 0)
> > -		goto out;
> > -
> >   	/* proceed with a normal readdir */
> >   more:
> >   	/* do we have the correct frag content buffered? */
> > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			}
> >   		}
> >   	}
> > +
> > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > +	if (!dentry_name) {
> > +		err = -ENOMEM;
> > +		ceph_mdsc_put_request(dfi->last_readdir);
> > +		dfi->last_readdir = NULL;
> > +		goto out;
> > +	}
> > +
> >   	for (; i < rinfo->dir_nr; i++) {
> >   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > -		struct ceph_fname fname = { .dir	= inode,
> > -					    .name	= rde->name,
> > -					    .name_len	= rde->name_len,
> > -					    .ctext	= rde->altname,
> > -					    .ctext_len	= rde->altname_len };
> > -		u32 olen = oname.len;
> > -
> > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > -		if (err) {
> > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > -			       rde->name_len, rde->name, err);
> > -			goto out;
> > -		}
> > +		struct dentry *dn = rde->dentry;
> > +		int name_len;
> >   
> >   		BUG_ON(rde->offset < ctx->pos);
> >   		BUG_ON(!rde->inode.in);
> > +		BUG_ON(!rde->dentry);
> >   
> >   		ctx->pos = rde->offset;
> > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > -		     i, rinfo->dir_nr, ctx->pos,
> > -		     rde->name_len, rde->name, &rde->inode.in);
> >   
> > -		if (!dir_emit(ctx, oname.name, oname.len,
> > +		spin_lock(&dn->d_lock);
> > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > +		name_len = dn->d_name.len;
> > +		spin_unlock(&dn->d_lock);
> > +
> 
> Hi Jeff,
> 
> BTW, does the dn->d_lock is must here ? From the code comments, the 
> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
> 
> In ceph code I found some places are using the d_lock when accessing the 
> d_name, some are not. And in none ceph code, they will almost never use 
> the d_lock to protect the d_name.
> 

It's probably not needed. The d_name can only change if there are no
other outstanding references to the dentry.

> 
> 
> > +		dentry_name[name_len] = '\0';
> > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > +
> > +		if (!dir_emit(ctx, dentry_name, name_len,
> >   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> >   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> >   			/*
> > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			goto out;
> >   		}
> >   
> > -		/* Reset the lengths to their original allocated vals */
> > -		oname.len = olen;
> >   		ctx->pos++;
> >   	}
> >   
> > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	err = 0;
> >   	dout("readdir %p file %p done.\n", inode, file);
> >   out:
> > -	ceph_fname_free_buffer(inode, &tname);
> > -	ceph_fname_free_buffer(inode, &oname);
> > +	if (dentry_name)
> > +		kfree(dentry_name);
> >   	return err;
> >   }
> >   
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index b573a0f33450..19e5275eae1c 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto out;
> >   		}
> >   
> > +		rde->dentry = NULL;
> >   		dname.name = oname.name;
> >   		dname.len = oname.len;
> >   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto retry_lookup;
> >   		}
> >   
> > +		/*
> > +		 * ceph_readdir will use the dentry to get the name
> > +		 * to avoid doing the dencrypt again there.
> > +		 */
> > +		rde->dentry = dget(dn);
> > +
> >   		/* inode */
> >   		if (d_really_is_positive(dn)) {
> >   			in = d_inode(dn);
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 8d704ddd7291..9e0a51ef1dfa 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> >   
> >   		kfree(rde->inode.fscrypt_auth);
> >   		kfree(rde->inode.fscrypt_file);
> > +		dput(rde->dentry);
> >   	}
> >   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> >   }
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index 0dfe24f94567..663d7754d57d 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> >   };
> >   
> >   struct ceph_mds_reply_dir_entry {
> > +	struct dentry		      *dentry;
> >   	char                          *name;
> >   	u8			      *altname;
> >   	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>