Re: [RFC PATCH 8/9] ceph: copy layout, max_size and truncate_size on successful sync create

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2020-01-13 at 11:51 +0800, Yan, Zheng wrote:
> On 1/11/20 4:56 AM, Jeff Layton wrote:
> > It doesn't do much good to do an asynchronous create unless we can do
> > I/O to it before the create reply comes in. That means we need layout
> > info the new file before we've gotten the response from the MDS.
> > 
> > All files created in a directory will initially inherit the same layout,
> > so copy off the requisite info from the first synchronous create in the
> > directory. Save it in the same fields in the directory inode, as those
> > are otherwise unsed for dir inodes. This means we need to be a bit
> > careful about only updating layout info on non-dir inodes.
> > 
> > Also, zero out the layout when we drop Dc caps in the dir.
> > 
> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > ---
> >   fs/ceph/caps.c  | 24 ++++++++++++++++++++----
> >   fs/ceph/file.c  | 24 +++++++++++++++++++++++-
> >   fs/ceph/inode.c |  4 ++--
> >   3 files changed, 45 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > index 7fc87b693ba4..b96fb1378479 100644
> > --- a/fs/ceph/caps.c
> > +++ b/fs/ceph/caps.c
> > @@ -2847,7 +2847,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
> >   			return ret;
> >   		}
> >   
> > -		if (S_ISREG(ci->vfs_inode.i_mode) &&
> > +		if (!S_ISDIR(ci->vfs_inode.i_mode) &&
> >   		    ci->i_inline_version != CEPH_INLINE_NONE &&
> >   		    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> >   		    i_size_read(inode) > 0) {
> > @@ -2944,9 +2944,17 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
> >   	if (had & CEPH_CAP_FILE_RD)
> >   		if (--ci->i_rd_ref == 0)
> >   			last++;
> > -	if (had & CEPH_CAP_FILE_CACHE)
> > -		if (--ci->i_rdcache_ref == 0)
> > +	if (had & CEPH_CAP_FILE_CACHE) {
> > +		if (--ci->i_rdcache_ref == 0) {
> >   			last++;
> > +			/* Zero out layout if we lost CREATE caps */
> > +			if (S_ISDIR(inode->i_mode) &&
> > +			    !(__ceph_caps_issued(ci, NULL) & CEPH_CAP_DIR_CREATE)) {
> > +				ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
> > +				memset(&ci->i_layout, 0, sizeof(ci->i_layout));
> > +			}
> > +		}
> > +	}
> 
> should do this in __check_cap_issue
> 

That function doesn't get called from the put codepath. Suppose one task
is setting up an async create and a Dc (DIR_CREATE) cap revoke races in.
We zero out the layout and then the inode has a bogus layout set in it.

We can't wipe the cached layout until all of the Dc references are put.

> >   	if (had & CEPH_CAP_FILE_EXCL)
> >   		if (--ci->i_fx_ref == 0)
> >   			last++;
> > @@ -3264,7 +3272,8 @@ static void handle_cap_grant(struct inode *inode,
> >   		ci->i_subdirs = extra_info->nsubdirs;
> >   	}
> >   
> > -	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
> > +	if (!S_ISDIR(inode->i_mode) &&
> > +	    (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
> >   		/* file layout may have changed */
> >   		s64 old_pool = ci->i_layout.pool_id;
> >   		struct ceph_string *old_ns;
> > @@ -3336,6 +3345,13 @@ static void handle_cap_grant(struct inode *inode,
> >   		     ceph_cap_string(cap->issued),
> >   		     ceph_cap_string(newcaps),
> >   		     ceph_cap_string(revoking));
> > +
> > +		if (S_ISDIR(inode->i_mode) &&
> > +		    (revoking & CEPH_CAP_DIR_CREATE) && !ci->i_rdcache_ref) {
> > +			ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
> > +			memset(&ci->i_layout, 0, sizeof(ci->i_layout));
> > +		}
> 
> same here
> 
> > +
> >   		if (S_ISREG(inode->i_mode) &&
> >   		    (revoking & used & CEPH_CAP_FILE_BUFFER))
> >   			writeback = true;  /* initiate writeback; will delay ack */
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 1e6cdf2dfe90..d4d7a277faf1 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -430,6 +430,25 @@ int ceph_open(struct inode *inode, struct file *file)
> >   	return err;
> >   }
> >   
> > +/* Clone the layout from a synchronous create, if the dir now has Dc caps */
> > +static void
> > +copy_file_layout(struct inode *dst, struct inode *src)
> > +{
> > +	struct ceph_inode_info *cdst = ceph_inode(dst);
> > +	struct ceph_inode_info *csrc = ceph_inode(src);
> > +
> > +	spin_lock(&cdst->i_ceph_lock);
> > +	if ((__ceph_caps_issued(cdst, NULL) & CEPH_CAP_DIR_CREATE) &&
> > +	    !ceph_file_layout_is_valid(&cdst->i_layout)) {
> > +		memcpy(&cdst->i_layout, &csrc->i_layout,
> > +			sizeof(cdst->i_layout));
> > +		rcu_assign_pointer(cdst->i_layout.pool_ns,
> > +				   ceph_try_get_string(csrc->i_layout.pool_ns));
> > +		cdst->i_max_size = csrc->i_max_size;
> > +		cdst->i_truncate_size = csrc->i_truncate_size;
> > +	}
> > +	spin_unlock(&cdst->i_ceph_lock);
> > +}
> >   
> >   /*
> >    * Do a lookup + open with a single request.  If we get a non-existent
> > @@ -518,7 +537,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> >   	} else {
> >   		dout("atomic_open finish_open on dn %p\n", dn);
> >   		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
> > -			ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > +			struct inode *newino = d_inode(dentry);
> > +
> > +			copy_file_layout(dir, newino);
> > +			ceph_init_inode_acls(newino, &as_ctx);
> >   			file->f_mode |= FMODE_CREATED;
> >   		}
> >   		err = finish_open(file, dentry, ceph_open);
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index 9cfc093fd273..8b51051b79b0 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -848,8 +848,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
> >   		ci->i_subdirs = le64_to_cpu(info->subdirs);
> >   	}
> >   
> > -	if (new_version ||
> > -	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
> > +	if (!S_ISDIR(inode->i_mode) && (new_version ||
> > +	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)))) {
> >   		s64 old_pool = ci->i_layout.pool_id;
> >   		struct ceph_string *old_ns;
> >   
> > 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux