Re: [RFC PATCH 8/9] ceph: copy layout, max_size and truncate_size on successful sync create

On Mon, 2020-01-13 at 22:56 +0800, Yan, Zheng wrote:
> On 1/13/20 9:26 PM, Jeff Layton wrote:
> > On Mon, 2020-01-13 at 11:51 +0800, Yan, Zheng wrote:
> > > On 1/11/20 4:56 AM, Jeff Layton wrote:
> > > > It doesn't do much good to do an asynchronous create unless we can do
> > > > I/O to it before the create reply comes in. That means we need layout
> > > > info for the new file before we've gotten the response from the MDS.
> > > > 
> > > > All files created in a directory will initially inherit the same layout,
> > > > so copy off the requisite info from the first synchronous create in the
> > > > directory. Save it in the same fields in the directory inode, as those
> > > > are otherwise unused for dir inodes. This means we need to be a bit
> > > > careful about only updating layout info on non-dir inodes.
> > > > 
> > > > Also, zero out the layout when we drop Dc caps in the dir.
> > > > 
> > > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > > > ---
> > > >    fs/ceph/caps.c  | 24 ++++++++++++++++++++----
> > > >    fs/ceph/file.c  | 24 +++++++++++++++++++++++-
> > > >    fs/ceph/inode.c |  4 ++--
> > > >    3 files changed, 45 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > > index 7fc87b693ba4..b96fb1378479 100644
> > > > --- a/fs/ceph/caps.c
> > > > +++ b/fs/ceph/caps.c
> > > > @@ -2847,7 +2847,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
> > > >    			return ret;
> > > >    		}
> > > >    
> > > > -		if (S_ISREG(ci->vfs_inode.i_mode) &&
> > > > +		if (!S_ISDIR(ci->vfs_inode.i_mode) &&
> > > >    		    ci->i_inline_version != CEPH_INLINE_NONE &&
> > > >    		    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> > > >    		    i_size_read(inode) > 0) {
> > > > @@ -2944,9 +2944,17 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
> > > >    	if (had & CEPH_CAP_FILE_RD)
> > > >    		if (--ci->i_rd_ref == 0)
> > > >    			last++;
> > > > -	if (had & CEPH_CAP_FILE_CACHE)
> > > > -		if (--ci->i_rdcache_ref == 0)
> > > > +	if (had & CEPH_CAP_FILE_CACHE) {
> > > > +		if (--ci->i_rdcache_ref == 0) {
> > > >    			last++;
> > > > +			/* Zero out layout if we lost CREATE caps */
> > > > +			if (S_ISDIR(inode->i_mode) &&
> > > > +			    !(__ceph_caps_issued(ci, NULL) & CEPH_CAP_DIR_CREATE)) {
> > > > +				ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
> > > > +				memset(&ci->i_layout, 0, sizeof(ci->i_layout));
> > > > +			}
> > > > +		}
> > > > +	}
> > > 
> > > should do this in __check_cap_issue
> > > 
> > 
> > That function doesn't get called from the put codepath. Suppose one task
> > is setting up an async create and a Dc (DIR_CREATE) cap revoke races in.
> > We zero out the layout and then the inode has a bogus layout set in it.
> > 
> > We can't wipe the cached layout until all of the Dc references are put.
> 
> how about:
> 
> have get_caps_for_async_create() return the layout, then
> pass the returned layout into ceph_finish_async_open().
> 

That still sounds racy.

What guarantees the stability of the cached layout while we're copying
it in get_caps_for_async_create()? I guess we could protect the new
cached layout field with the i_ceph_lock.

Is that a real improvement? I'm not sure.
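That said, if we do go that route, here's a rough, untested sketch of the
sort of thing I have in mind: snapshot the cached layout under i_ceph_lock
so the async create works from a stable copy even if Dc is revoked (and
the cached layout zeroed) right afterwards. The helper name and the
-ENODATA return are made up for illustration; only the locking pattern
matters:

static int ceph_try_copy_cached_layout(struct inode *dir,
				       struct ceph_file_layout *lo)
{
	struct ceph_inode_info *ci = ceph_inode(dir);
	int err = -ENODATA;

	spin_lock(&ci->i_ceph_lock);
	if ((__ceph_caps_issued(ci, NULL) & CEPH_CAP_DIR_CREATE) &&
	    ceph_file_layout_is_valid(&ci->i_layout)) {
		/* copy the whole cached layout... */
		memcpy(lo, &ci->i_layout, sizeof(*lo));
		/* ...but take our own reference on the pool_ns string */
		rcu_assign_pointer(lo->pool_ns,
				   ceph_try_get_string(ci->i_layout.pool_ns));
		err = 0;
	}
	spin_unlock(&ci->i_ceph_lock);
	return err;
}

ceph_finish_async_open() (or whatever it ends up being called) would then
consume that copy and drop the pool_ns reference with ceph_put_string()
when it's done, so the revoke path wouldn't have to care whether an async
create is in flight.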

> 
> 
> > > >    	if (had & CEPH_CAP_FILE_EXCL)
> > > >    		if (--ci->i_fx_ref == 0)
> > > >    			last++;
> > > > @@ -3264,7 +3272,8 @@ static void handle_cap_grant(struct inode *inode,
> > > >    		ci->i_subdirs = extra_info->nsubdirs;
> > > >    	}
> > > >    
> > > > -	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
> > > > +	if (!S_ISDIR(inode->i_mode) &&
> > > > +	    (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
> > > >    		/* file layout may have changed */
> > > >    		s64 old_pool = ci->i_layout.pool_id;
> > > >    		struct ceph_string *old_ns;
> > > > @@ -3336,6 +3345,13 @@ static void handle_cap_grant(struct inode *inode,
> > > >    		     ceph_cap_string(cap->issued),
> > > >    		     ceph_cap_string(newcaps),
> > > >    		     ceph_cap_string(revoking));
> > > > +
> > > > +		if (S_ISDIR(inode->i_mode) &&
> > > > +		    (revoking & CEPH_CAP_DIR_CREATE) && !ci->i_rdcache_ref) {
> > > > +			ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
> > > > +			memset(&ci->i_layout, 0, sizeof(ci->i_layout));
> > > > +		}
> > > 
> > > same here
> > > 
> > > > +
> > > >    		if (S_ISREG(inode->i_mode) &&
> > > >    		    (revoking & used & CEPH_CAP_FILE_BUFFER))
> > > >    			writeback = true;  /* initiate writeback; will delay ack */
> > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > > > index 1e6cdf2dfe90..d4d7a277faf1 100644
> > > > --- a/fs/ceph/file.c
> > > > +++ b/fs/ceph/file.c
> > > > @@ -430,6 +430,25 @@ int ceph_open(struct inode *inode, struct file *file)
> > > >    	return err;
> > > >    }
> > > >    
> > > > +/* Clone the layout from a synchronous create, if the dir now has Dc caps */
> > > > +static void
> > > > +copy_file_layout(struct inode *dst, struct inode *src)
> > > > +{
> > > > +	struct ceph_inode_info *cdst = ceph_inode(dst);
> > > > +	struct ceph_inode_info *csrc = ceph_inode(src);
> > > > +
> > > > +	spin_lock(&cdst->i_ceph_lock);
> > > > +	if ((__ceph_caps_issued(cdst, NULL) & CEPH_CAP_DIR_CREATE) &&
> > > > +	    !ceph_file_layout_is_valid(&cdst->i_layout)) {
> > > > +		memcpy(&cdst->i_layout, &csrc->i_layout,
> > > > +			sizeof(cdst->i_layout));
> > > > +		rcu_assign_pointer(cdst->i_layout.pool_ns,
> > > > +				   ceph_try_get_string(csrc->i_layout.pool_ns));
> > > > +		cdst->i_max_size = csrc->i_max_size;
> > > > +		cdst->i_truncate_size = csrc->i_truncate_size;
> > > > +	}
> > > > +	spin_unlock(&cdst->i_ceph_lock);
> > > > +}
> > > >    
> > > >    /*
> > > >     * Do a lookup + open with a single request.  If we get a non-existent
> > > > @@ -518,7 +537,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> > > >    	} else {
> > > >    		dout("atomic_open finish_open on dn %p\n", dn);
> > > >    		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
> > > > -			ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > > > +			struct inode *newino = d_inode(dentry);
> > > > +
> > > > +			copy_file_layout(dir, newino);
> > > > +			ceph_init_inode_acls(newino, &as_ctx);
> > > >    			file->f_mode |= FMODE_CREATED;
> > > >    		}
> > > >    		err = finish_open(file, dentry, ceph_open);
> > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > > index 9cfc093fd273..8b51051b79b0 100644
> > > > --- a/fs/ceph/inode.c
> > > > +++ b/fs/ceph/inode.c
> > > > @@ -848,8 +848,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
> > > >    		ci->i_subdirs = le64_to_cpu(info->subdirs);
> > > >    	}
> > > >    
> > > > -	if (new_version ||
> > > > -	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
> > > > +	if (!S_ISDIR(inode->i_mode) && (new_version ||
> > > > +	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)))) {
> > > >    		s64 old_pool = ci->i_layout.pool_id;
> > > >    		struct ceph_string *old_ns;
> > > >    
> > > > 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>



