Re: [RFC PATCH 6/6] ceph: eliminate ceph_async_iput()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2021-06-16 at 16:26 +0100, Luis Henriques wrote:
> On Tue, Jun 15, 2021 at 10:57:30AM -0400, Jeff Layton wrote:
> > Now that we don't need to hold session->s_mutex or the snap_rwsem when
> > calling ceph_check_caps, we can eliminate ceph_async_iput and just use
> > normal iput calls.
> > 
> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > ---
> >  fs/ceph/caps.c       |  6 +++---
> >  fs/ceph/inode.c      | 25 +++----------------------
> >  fs/ceph/mds_client.c | 22 +++++++++++-----------
> >  fs/ceph/quota.c      |  6 +++---
> >  fs/ceph/snap.c       | 10 +++++-----
> >  fs/ceph/super.h      |  2 --
> >  6 files changed, 25 insertions(+), 46 deletions(-)
> > 
> > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > index 5864d5088e27..fd9243e9a1b2 100644
> > --- a/fs/ceph/caps.c
> > +++ b/fs/ceph/caps.c
> > @@ -3147,7 +3147,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
> >  		wake_up_all(&ci->i_cap_wq);
> >  	while (put-- > 0) {
> >  		/* avoid calling iput_final() in osd dispatch threads */
> > -		ceph_async_iput(inode);
> > +		iput(inode);
> >  	}
> >  }
> >  
> > @@ -4136,7 +4136,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
> >  done_unlocked:
> >  	ceph_put_string(extra_info.pool_ns);
> >  	/* avoid calling iput_final() in mds dispatch threads */
> > -	ceph_async_iput(inode);
> > +	iput(inode);
> 
> To be honest, I'm not really convinced we can blindly substitute
> ceph_async_iput() by iput().  This case specifically can problematic
> because we may have called ceph_queue_vmtruncate() above (or
> handle_cap_grant()).  If we did, we have ci->i_work_mask set and the wq
> would have invoked __ceph_do_pending_vmtruncate().  Using the iput() here
> won't have that result.  Am I missing something?
> 

The point of this set is to make iput safe to run even when the s_mutex
and/or snap_rwsem is held. When we queue up the ci->i_work, we do take a
reference to the inode and still run iput there. This set just stops
queueing iputs themselves to the workqueue.

I really don't see a problem with this call site in ceph_handle_caps in
particular, as it's calling iput after dropping the s_mutex. Probably
that should not have been converted to use ceph_async_iput in the first
place.

> Cheers,
> --
> Luís
> 
> >  	return;
> >  
> >  flush_cap_releases:
> > @@ -4179,7 +4179,7 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
> >  			dout("check_delayed_caps on %p\n", inode);
> >  			ceph_check_caps(ci, 0, NULL);
> >  			/* avoid calling iput_final() in tick thread */
> > -			ceph_async_iput(inode);
> > +			iput(inode);
> >  			spin_lock(&mdsc->cap_delay_lock);
> >  		}
> >  	}
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index 6034821c9d63..5f1c27caf0b6 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -1567,7 +1567,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
> >  		}
> >  
> >  		/* avoid calling iput_final() in mds dispatch threads */
> > -		ceph_async_iput(in);
> > +		iput(in);
> >  	}
> >  
> >  	return err;
> > @@ -1770,7 +1770,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >  					ihold(in);
> >  					discard_new_inode(in);
> >  				}
> > -				ceph_async_iput(in);
> > +				iput(in);
> >  			}
> >  			d_drop(dn);
> >  			err = ret;
> > @@ -1783,7 +1783,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >  			if (ceph_security_xattr_deadlock(in)) {
> >  				dout(" skip splicing dn %p to inode %p"
> >  				     " (security xattr deadlock)\n", dn, in);
> > -				ceph_async_iput(in);
> > +				iput(in);
> >  				skipped++;
> >  				goto next_item;
> >  			}
> > @@ -1834,25 +1834,6 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
> >  	return ret;
> >  }
> >  
> > -/*
> > - * Put reference to inode, but avoid calling iput_final() in current thread.
> > - * iput_final() may wait for reahahead pages. The wait can cause deadlock in
> > - * some contexts.
> > - */
> > -void ceph_async_iput(struct inode *inode)
> > -{
> > -	if (!inode)
> > -		return;
> > -	for (;;) {
> > -		if (atomic_add_unless(&inode->i_count, -1, 1))
> > -			break;
> > -		if (queue_work(ceph_inode_to_client(inode)->inode_wq,
> > -			       &ceph_inode(inode)->i_work))
> > -			break;
> > -		/* queue work failed, i_count must be at least 2 */
> > -	}
> > -}
> > -
> >  void ceph_queue_inode_work(struct inode *inode, int work_bit)
> >  {
> >  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 87d3be10af25..1f960361b78e 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -825,13 +825,13 @@ void ceph_mdsc_release_request(struct kref *kref)
> >  	if (req->r_inode) {
> >  		ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
> >  		/* avoid calling iput_final() in mds dispatch threads */
> > -		ceph_async_iput(req->r_inode);
> > +		iput(req->r_inode);
> >  	}
> >  	if (req->r_parent) {
> >  		ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
> > -		ceph_async_iput(req->r_parent);
> > +		iput(req->r_parent);
> >  	}
> > -	ceph_async_iput(req->r_target_inode);
> > +	iput(req->r_target_inode);
> >  	if (req->r_dentry)
> >  		dput(req->r_dentry);
> >  	if (req->r_old_dentry)
> > @@ -845,7 +845,7 @@ void ceph_mdsc_release_request(struct kref *kref)
> >  		 */
> >  		ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> >  				  CEPH_CAP_PIN);
> > -		ceph_async_iput(req->r_old_dentry_dir);
> > +		iput(req->r_old_dentry_dir);
> >  	}
> >  	kfree(req->r_path1);
> >  	kfree(req->r_path2);
> > @@ -961,7 +961,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
> >  
> >  	if (req->r_unsafe_dir) {
> >  		/* avoid calling iput_final() in mds dispatch threads */
> > -		ceph_async_iput(req->r_unsafe_dir);
> > +		iput(req->r_unsafe_dir);
> >  		req->r_unsafe_dir = NULL;
> >  	}
> >  
> > @@ -1132,7 +1132,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> >  		cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
> >  	if (!cap) {
> >  		spin_unlock(&ci->i_ceph_lock);
> > -		ceph_async_iput(inode);
> > +		iput(inode);
> >  		goto random;
> >  	}
> >  	mds = cap->session->s_mds;
> > @@ -1143,7 +1143,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> >  out:
> >  	/* avoid calling iput_final() while holding mdsc->mutex or
> >  	 * in mds dispatch threads */
> > -	ceph_async_iput(inode);
> > +	iput(inode);
> >  	return mds;
> >  
> >  random:
> > @@ -1548,7 +1548,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> >  		if (last_inode) {
> >  			/* avoid calling iput_final() while holding
> >  			 * s_mutex or in mds dispatch threads */
> > -			ceph_async_iput(last_inode);
> > +			iput(last_inode);
> >  			last_inode = NULL;
> >  		}
> >  		if (old_cap) {
> > @@ -1582,7 +1582,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> >  	session->s_cap_iterator = NULL;
> >  	spin_unlock(&session->s_cap_lock);
> >  
> > -	ceph_async_iput(last_inode);
> > +	iput(last_inode);
> >  	if (old_cap)
> >  		ceph_put_cap(session->s_mdsc, old_cap);
> >  
> > @@ -1723,7 +1723,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
> >  
> >  			inode = ceph_find_inode(sb, vino);
> >  			 /* avoid calling iput_final() while holding s_mutex */
> > -			ceph_async_iput(inode);
> > +			iput(inode);
> >  
> >  			spin_lock(&session->s_cap_lock);
> >  		}
> > @@ -4370,7 +4370,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
> >  out:
> >  	mutex_unlock(&session->s_mutex);
> >  	/* avoid calling iput_final() in mds dispatch threads */
> > -	ceph_async_iput(inode);
> > +	iput(inode);
> >  	return;
> >  
> >  bad:
> > diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
> > index 4e32c9600ecc..988c6e04b327 100644
> > --- a/fs/ceph/quota.c
> > +++ b/fs/ceph/quota.c
> > @@ -75,7 +75,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
> >  	spin_unlock(&ci->i_ceph_lock);
> >  
> >  	/* avoid calling iput_final() in dispatch thread */
> > -	ceph_async_iput(inode);
> > +	iput(inode);
> >  }
> >  
> >  static struct ceph_quotarealm_inode *
> > @@ -248,7 +248,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
> >  		ci = ceph_inode(in);
> >  		has_quota = __ceph_has_any_quota(ci);
> >  		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
> > -		ceph_async_iput(in);
> > +		iput(in);
> >  
> >  		next = realm->parent;
> >  		if (has_quota || !next)
> > @@ -384,7 +384,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
> >  			exceeded = true; /* Just break the loop */
> >  		}
> >  		/* avoid calling iput_final() while holding mdsc->snap_rwsem */
> > -		ceph_async_iput(in);
> > +		iput(in);
> >  
> >  		next = realm->parent;
> >  		if (exceeded || !next)
> > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> > index afc7f4c32364..f928b02bcd31 100644
> > --- a/fs/ceph/snap.c
> > +++ b/fs/ceph/snap.c
> > @@ -679,13 +679,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
> >  		spin_unlock(&realm->inodes_with_caps_lock);
> >  		/* avoid calling iput_final() while holding
> >  		 * mdsc->snap_rwsem or in mds dispatch threads */
> > -		ceph_async_iput(lastinode);
> > +		iput(lastinode);
> >  		lastinode = inode;
> >  		ceph_queue_cap_snap(ci);
> >  		spin_lock(&realm->inodes_with_caps_lock);
> >  	}
> >  	spin_unlock(&realm->inodes_with_caps_lock);
> > -	ceph_async_iput(lastinode);
> > +	iput(lastinode);
> >  
> >  	dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
> >  }
> > @@ -841,7 +841,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
> >  		ceph_flush_snaps(ci, &session);
> >  		/* avoid calling iput_final() while holding
> >  		 * session->s_mutex or in mds dispatch threads */
> > -		ceph_async_iput(inode);
> > +		iput(inode);
> >  		spin_lock(&mdsc->snap_flush_lock);
> >  	}
> >  	spin_unlock(&mdsc->snap_flush_lock);
> > @@ -985,12 +985,12 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
> >  
> >  			/* avoid calling iput_final() while holding
> >  			 * mdsc->snap_rwsem or mds in dispatch threads */
> > -			ceph_async_iput(inode);
> > +			iput(inode);
> >  			continue;
> >  
> >  skip_inode:
> >  			spin_unlock(&ci->i_ceph_lock);
> > -			ceph_async_iput(inode);
> > +			iput(inode);
> >  		}
> >  
> >  		/* we may have taken some of the old realm's children. */
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index 31f0be9120dd..6b6332a5c113 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -988,8 +988,6 @@ extern int ceph_inode_holds_cap(struct inode *inode, int mask);
> >  extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
> >  extern void __ceph_do_pending_vmtruncate(struct inode *inode);
> >  
> > -extern void ceph_async_iput(struct inode *inode);
> > -
> >  void ceph_queue_inode_work(struct inode *inode, int work_bit);
> >  
> >  static inline void ceph_queue_vmtruncate(struct inode *inode)
> > -- 
> > 2.31.1
> > 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux