Re: [PATCH] ceph: fix possible deadlock while holding Fcr to use getattr

On Fri, 2022-04-22 at 17:25 +0800, Xiubo Li wrote:
> We can't use getattr to fetch the inline data after getting the Fcr
> caps, because it can cause a deadlock. The solution is to uninline
> the inline data when opening the file, building on David Howells'
> previous work on uninlining inline data.
> 
> The deadlock is caused by one possible call path:
>   ceph_filemap_fault()-->
>      ceph_get_caps(Fcr);
>      filemap_fault()-->
>         do_sync_mmap_readahead()-->
>            page_cache_ra_order()-->
>               read_pages()-->
>                  aops->readahead()-->
>                     netfs_readahead()-->
>                        netfs_begin_read()-->
>                           netfs_rreq_submit_slice()-->
>                              netfs_read_from_server()-->
>                                 netfs_ops->issue_read()-->
>                                    ceph_netfs_issue_read()-->
>                                       ceph_netfs_issue_op_inline()-->
>                                          getattr()
>      ceph_put_cap_refs(Fcr);
> 
> This is because the Locker state can be LOCK_EXEC_MIX on the auth
> MDS while the replica MDSes' lock state is LOCK_LOCK, in which case
> the kclient could get 'Frwcb' caps from both the auth and replica
> MDSes.
> 
> But if the getattr is sent to any MDS, that MDS needs to do a Locker
> transition to LOCK_MIX first and then to LOCK_SYNC. When
> transitioning to the LOCK_MIX state the MDS Locker needs to revoke
> the Fcb caps, but the kclient is already holding them while waiting
> for the MDS to finish.
> 
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  fs/ceph/addr.c | 65 ++++++--------------------------------------------
>  fs/ceph/file.c |  3 +--
>  2 files changed, 8 insertions(+), 60 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 261bc8bb2ab8..b0b9a2f4adb0 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -244,61 +244,6 @@ static void finish_netfs_read(struct ceph_osd_request *req)
>  	iput(req->r_inode);
>  }
>  
> -static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
> -{
> -	struct netfs_io_request *rreq = subreq->rreq;
> -	struct inode *inode = rreq->inode;
> -	struct ceph_mds_reply_info_parsed *rinfo;
> -	struct ceph_mds_reply_info_in *iinfo;
> -	struct ceph_mds_request *req;
> -	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> -	struct ceph_inode_info *ci = ceph_inode(inode);
> -	struct iov_iter iter;
> -	ssize_t err = 0;
> -	size_t len;
> -	int mode;
> -
> -	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
> -	__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
> -
> -	if (subreq->start >= inode->i_size)
> -		goto out;
> -
> -	/* We need to fetch the inline data. */
> -	mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
> -	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> -		goto out;
> -	}
> -	req->r_ino1 = ci->i_vino;
> -	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
> -	req->r_num_caps = 2;
> -
> -	err = ceph_mdsc_do_request(mdsc, NULL, req);
> -	if (err < 0)
> -		goto out;
> -
> -	rinfo = &req->r_reply_info;
> -	iinfo = &rinfo->targeti;
> -	if (iinfo->inline_version == CEPH_INLINE_NONE) {
> -		/* The data got uninlined */
> -		ceph_mdsc_put_request(req);
> -		return false;
> -	}
> -
> -	len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
> -	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
> -	err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
> -	if (err == 0)
> -		err = -EFAULT;
> -
> -	ceph_mdsc_put_request(req);
> -out:
> -	netfs_subreq_terminated(subreq, err, false);
> -	return true;
> -}
> -
>  static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
>  {
>  	struct netfs_io_request *rreq = subreq->rreq;
> @@ -313,9 +258,13 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
>  	int err = 0;
>  	u64 len = subreq->len;
>  
> -	if (ci->i_inline_version != CEPH_INLINE_NONE &&
> -	    ceph_netfs_issue_op_inline(subreq))
> -		return;
> +	/*
> +	 * The inline data was uninlined when opening the file, as
> +	 * otherwise we would have to send a GETATTR request to the
> +	 * MDS from here, which would deadlock while we are holding
> +	 * the Fcr reference in ceph_filemap_fault().
> +	 */
> +	BUG_ON(ci->i_inline_version != CEPH_INLINE_NONE);
>  
>  	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
>  			0, 1, CEPH_OSD_OP_READ,
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 6c9e837aa1d3..a98a61ec4ada 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -241,8 +241,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>  	INIT_LIST_HEAD(&fi->rw_contexts);
>  	fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
>  
> -	if ((file->f_mode & FMODE_WRITE) &&
> -	    ci->i_inline_version != CEPH_INLINE_NONE) {
> +	if (ci->i_inline_version != CEPH_INLINE_NONE) {
>  		ret = ceph_uninline_data(file);
>  		if (ret < 0)
>  			goto error;
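
To make the reported cycle concrete: the quoted call chain reduces to a
client that takes a cap reference and then blocks on a synchronous MDS
request, while the MDS cannot answer that request until the same
reference is dropped. Below is a minimal userspace analogue of that
pattern (assuming only pthreads; every name in it is hypothetical and it
models the kclient/MDS interaction, not actual Ceph code):

/* cc -pthread -o fcr_deadlock fcr_deadlock.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int fcr_refs;		/* cap references held by the "kclient" */
static bool getattr_done;	/* set when the "MDS" answers getattr */

/* "MDS": before answering getattr it must revoke the client's caps. */
static void *mds_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (fcr_refs > 0)		/* waits for the cap release... */
		pthread_cond_wait(&cond, &lock);
	getattr_done = true;		/* ...so this is never reached */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t mds;
	struct timespec ts;
	int hung = 0;

	/* ceph_get_caps(Fcr): take the cap reference first. */
	pthread_mutex_lock(&lock);
	fcr_refs++;
	pthread_mutex_unlock(&lock);

	pthread_create(&mds, NULL, mds_thread, NULL);

	/* Synchronous getattr while still holding Fcr: circular wait. */
	pthread_mutex_lock(&lock);
	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += 2;			/* bail out after 2s for the demo */
	while (!getattr_done && !hung)
		hung = pthread_cond_timedwait(&cond, &lock, &ts);
	pthread_mutex_unlock(&lock);

	if (hung)
		printf("deadlock: getattr waits on cap revocation, "
		       "cap release waits on getattr\n");
	return 0;
}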

Will we always be able to guarantee that we can uninline the file? I
think it's possible to use RADOS pool caps to limit some clients to
read-only. That's what ceph_pool_perm_check is all about.

If we do want to go this route, then you may need to fix
ceph_pool_perm_check to check for CEPH_I_POOL_WR when opening an inlined
file for read.
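
Something along these lines in ceph_init_file_info(), perhaps. This is
an untested sketch of the idea only, assuming ceph_pool_perm_check()
keeps its current (inode, need) signature; the error label matches the
quoted hunk:

	/*
	 * Untested sketch, not a real patch: an inlined file must be
	 * uninlined at open time, and uninlining writes to the data
	 * pool, so require pool write permission even for a read-only
	 * open. ceph_pool_perm_check() tracks this internally via the
	 * CEPH_I_POOL_WR flag.
	 */
	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		ret = ceph_pool_perm_check(inode, CEPH_CAP_FILE_WR);
		if (ret < 0)
			goto error;	/* read-only pool: fail the open cleanly */
		ret = ceph_uninline_data(file);
		if (ret < 0)
			goto error;
	}

That would preserve the invariant the new BUG_ON in
ceph_netfs_issue_read() depends on: any file that was successfully
opened has already had its data uninlined.
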
-- 
Jeff Layton <jlayton@xxxxxxxxxx>


