On Mon, 2022-04-25 at 19:58 +0800, Xiubo Li wrote:
> 1, mount with wsync.
> 2, create a file with O_RDWR, and the request was sent to mds.0:
>
> ceph_atomic_open()-->
>   ceph_mdsc_do_request(openc)
>   finish_open(file, dentry, ceph_open)-->
>     ceph_open()-->
>       ceph_init_file()-->
>         ceph_init_file_info()-->
>           ceph_uninline_data()-->
>           {
>             ...
>             if (inline_version == 1 || /* initial version, no data */
>                 inline_version == CEPH_INLINE_NONE)
>                     goto out_unlock;
>             ...
>           }
>
> The inline_version will be 1, which is the initial version for a
> newly created file. And here ci->i_inline_version will stay at 1,
> which is buggy.
>
> 3, buffered write to the file immediately:
>
> ceph_write_iter()-->
>   ceph_get_caps(file, need=Fw, want=Fb, ...);
>   generic_perform_write()-->
>     a_ops->write_begin()-->
>       ceph_write_begin()-->
>         netfs_write_begin()-->
>           netfs_begin_read()-->
>             netfs_rreq_submit_slice()-->
>               netfs_read_from_server()-->
>                 rreq->netfs_ops->issue_read()-->
>                   ceph_netfs_issue_read()-->
>                   {
>                     ...
>                     if (ci->i_inline_version != CEPH_INLINE_NONE &&
>                         ceph_netfs_issue_op_inline(subreq))
>                             return;
>                     ...
>                   }
>   ceph_put_cap_refs(ci, Fwb);
>
> The ceph_netfs_issue_op_inline() will send a getattr(Fsr) request to
> mds.1.
>
> 4, then mds.1 will request the rd lock for CInode::filelock from
> the auth mds.0. The mds.0 will do the CInode::filelock state transition
> from excl --> sync, but it needs to revoke the Fxwb caps back from the
> clients.
>
> Meanwhile the kernel client is already holding the Fwb caps and
> waiting for the getattr(Fsr) reply.
>
> It's a deadlock!
>
> URL: https://tracker.ceph.com/issues/55377
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  fs/ceph/addr.c | 33 +++++++++++++++++++-------------
>  1 file changed, 19 insertions(+), 14 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 02722ac86d73..15e7b48cbc95 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1641,7 +1641,7 @@ int ceph_uninline_data(struct file *file)
>  	struct inode *inode = file_inode(file);
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> -	struct ceph_osd_request *req;
> +	struct ceph_osd_request *req = NULL;
>  	struct ceph_cap_flush *prealloc_cf;
>  	struct folio *folio = NULL;
>  	u64 inline_version = CEPH_INLINE_NONE;
> @@ -1649,10 +1649,23 @@ int ceph_uninline_data(struct file *file)
>  	int err = 0;
>  	u64 len;
>  
> +	spin_lock(&ci->i_ceph_lock);
> +	inline_version = ci->i_inline_version;
> +	spin_unlock(&ci->i_ceph_lock);
> +
> +	dout("uninline_data %p %llx.%llx inline_version %llu\n",
> +	     inode, ceph_vinop(inode), inline_version);
> +
> +	if (inline_version == CEPH_INLINE_NONE)
> +		return 0;
> +
>  	prealloc_cf = ceph_alloc_cap_flush();
>  	if (!prealloc_cf)
>  		return -ENOMEM;
>  
> +	if (inline_version == 1) /* initial version, no data */
> +		goto out_uninline;
> +
>  	folio = read_mapping_folio(inode->i_mapping, 0, file);
>  	if (IS_ERR(folio)) {
>  		err = PTR_ERR(folio);
> @@ -1661,17 +1674,6 @@ int ceph_uninline_data(struct file *file)
>  
>  	folio_lock(folio);
>  
> -	spin_lock(&ci->i_ceph_lock);
> -	inline_version = ci->i_inline_version;
> -	spin_unlock(&ci->i_ceph_lock);
> -
> -	dout("uninline_data %p %llx.%llx inline_version %llu\n",
> -	     inode, ceph_vinop(inode), inline_version);
> -
> -	if (inline_version == 1 || /* initial version, no data */
> -	    inline_version == CEPH_INLINE_NONE)
> -		goto out_unlock;
> -
>  	len = i_size_read(inode);
>  	if (len > folio_size(folio))
>  		len = folio_size(folio);
> @@ -1736,6 +1738,7 @@ int ceph_uninline_data(struct file *file)
>  	ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
>  				  req->r_end_latency, len, err);
>  
> +out_uninline:
>  	if (!err) {
>  		int dirty;
>  
> @@ -1754,8 +1757,10 @@ int ceph_uninline_data(struct file *file)
>  	if (err == -ECANCELED)
>  		err = 0;
>  out_unlock:
> -	folio_unlock(folio);
> -	folio_put(folio);
> +	if (folio) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
>  out:
>  	ceph_free_cap_flush(prealloc_cf);
>  	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",

Nice catch!

Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>
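[Editor's note] For readers following the patch, below is a minimal userspace
sketch of the control-flow ordering the change gives ceph_uninline_data(): the
inline_version check now runs before any page-cache read or folio locking, so a
file that was never inlined (CEPH_INLINE_NONE) or that only has the initial,
data-less version 1 never reaches the read path that would issue the
getattr(Fsr) while Fwb caps are held. This is an illustration only, not the
kernel code; read_and_write_back_inline_data() and the static inline_version
variable are hypothetical stand-ins for the real ceph/netfs helpers and inode
state.

    #include <stdio.h>

    /* sentinel meaning "no inline data", mirroring the kernel's all-ones value */
    #define CEPH_INLINE_NONE ((unsigned long long)-1)

    /* hypothetical stand-in for ci->i_inline_version; 1 == freshly created file */
    static unsigned long long inline_version = 1;

    /*
     * Stand-in for the expensive part: reading the inline data through the
     * page cache and writing it out to the OSDs.  In the buggy ordering this
     * could trigger a getattr to another MDS while Fwb was already held.
     */
    static int read_and_write_back_inline_data(void)
    {
            return 0;
    }

    static int ceph_uninline_data_sketch(void)
    {
            int err = 0;

            /* 1. Nothing was ever inlined: return before touching the page cache. */
            if (inline_version == CEPH_INLINE_NONE)
                    return 0;

            /* 2. Initial version carries no data: skip the read/write-back,
             *    but still fall through to clear the inline state below.     */
            if (inline_version != 1)
                    err = read_and_write_back_inline_data();

            if (!err) {
                    /* 3. Mark the inode as no longer inlined. */
                    inline_version = CEPH_INLINE_NONE;
                    printf("inline data cleared\n");
            }
            return err;
    }

    int main(void)
    {
            return ceph_uninline_data_sketch();
    }

The key design point the patch makes, reflected in the sketch, is that the
cheap "is there anything to uninline?" decision is taken first, so the
deadlock-prone read is only attempted when there is real inline data to move.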