On Tue, 2022-04-19 at 08:58 +0800, Xiubo Li wrote: > Before waiting for a request's safe reply, we will send the mdlog > flush request to the relevant MDS. And this will also flush the > mdlog for all the other unsafe requests in the same session, so > we can record the last session and avoid flushing the mdlog again > in the next loop. There are still cases where it may send the > mdlog flush request twice or more, but that should not happen often. > > Rename wait_unsafe_requests() to flush_mdlog_and_wait_mdsc_unsafe_requests() > to make it more descriptive. > > URL: https://tracker.ceph.com/issues/55284 > Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> > --- > > V4: > - Fixed the lock inversion bug. > > > > fs/ceph/mds_client.c | 34 ++++++++++++++++++++++++++++------ > 1 file changed, 28 insertions(+), 6 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 0da85c9ce73a..58827af57b7f 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -5093,15 +5093,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) > } > > /* > - * wait for all write mds requests to flush. > + * flush the mdlog and wait for all write mds requests to flush. 
> */ > -static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) > +static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, > + u64 want_tid) > { > struct ceph_mds_request *req = NULL, *nextreq; > + struct ceph_mds_session *last_session = NULL; > struct rb_node *n; > > mutex_lock(&mdsc->mutex); > - dout("wait_unsafe_requests want %lld\n", want_tid); > + dout("%s want %lld\n", __func__, want_tid); > restart: > req = __get_oldest_req(mdsc); > while (req && req->r_tid <= want_tid) { > @@ -5113,14 +5115,33 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) > nextreq = NULL; > if (req->r_op != CEPH_MDS_OP_SETFILELOCK && > (req->r_op & CEPH_MDS_OP_WRITE)) { > + struct ceph_mds_session *s; > + > /* write op */ > ceph_mdsc_get_request(req); > if (nextreq) > ceph_mdsc_get_request(nextreq); > + > + s = req->r_session; > + if (!s) { > + req = nextreq; > + continue; > + } > + s = ceph_get_mds_session(s); > mutex_unlock(&mdsc->mutex); > - dout("wait_unsafe_requests wait on %llu (want %llu)\n", > + > + /* send flush mdlog request to MDS */ > + if (last_session != s) { > + send_flush_mdlog(s); > + ceph_put_mds_session(last_session); > + last_session = s; > + } else { > + ceph_put_mds_session(s); > + } > + dout("%s wait on %llu (want %llu)\n", __func__, > req->r_tid, want_tid); > wait_for_completion(&req->r_safe_completion); > + > mutex_lock(&mdsc->mutex); > ceph_mdsc_put_request(req); > if (!nextreq) > @@ -5135,7 +5156,8 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) > req = nextreq; > } > mutex_unlock(&mdsc->mutex); > - dout("wait_unsafe_requests done\n"); > + ceph_put_mds_session(last_session); > + dout("%s done\n", __func__); > } > > void ceph_mdsc_sync(struct ceph_mds_client *mdsc) > @@ -5164,7 +5186,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) > dout("sync want tid %lld flush_seq %lld\n", > want_tid, want_flush); > > - wait_unsafe_requests(mdsc, want_tid); 
> + flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); > wait_caps_flush(mdsc, want_flush); > } > Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>