Re: [PATCH] ceph: retry the same mds later after the new session is opened

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This can be seen almost every time when mounting:


 92959 <7>[13220.815822] ceph: mount start 00000000e3b1070a
 92960 <6>[13220.817349] libceph: mon2 (1)192.168.195.165:40566 session established
 92961 <7>[13220.818639] ceph:  handle_map epoch 58 len 889
 92962 <7>[13220.818644] ceph:  mdsmap_decode 1/3 4216 mds0.5 (1)192.168.195.165:6813 up:active            ==> max_mds == 3  92963 <7>[13220.818646] ceph:  mdsmap_decode 2/3 4339 mds1.55 (1)192.168.195.165:6815 up:active  92964 <7>[13220.818646] ceph:  mdsmap_decode 3/3 4340 mds2.57 (1)192.168.195.165:6814 up:active  92965 <7>[13220.818648] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, m_num_laggy: 0
 92966 <7>[13220.818648] ceph:  mdsmap_decode success epoch 58
 92967 <7>[13220.818649] ceph:  check_new_map new 58 old 0
 92968 <6>[13220.820577] libceph: client5819 fsid 6c39c42b-5888-4c6a-ba32-89acfa5a2353
 92969 <7>[13220.821827] ceph:  mount opening path \t
 92970 <7>[13220.821887] ceph:  open_root_inode opening ''
 92971 <7>[13220.821892] ceph:  do_request on 000000005666e7bf
 92972 <7>[13220.821893] ceph:  submit_request on 000000005666e7bf for inode 0000000057da6992
 92973 <7>[13220.821896] ceph:  __register_request 000000005666e7bf tid 1
 92974 <7>[13220.821898] ceph:  __choose_mds 0000000057da6992 is_hash=0 (0) mode 0  92975 <7>[13220.821899] ceph:  choose_mds chose random mds0             ====================> random mds0
 92976 <7>[13220.821901] ceph:  register_session: realloc to 1
 92977 <7>[13220.821902] ceph:  register_session: mds0
 92978 <7>[13220.821905] ceph:  mdsc get_session 000000001649fdbd 2 -> 3
 92979 <7>[13220.821905] ceph:  mdsc con_get 000000001649fdbd ok (3)
 92980 <7>[13220.821910] ceph:  mdsc get_session 000000001649fdbd 3 -> 4
 92981 <7>[13220.821912] ceph:  do_request mds0 session 000000001649fdbd state new
 92982 <7>[13220.821913] ceph:  open_session to mds0 (up:active)
 92983 [...]
 92984 <7>[13220.822167] ceph:  do_request waiting
 92985 [...]
 92986 <7>[13220.833112] ceph:  handle_session mds0 open 000000001649fdbd state opening seq 0  92987 <7>[13220.833113] ceph:  renewed_caps mds0 ttl now 4307945924, was fresh, now stale
 92988 <7>[13220.833114] ceph:   wake request 000000005666e7bf tid 1
 92989 <7>[13220.833116] ceph:  mdsc put_session 000000001649fdbd 4 -> 3
 92990 <7>[13220.833117] ceph:  __choose_mds 0000000057da6992 is_hash=0 (0) mode 0  92991 <7>[13220.833118] ceph:  choose_mds chose random mds1   ========================> random mds1
 92992 <7>[13220.833119] ceph:  register_session: realloc to 2
 92993 <7>[13220.833121] ceph:  register_session: mds1
 92994 <7>[13220.833122] ceph:  mdsc get_session 00000000534bd3ef 2 -> 3
 92995 <7>[13220.833123] ceph:  mdsc con_get 00000000534bd3ef ok (3)
 92996 <7>[13220.833124] ceph:  mdsc get_session 00000000534bd3ef 3 -> 4
 92997 <7>[13220.833124] ceph:  do_request mds1 session 00000000534bd3ef state new
 92998 <7>[13220.833126] ceph:  open_session to mds1 (up:active)
 92999 [...]
 93000 <7>[13220.845883] ceph:  handle_session mds1 open 00000000534bd3ef state opening seq 0  93001 <7>[13220.845884] ceph:  renewed_caps mds1 ttl now 4307945935, was fresh, now stale
 93002 <7>[13220.845885] ceph:   wake request 000000005666e7bf tid 1
 93003 <7>[13220.845887] ceph:  mdsc put_session 00000000534bd3ef 4 -> 3
 93004 <7>[13220.845888] ceph:  __choose_mds 0000000057da6992 is_hash=0 (0) mode 0  93005 <7>[13220.845889] ceph:  choose_mds chose random mds2         ================> random mds2
 93006 <7>[13220.845890] ceph:  register_session: realloc to 4
 93007 <7>[13220.845891] ceph:  register_session: mds2
 93008 <7>[13220.845892] ceph:  mdsc get_session 0000000076259af8 2 -> 3
 93009 <7>[13220.845892] ceph:  mdsc con_get 0000000076259af8 ok (3)
 93010 <7>[13220.845893] ceph:  mdsc get_session 0000000076259af8 3 -> 4
 93011 <7>[13220.845893] ceph:  do_request mds2 session 0000000076259af8 state new
 93012 <7>[13220.845894] ceph:  open_session to mds2 (up:active)
  [...]
 93014 <7>[13220.852986] ceph:  handle_session mds2 open 0000000076259af8 state opening seq 0  93015 <7>[13220.852987] ceph:  renewed_caps mds2 ttl now 4307945948, was fresh, now stale
 93016 <7>[13220.852988] ceph:   wake request 000000005666e7bf tid 1
 93017 <7>[13220.852990] ceph:  mdsc put_session 0000000076259af8 4 -> 3
 93018 <7>[13220.852991] ceph:  __choose_mds 0000000057da6992 is_hash=0 (0) mode 0  93019 <7>[13220.852992] ceph:  choose_mds chose random mds0                          ===============> choose random mds0 again
 93020 <7>[13220.852993] ceph:  mdsc get_session 0000000076259af8 3 -> 4
 93021 <7>[13220.852994] ceph:  mdsc get_session 0000000076259af8 4 -> 5
 93022 <7>[13220.852994] ceph:  do_request mds0 session 0000000076259af8 state open  93023 <7>[13220.852996] ceph:  prepare_send_request 000000005666e7bf tid 1 getattr (attempt 1)
 93024 <7>[13220.852997] ceph:   path
 93025 <7>[13220.852998] ceph: r_parent = 0000000057da6992
 93026 [...]
 93027 <7>[13220.853414] ceph:  handle_reply 000000005666e7bf
 93028 <7>[13220.853415] ceph:  __unregister_request 000000005666e7bf tid 1
 93029 <7>[13220.853416] ceph:  got safe reply 1, mds0, last request: Yes
 93030 <7>[13220.853417] ceph:  handle_reply tid 1 result 0
 93031 <7>[13220.853421] ceph:  fill_trace 000000005666e7bf is_dentry 0 is_target 1
 93032 <7>[13220.853424] ceph:  alloc_inode 00000000a823bfed
 93033 <7>[13220.853427] ceph:  get_inode created new inode 00000000a823bfed 1.fffffffffffffffe ino 1  93034 <7>[13220.853428] ceph:  get_inode on 1=1.fffffffffffffffe got 00000000a823bfed  93035 <7>[13220.853429] ceph:  fill_inode 00000000a823bfed ino 1.fffffffffffffffe v 81286 had 0
 93036 <7>[13220.853431] ceph:  00000000a823bfed mode 040755 uid.gid 0.0
 93037 <7>[13220.853433] ceph:  truncate_size 0 -> 18446744073709551615
 93038 <7>[13220.853435] ceph:  fill_trace done err=0
 93039 <7>[13220.853441] ceph:  mdsc con_put 0000000076259af8 (4)
 93040 <7>[13220.853442] ceph:  mdsc put_session 0000000076259af8 5 -> 4
 93041 <7>[13220.853446] ceph:  do_request waited, got 0
 93042 <7>[13220.853447] ceph:  do_request 000000005666e7bf done, result 0
 93043 <7>[13220.853447] ceph:  open_root_inode success
 93044 <7>[13220.853452] ceph:  open_root_inode success, root dentry is 0000000037517481
[...]
 93050 <7>[13220.853457] ceph:  mount success


On 2019/12/9 20:47, xiubli@xxxxxxxxxx wrote:
From: Xiubo Li<xiubli@xxxxxxxxxx>

With max_mds > 1, if a request chooses a random mds rank and the
corresponding session is not opened yet, the request will wait until
the session is opened and then be resent. But each time the request
goes through __do_request, it first releases req->session and then
chooses a random rank again, so it may pick a different random mds
rank this time. In the worst case it will open sessions to all the
mds ranks, one by one, before the request can finally be sent out
successfully.

Signed-off-by: Xiubo Li<xiubli@xxxxxxxxxx>
---
  fs/ceph/mds_client.c | 20 ++++++++++++++++----
  1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 68f3b5ed6ac8..d747e9baf9c9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -876,7 +876,8 @@ static struct inode *get_nonsnap_parent(struct dentry *dentry)
   * Called under mdsc->mutex.
   */
  static int __choose_mds(struct ceph_mds_client *mdsc,
-			struct ceph_mds_request *req)
+			struct ceph_mds_request *req,
+			bool *random)
  {
  	struct inode *inode;
  	struct ceph_inode_info *ci;
@@ -886,6 +887,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
  	u32 hash = req->r_direct_hash;
  	bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
+ if (random)
+		*random = false;
+
  	/*
  	 * is there a specific mds we should try?  ignore hint if we have
  	 * no session and the mds is not up (active or recovering).
@@ -1021,6 +1025,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
  	return mds;
random:
+	if (random)
+		*random = true;
+
  	mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
  	dout("choose_mds chose random mds%d\n", mds);
  	return mds;
@@ -2556,6 +2563,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
  	struct ceph_mds_session *session = NULL;
  	int mds = -1;
  	int err = 0;
+	bool random;
if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
  		if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
@@ -2596,7 +2604,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
put_request_session(req); - mds = __choose_mds(mdsc, req);
+	mds = __choose_mds(mdsc, req, &random);
  	if (mds < 0 ||
  	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
  		dout("do_request no mds or not active, waiting for map\n");
@@ -2624,8 +2632,12 @@ static void __do_request(struct ceph_mds_client *mdsc,
  			goto out_session;
  		}
  		if (session->s_state == CEPH_MDS_SESSION_NEW ||
-		    session->s_state == CEPH_MDS_SESSION_CLOSING)
+		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
  			__open_session(mdsc, session);
+			/* retry the same mds later */
+			if (random)
+				req->r_resend_mds = mds;
+		}
  		list_add(&req->r_wait, &session->s_waiting);
  		goto out_session;
  	}
@@ -2890,7 +2902,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
  			mutex_unlock(&mdsc->mutex);
  			goto out;
  		} else  {
-			int mds = __choose_mds(mdsc, req);
+			int mds = __choose_mds(mdsc, req, NULL);
  			if (mds >= 0 && mds != req->r_session->s_mds) {
  				dout("but auth changed, so resending\n");
  				__do_request(mdsc, req);






[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux