[PATCH 20/39] mds: include replica nonce in MMDSCacheRejoin::inode_strong

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx>

Include the replica nonce in MMDSCacheRejoin::inode_strong so that the
recovering MDS can properly handle cache expire messages. Also increment
the nonce value when sending the cache rejoin acks.

Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx>
---
 src/mds/MDCache.cc             | 35 +++++++++++++++++++++++------------
 src/messages/MMDSCacheRejoin.h | 11 +++++++----
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 008a8a2..8ba676e 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -3538,6 +3538,7 @@ void MDCache::rejoin_send_rejoins()
       if (p->first == 0 && root) {
 	p->second->add_weak_inode(root->vino());
 	p->second->add_strong_inode(root->vino(),
+				    root->get_replica_nonce(),
 				    root->get_caps_wanted(),
 				    root->filelock.get_state(),
 				    root->nestlock.get_state(),
@@ -3551,6 +3552,7 @@ void MDCache::rejoin_send_rejoins()
       if (CInode *in = get_inode(MDS_INO_MDSDIR(p->first))) {
 	p->second->add_weak_inode(in->vino());
 	p->second->add_strong_inode(in->vino(),
+				    in->get_replica_nonce(),
 				    in->get_caps_wanted(),
 				    in->filelock.get_state(),
 				    in->nestlock.get_state(),
@@ -3709,6 +3711,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
 	CInode *in = dnl->get_inode();
 	dout(15) << " add_strong_inode " << *in << dendl;
 	rejoin->add_strong_inode(in->vino(),
+				 in->get_replica_nonce(),
 				 in->get_caps_wanted(),
 				 in->filelock.get_state(),
 				 in->nestlock.get_state(),
@@ -4248,7 +4251,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
 	dir = rejoin_invent_dirfrag(p->first);
     }
     if (dir) {
-      dir->add_replica(from);
+      dir->add_replica(from, p->second.nonce);
       dir->dir_rep = p->second.dir_rep;
     } else {
       dout(10) << " frag " << p->first << " doesn't match dirfragtree " << *diri << dendl;
@@ -4263,7 +4266,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
 	  dir = rejoin_invent_dirfrag(p->first);
 	else
 	  dout(10) << " have(approx) " << *dir << dendl;
-	dir->add_replica(from);
+	dir->add_replica(from, p->second.nonce);
 	dir->dir_rep = p->second.dir_rep;
       }
       refragged = true;
@@ -4327,7 +4330,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
 	mdr->locks.insert(&dn->lock);
       }
 
-      dn->add_replica(from);
+      dn->add_replica(from, q->second.nonce);
       dout(10) << " have " << *dn << dendl;
       
       // inode?
@@ -4412,7 +4415,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
 	  dout(10) << " sender has dentry but not inode, adding them as a replica" << dendl;
 	}
 	
-	in->add_replica(from);
+	in->add_replica(from, p->second.nonce);
 	dout(10) << " have " << *in << dendl;
       }
     }
@@ -5176,7 +5179,7 @@ void MDCache::rejoin_send_acks()
       for (map<int,int>::iterator r = dir->replicas_begin();
 	   r != dir->replicas_end();
 	   ++r) 
-	ack[r->first]->add_strong_dirfrag(dir->dirfrag(), r->second, dir->dir_rep);
+	ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep);
 	   
       for (CDir::map_t::iterator q = dir->items.begin();
 	   q != dir->items.end();
@@ -5192,7 +5195,7 @@ void MDCache::rejoin_send_acks()
 					   dnl->is_primary() ? dnl->get_inode()->ino():inodeno_t(0),
 					   dnl->is_remote() ? dnl->get_remote_ino():inodeno_t(0),
 					   dnl->is_remote() ? dnl->get_remote_d_type():0,
-					   r->second,
+					   ++r->second,
 					   dn->lock.get_replica_state());
 	
 	if (!dnl->is_primary())
@@ -5205,7 +5208,7 @@ void MDCache::rejoin_send_acks()
 	     r != in->replicas_end();
 	     ++r) {
 	  ack[r->first]->add_inode_base(in);
-	  ack[r->first]->add_inode_locks(in, r->second);
+	  ack[r->first]->add_inode_locks(in, ++r->second);
 	}
 	
 	// subdirs in this subtree?
@@ -5220,14 +5223,14 @@ void MDCache::rejoin_send_acks()
 	 r != root->replicas_end();
 	 ++r) {
       ack[r->first]->add_inode_base(root);
-      ack[r->first]->add_inode_locks(root, r->second);
+      ack[r->first]->add_inode_locks(root, ++r->second);
     }
   if (myin)
     for (map<int,int>::iterator r = myin->replicas_begin();
 	 r != myin->replicas_end();
 	 ++r) {
       ack[r->first]->add_inode_base(myin);
-      ack[r->first]->add_inode_locks(myin, r->second);
+      ack[r->first]->add_inode_locks(myin, ++r->second);
     }
 
   // include inode base for any inodes whose scatterlocks may have updated
@@ -5728,6 +5731,12 @@ void MDCache::send_expire_messages(map<int, MCacheExpire*>& expiremap)
   for (map<int, MCacheExpire*>::iterator it = expiremap.begin();
        it != expiremap.end();
        ++it) {
+    if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN ||
+	(mds->mdsmap->get_state(it->first) == MDSMap::STATE_REJOIN &&
+	 rejoin_sent.count(it->first) == 0)) {
+      it->second->put();
+      continue;
+    }
     dout(7) << "sending cache_expire to " << it->first << dendl;
     mds->send_message_mds(it->second, it->first);
   }
@@ -9640,9 +9649,11 @@ void MDCache::handle_dentry_link(MDentryLink *m)
     CInode *in = add_replica_inode(p, NULL, finished);
     assert(in->get_num_ref() == 0);
     assert(in->get_parent_dn() == NULL);
-    MCacheExpire* expire = new MCacheExpire(mds->get_nodeid());
-    expire->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce());
-    mds->send_message_mds(expire, m->get_source().num());
+    map<int, MCacheExpire*> expiremap;
+    int from = m->get_source().num();
+    expiremap[from] = new MCacheExpire(mds->get_nodeid());
+    expiremap[from]->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce());
+    send_expire_messages(expiremap);
     remove_inode(in);
   }
 
diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h
index 825400d..b88f551 100644
--- a/src/messages/MMDSCacheRejoin.h
+++ b/src/messages/MMDSCacheRejoin.h
@@ -43,19 +43,22 @@ class MMDSCacheRejoin : public Message {
 
   // -- types --
   struct inode_strong { 
+    int32_t nonce;
     int32_t caps_wanted;
     int32_t filelock, nestlock, dftlock;
     inode_strong() {}
-    inode_strong(int cw, int dl, int nl, int dftl) : 
-      caps_wanted(cw),
+    inode_strong(int n, int cw, int dl, int nl, int dftl) :
+      nonce(n), caps_wanted(cw),
       filelock(dl), nestlock(nl), dftlock(dftl) { }
     void encode(bufferlist &bl) const {
+      ::encode(nonce, bl);
       ::encode(caps_wanted, bl);
       ::encode(filelock, bl);
       ::encode(nestlock, bl);
       ::encode(dftlock, bl);
     }
     void decode(bufferlist::iterator &bl) {
+      ::decode(nonce, bl);
       ::decode(caps_wanted, bl);
       ::decode(filelock, bl);
       ::decode(nestlock, bl);
@@ -208,8 +211,8 @@ public:
   void add_weak_inode(vinodeno_t i) {
     weak_inodes.insert(i);
   }
-  void add_strong_inode(vinodeno_t i, int cw, int dl, int nl, int dftl) {
-    strong_inodes[i] = inode_strong(cw, dl, nl, dftl);
+  void add_strong_inode(vinodeno_t i, int n, int cw, int dl, int nl, int dftl) {
+    strong_inodes[i] = inode_strong(n, cw, dl, nl, dftl);
   }
   void add_inode_locks(CInode *in, __u32 nonce) {
     ::encode(in->inode.ino, inode_locks);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux