On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@xxxxxxxxx> wrote: > From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx> > > For active MDS, it may receive resolve/resolve message before receiving resolve/rejoin, maybe? Other than that, Reviewed-by: Greg Farnum <greg@xxxxxxxxxxx> > the mdsmap message that claims the MDS cluster is in resolving/rejoining > state. So instead of setting the gather MDS set when receiving the mdsmap, > set them in advance when detecting an MDS failure. > > Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx> > --- > src/mds/MDCache.cc | 41 +++++++++++++++++++---------------------- > src/mds/MDCache.h | 5 ++--- > 2 files changed, 21 insertions(+), 25 deletions(-) > > diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc > index 73c1d59..69db1dd 100644 > --- a/src/mds/MDCache.cc > +++ b/src/mds/MDCache.cc > @@ -2432,18 +2432,17 @@ void MDCache::resolve_start() > if (rootdir) > adjust_subtree_auth(rootdir, CDIR_AUTH_UNKNOWN); > } > + resolve_gather = recovery_set; > + resolve_gather.erase(mds->get_nodeid()); > + rejoin_gather = resolve_gather; > } > > void MDCache::send_resolves() > { > - // reset resolve state > - got_resolve.clear(); > - other_ambiguous_imports.clear(); > - > send_slave_resolves(); > if (!resolve_ack_gather.empty()) { > dout(10) << "send_resolves still waiting for resolve ack from (" > - << need_resolve_ack << ")" << dendl; > + << resolve_ack_gather << ")" << dendl; > return; > } > if (!need_resolve_rollback.empty()) { > @@ -2495,7 +2494,7 @@ void MDCache::send_slave_resolves() > ++p) { > dout(10) << "sending slave resolve to mds." << p->first << dendl; > mds->send_message_mds(p->second, p->first); > - need_resolve_ack.insert(p->first); > + resolve_ack_gather.insert(p->first); > } > } > > @@ -2598,16 +2597,15 @@ void MDCache::handle_mds_failure(int who) > recovery_set.erase(mds->get_nodeid()); > dout(1) << "handle_mds_failure mds." 
<< who << " : recovery peers are " << recovery_set << dendl; > > - // adjust my recovery lists > - wants_resolve.erase(who); // MDS will ask again > - got_resolve.erase(who); // i'll get another. > + resolve_gather.insert(who); > discard_delayed_resolve(who); > > + rejoin_gather.insert(who); > rejoin_sent.erase(who); // i need to send another > rejoin_ack_gather.erase(who); // i'll need/get another. > > - dout(10) << " wants_resolve " << wants_resolve << dendl; > - dout(10) << " got_resolve " << got_resolve << dendl; > + dout(10) << " resolve_gather " << resolve_gather << dendl; > + dout(10) << " resolve_ack_gather " << resolve_ack_gather << dendl; > dout(10) << " rejoin_sent " << rejoin_sent << dendl; > dout(10) << " rejoin_gather " << rejoin_gather << dendl; > dout(10) << " rejoin_ack_gather " << rejoin_ack_gather << dendl; > @@ -2788,7 +2786,7 @@ void MDCache::handle_resolve(MMDSResolve *m) > return; > } > > - if (!need_resolve_ack.empty() || !need_resolve_rollback.empty()) { > + if (!resolve_ack_gather.empty() || !need_resolve_rollback.empty()) { > dout(10) << "delay processing subtree resolve" << dendl; > discard_delayed_resolve(from); > delayed_resolve[from] = m; > @@ -2875,7 +2873,7 @@ void MDCache::handle_resolve(MMDSResolve *m) > } > > // did i get them all? 
> - got_resolve.insert(from); > + resolve_gather.erase(from); > > maybe_resolve_finish(); > > @@ -2901,12 +2899,12 @@ void MDCache::discard_delayed_resolve(int who) > > void MDCache::maybe_resolve_finish() > { > - assert(need_resolve_ack.empty()); > + assert(resolve_ack_gather.empty()); > assert(need_resolve_rollback.empty()); > > - if (got_resolve != recovery_set) { > - dout(10) << "maybe_resolve_finish still waiting for more resolves, got (" > - << got_resolve << "), need (" << recovery_set << ")" << dendl; > + if (!resolve_gather.empty()) { > + dout(10) << "maybe_resolve_finish still waiting for resolves (" > + << resolve_gather << ")" << dendl; > return; > } else { > dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl; > @@ -2926,7 +2924,7 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) > dout(10) << "handle_resolve_ack " << *ack << " from " << ack->get_source() << dendl; > int from = ack->get_source().num(); > > - if (!need_resolve_ack.count(from)) { > + if (!resolve_ack_gather.count(from)) { > ack->put(); > return; > } > @@ -3001,8 +2999,8 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) > assert(p->second->slave_to_mds != from); > } > > - need_resolve_ack.erase(from); > - if (need_resolve_ack.empty() && need_resolve_rollback.empty()) { > + resolve_ack_gather.erase(from); > + if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) { > send_subtree_resolves(); > process_delayed_resolve(); > } > @@ -3069,7 +3067,7 @@ void MDCache::finish_rollback(metareqid_t reqid) { > if (mds->is_resolve()) > finish_uncommitted_slave_update(reqid, need_resolve_rollback[reqid]); > need_resolve_rollback.erase(reqid); > - if (need_resolve_ack.empty() && need_resolve_rollback.empty()) { > + if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) { > send_subtree_resolves(); > process_delayed_resolve(); > } > @@ -3417,7 +3415,6 @@ void MDCache::rejoin_send_rejoins() > if (*p == mds->get_nodeid()) continue; // nothing 
to myself! > if (rejoin_sent.count(*p)) continue; // already sent a rejoin to this node! > if (mds->is_rejoin()) { > - rejoin_gather.insert(*p); > rejoins[*p] = new MMDSCacheRejoin(MMDSCacheRejoin::OP_WEAK); > rejoins[*p]->copy_cap_exports(cap_export_bl); > } else if (mds->mdsmap->is_rejoin(*p)) > diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h > index 10e3dd7..278debf 100644 > --- a/src/mds/MDCache.h > +++ b/src/mds/MDCache.h > @@ -329,9 +329,8 @@ protected: > friend class ECommitted; > > bool resolves_pending; > - set<int> wants_resolve; // nodes i need to send my resolve to > - set<int> got_resolve; // nodes i got resolves from > - set<int> need_resolve_ack; // nodes i need a resolve_ack from > + set<int> resolve_gather; // nodes i need resolves from > + set<int> resolve_ack_gather; // nodes i need a resolve_ack from > map<metareqid_t, int> need_resolve_rollback; // rollbacks i'm writing to the journal > map<int, MMDSResolve*> delayed_resolve; > > -- > 1.7.11.7 > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html