Re: [PATCH 14/39] mds: set resolve/rejoin gather MDS set in advance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@xxxxxxxxx> wrote:
> From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx>
>
> For active MDS, it may receive resolve/resolve message before receiving

resolve/rejoin, maybe?
Other than that,
Reviewed-by: Greg Farnum <greg@xxxxxxxxxxx>

> the mdsmap message that claims the MDS cluster is in resolving/rejoining
> state. So instead of setting the gather MDS sets when receiving the mdsmap,
> set them in advance when detecting an MDS failure.
>
> Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx>
> ---
>  src/mds/MDCache.cc | 41 +++++++++++++++++++----------------------
>  src/mds/MDCache.h  |  5 ++---
>  2 files changed, 21 insertions(+), 25 deletions(-)
>
> diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
> index 73c1d59..69db1dd 100644
> --- a/src/mds/MDCache.cc
> +++ b/src/mds/MDCache.cc
> @@ -2432,18 +2432,17 @@ void MDCache::resolve_start()
>      if (rootdir)
>        adjust_subtree_auth(rootdir, CDIR_AUTH_UNKNOWN);
>    }
> +  resolve_gather = recovery_set;
> +  resolve_gather.erase(mds->get_nodeid());
> +  rejoin_gather = resolve_gather;
>  }
>
>  void MDCache::send_resolves()
>  {
> -  // reset resolve state
> -  got_resolve.clear();
> -  other_ambiguous_imports.clear();
> -
>    send_slave_resolves();
>    if (!resolve_ack_gather.empty()) {
>      dout(10) << "send_resolves still waiting for resolve ack from ("
> -             << need_resolve_ack << ")" << dendl;
> +            << resolve_ack_gather << ")" << dendl;
>      return;
>    }
>    if (!need_resolve_rollback.empty()) {
> @@ -2495,7 +2494,7 @@ void MDCache::send_slave_resolves()
>         ++p) {
>      dout(10) << "sending slave resolve to mds." << p->first << dendl;
>      mds->send_message_mds(p->second, p->first);
> -    need_resolve_ack.insert(p->first);
> +    resolve_ack_gather.insert(p->first);
>    }
>  }
>
> @@ -2598,16 +2597,15 @@ void MDCache::handle_mds_failure(int who)
>    recovery_set.erase(mds->get_nodeid());
>    dout(1) << "handle_mds_failure mds." << who << " : recovery peers are " << recovery_set << dendl;
>
> -  // adjust my recovery lists
> -  wants_resolve.erase(who);   // MDS will ask again
> -  got_resolve.erase(who);     // i'll get another.
> +  resolve_gather.insert(who);
>    discard_delayed_resolve(who);
>
> +  rejoin_gather.insert(who);
>    rejoin_sent.erase(who);        // i need to send another
>    rejoin_ack_gather.erase(who);  // i'll need/get another.
>
> -  dout(10) << " wants_resolve " << wants_resolve << dendl;
> -  dout(10) << " got_resolve " << got_resolve << dendl;
> +  dout(10) << " resolve_gather " << resolve_gather << dendl;
> +  dout(10) << " resolve_ack_gather " << resolve_ack_gather << dendl;
>    dout(10) << " rejoin_sent " << rejoin_sent << dendl;
>    dout(10) << " rejoin_gather " << rejoin_gather << dendl;
>    dout(10) << " rejoin_ack_gather " << rejoin_ack_gather << dendl;
> @@ -2788,7 +2786,7 @@ void MDCache::handle_resolve(MMDSResolve *m)
>      return;
>    }
>
> -  if (!need_resolve_ack.empty() || !need_resolve_rollback.empty()) {
> +  if (!resolve_ack_gather.empty() || !need_resolve_rollback.empty()) {
>      dout(10) << "delay processing subtree resolve" << dendl;
>      discard_delayed_resolve(from);
>      delayed_resolve[from] = m;
> @@ -2875,7 +2873,7 @@ void MDCache::handle_resolve(MMDSResolve *m)
>    }
>
>    // did i get them all?
> -  got_resolve.insert(from);
> +  resolve_gather.erase(from);
>
>    maybe_resolve_finish();
>
> @@ -2901,12 +2899,12 @@ void MDCache::discard_delayed_resolve(int who)
>
>  void MDCache::maybe_resolve_finish()
>  {
> -  assert(need_resolve_ack.empty());
> +  assert(resolve_ack_gather.empty());
>    assert(need_resolve_rollback.empty());
>
> -  if (got_resolve != recovery_set) {
> -    dout(10) << "maybe_resolve_finish still waiting for more resolves, got ("
> -            << got_resolve << "), need (" << recovery_set << ")" << dendl;
> +  if (!resolve_gather.empty()) {
> +    dout(10) << "maybe_resolve_finish still waiting for resolves ("
> +            << resolve_gather << ")" << dendl;
>      return;
>    } else {
>      dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl;
> @@ -2926,7 +2924,7 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack)
>    dout(10) << "handle_resolve_ack " << *ack << " from " << ack->get_source() << dendl;
>    int from = ack->get_source().num();
>
> -  if (!need_resolve_ack.count(from)) {
> +  if (!resolve_ack_gather.count(from)) {
>      ack->put();
>      return;
>    }
> @@ -3001,8 +2999,8 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack)
>        assert(p->second->slave_to_mds != from);
>    }
>
> -  need_resolve_ack.erase(from);
> -  if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
> +  resolve_ack_gather.erase(from);
> +  if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) {
>      send_subtree_resolves();
>      process_delayed_resolve();
>    }
> @@ -3069,7 +3067,7 @@ void MDCache::finish_rollback(metareqid_t reqid) {
>    if (mds->is_resolve())
>      finish_uncommitted_slave_update(reqid, need_resolve_rollback[reqid]);
>    need_resolve_rollback.erase(reqid);
> -  if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
> +  if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) {
>      send_subtree_resolves();
>      process_delayed_resolve();
>    }
> @@ -3417,7 +3415,6 @@ void MDCache::rejoin_send_rejoins()
>      if (*p == mds->get_nodeid())  continue;  // nothing to myself!
>      if (rejoin_sent.count(*p)) continue;     // already sent a rejoin to this node!
>      if (mds->is_rejoin()) {
> -      rejoin_gather.insert(*p);
>        rejoins[*p] = new MMDSCacheRejoin(MMDSCacheRejoin::OP_WEAK);
>        rejoins[*p]->copy_cap_exports(cap_export_bl);
>      } else if (mds->mdsmap->is_rejoin(*p))
> diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
> index 10e3dd7..278debf 100644
> --- a/src/mds/MDCache.h
> +++ b/src/mds/MDCache.h
> @@ -329,9 +329,8 @@ protected:
>    friend class ECommitted;
>
>    bool resolves_pending;
> -  set<int> wants_resolve;   // nodes i need to send my resolve to
> -  set<int> got_resolve;     // nodes i got resolves from
> -  set<int> need_resolve_ack;   // nodes i need a resolve_ack from
> +  set<int> resolve_gather;     // nodes i need resolves from
> +  set<int> resolve_ack_gather; // nodes i need a resolve_ack from
>    map<metareqid_t, int> need_resolve_rollback;  // rollbacks i'm writing to the journal
>    map<int, MMDSResolve*> delayed_resolve;
>
> --
> 1.7.11.7
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux