Re: [PATCH 27/39] mds: send lock action message when auth MDS is in proper state.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 03/21/2013 11:12 AM, Gregory Farnum wrote:
> On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@xxxxxxxxx> wrote:
>> From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx>
>>
>> For rejoining object, don't send lock ACK message because lock states
>> are still uncertain. The lock ACK may confuse object's auth MDS and
>> trigger assertion.
>>
>> If object's auth MDS is not active, just skip sending NUDGE, REQRDLOCK
>> and REQSCATTER messages. MDCache::handle_mds_recovery() will take care
>> of them.
>>
>> Also defer caps release message until clientreplay or active
>>
>> Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx>
>> ---
>>  src/mds/Locker.cc  | 46 ++++++++++++++++++++++++++++++----------------
>>  src/mds/MDCache.cc | 13 +++++++++++--
>>  2 files changed, 41 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
>> index 28920d4..ece39e3 100644
>> --- a/src/mds/Locker.cc
>> +++ b/src/mds/Locker.cc
>> @@ -658,6 +658,13 @@ void Locker::eval_gather(SimpleLock *lock, bool first, bool *pneed_issue, list<C
>>        // replica: tell auth
>>        int auth = lock->get_parent()->authority().first;
>>
>> +      if (lock->get_parent()->is_rejoining() &&
>> +         mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) {
>> +       dout(7) << "eval_gather finished gather, but still rejoining "
>> +               << *lock->get_parent() << dendl;
>> +       return;
>> +      }
>> +
>>        if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) {
>>         switch (lock->get_state()) {
>>         case LOCK_SYNC_LOCK:
>> @@ -1050,9 +1057,11 @@ bool Locker::_rdlock_kick(SimpleLock *lock, bool as_anon)
>>      } else {
>>        // request rdlock state change from auth
>>        int auth = lock->get_parent()->authority().first;
>> -      dout(10) << "requesting rdlock from auth on "
>> -              << *lock << " on " << *lock->get_parent() << dendl;
>> -      mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth);
>> +      if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) {
>> +       dout(10) << "requesting rdlock from auth on "
>> +                << *lock << " on " << *lock->get_parent() << dendl;
>> +       mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth);
>> +      }
>>        return false;
>>      }
>>    }
>> @@ -1272,9 +1281,11 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait)
>>        // replica.
>>        // auth should be auth_pinned (see acquire_locks wrlock weird mustpin case).
>>        int auth = lock->get_parent()->authority().first;
>> -      dout(10) << "requesting scatter from auth on "
>> -              << *lock << " on " << *lock->get_parent() << dendl;
>> -      mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth);
>> +      if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) {
>> +       dout(10) << "requesting scatter from auth on "
>> +                << *lock << " on " << *lock->get_parent() << dendl;
>> +       mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth);
>> +      }
>>        break;
>>      }
>>    }
>> @@ -1899,13 +1910,19 @@ void Locker::request_inode_file_caps(CInode *in)
>>      }
>>
>>      int auth = in->authority().first;
>> +    if (in->is_rejoining() &&
>> +       mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) {
>> +      mds->wait_for_active_peer(auth, new C_MDL_RequestInodeFileCaps(this, in));
>> +      return;
>> +    }
>> +
>>      dout(7) << "request_inode_file_caps " << ccap_string(wanted)
>>              << " was " << ccap_string(in->replica_caps_wanted)
>>              << " on " << *in << " to mds." << auth << dendl;
>>
>>      in->replica_caps_wanted = wanted;
>>
>> -    if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN)
>> +    if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth))
>>        mds->send_message_mds(new MInodeFileCaps(in->ino(), in->replica_caps_wanted),
>>                             auth);
>>    }
>> @@ -1924,14 +1941,6 @@ void Locker::handle_inode_file_caps(MInodeFileCaps *m)
>>    assert(in);
>>    assert(in->is_auth());
>>
>> -  if (mds->is_rejoin() &&
>> -      in->is_rejoining()) {
>> -    dout(7) << "handle_inode_file_caps still rejoining " << *in << ", dropping " << *m << dendl;
>> -    m->put();
>> -    return;
>> -  }
> 
> This is okay since we catch it in the follow-on functions (I assume
> that's why you removed it, to avoid checks at more levels than
> necessary), but if you could note that's why in the commit message
> it'll prevent anyone else from needing to go check like I did. :)
> 

if an inode is auth, it can not be rejoining. that's why I removed it.

Thanks
Yan, Zheng


> The code looks good.
> Reviewed-by: Greg Farnum <greg@xxxxxxxxxxx>
> 
>> -
>> -
>>    dout(7) << "handle_inode_file_caps replica mds." << from << " wants caps " << ccap_string(m->get_caps()) << " on " << *in << dendl;
>>
>>    if (m->get_caps())
>> @@ -2850,6 +2859,11 @@ void Locker::handle_client_cap_release(MClientCapRelease *m)
>>    client_t client = m->get_source().num();
>>    dout(10) << "handle_client_cap_release " << *m << dendl;
>>
>> +  if (!mds->is_clientreplay() && !mds->is_active() && !mds->is_stopping()) {
>> +    mds->wait_for_replay(new C_MDS_RetryMessage(mds, m));
>> +    return;
>> +  }
>> +
>>    for (vector<ceph_mds_cap_item>::iterator p = m->caps.begin(); p != m->caps.end(); ++p) {
>>      inodeno_t ino((uint64_t)p->ino);
>>      CInode *in = mdcache->get_inode(ino);
>> @@ -3859,7 +3873,7 @@ void Locker::scatter_nudge(ScatterLock *lock, Context *c, bool forcelockchange)
>>              << *lock << " on " << *p << dendl;
>>      // request unscatter?
>>      int auth = lock->get_parent()->authority().first;
>> -    if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_ACTIVE)
>> +    if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth))
>>        mds->send_message_mds(new MLock(lock, LOCK_AC_NUDGE, mds->get_nodeid()), auth);
>>
>>      // wait...
>> diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
>> index 459b400..973a4d0 100644
>> --- a/src/mds/MDCache.cc
>> +++ b/src/mds/MDCache.cc
>> @@ -3321,8 +3321,10 @@ void MDCache::recalc_auth_bits()
>>
>>    if (root) {
>>      root->inode_auth.first = mds->mdsmap->get_root();
>> -    if (mds->whoami != root->inode_auth.first)
>> +    if (mds->whoami != root->inode_auth.first) {
>>        root->state_clear(CInode::STATE_AUTH);
>> +      root->state_set(CInode::STATE_REJOINING);
>> +    }
>>    }
>>
>>    set<CInode*> subtree_inodes;
>> @@ -3336,8 +3338,10 @@ void MDCache::recalc_auth_bits()
>>         ++p) {
>>
>>      CInode *inode = p->first->get_inode();
>> -    if (inode->is_mdsdir() && inode->ino() != MDS_INO_MDSDIR(mds->get_nodeid()))
>> +    if (inode->is_mdsdir() && inode->ino() != MDS_INO_MDSDIR(mds->get_nodeid())) {
>>        inode->state_clear(CInode::STATE_AUTH);
>> +      inode->state_set(CInode::STATE_REJOINING);
>> +    }
>>
>>      list<CDir*> dfq;  // dirfrag queue
>>      dfq.push_back(p->first);
>> @@ -3542,6 +3546,7 @@ void MDCache::rejoin_send_rejoins()
>>                                     root->filelock.get_state(),
>>                                     root->nestlock.get_state(),
>>                                     root->dirfragtreelock.get_state());
>> +       root->state_set(CInode::STATE_REJOINING);
>>         if (root->is_dirty_scattered()) {
>>           dout(10) << " sending scatterlock state on root " << *root << dendl;
>>           p->second->add_scatterlock_state(root);
>> @@ -3555,6 +3560,7 @@ void MDCache::rejoin_send_rejoins()
>>                                     in->filelock.get_state(),
>>                                     in->nestlock.get_state(),
>>                                     in->dirfragtreelock.get_state());
>> +       in->state_set(CInode::STATE_REJOINING);
>>        }
>>      }
>>    }
>> @@ -3694,6 +3700,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
>>      // STRONG
>>      dout(15) << " add_strong_dirfrag " << *dir << dendl;
>>      rejoin->add_strong_dirfrag(dir->dirfrag(), dir->get_replica_nonce(), dir->get_dir_rep());
>> +    dir->state_set(CDir::STATE_REJOINING);
>>
>>      for (CDir::map_t::iterator p = dir->items.begin();
>>          p != dir->items.end();
>> @@ -3707,6 +3714,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
>>                                 dnl->is_remote() ? dnl->get_remote_d_type():0,
>>                                 dn->get_replica_nonce(),
>>                                 dn->lock.get_state());
>> +      dn->state_set(CDentry::STATE_REJOINING);
>>        if (dnl->is_primary()) {
>>         CInode *in = dnl->get_inode();
>>         dout(15) << " add_strong_inode " << *in << dendl;
>> @@ -3716,6 +3724,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
>>                                  in->filelock.get_state(),
>>                                  in->nestlock.get_state(),
>>                                  in->dirfragtreelock.get_state());
>> +       in->state_set(CInode::STATE_REJOINING);
>>         in->get_nested_dirfrags(nested);
>>         if (in->is_dirty_scattered()) {
>>           dout(10) << " sending scatterlock state on " << *in << dendl;
>> --
>> 1.7.11.7
>>

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux