Re: [PATCH 20/39] mds: include replica nonce in MMDSCacheRejoin::inode_strong

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 20 Mar 2013, Gregory Farnum wrote:
> On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@xxxxxxxxx> wrote:
> > From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx>
> >
> > So the recovering MDS can properly handle cache expire messages.
> > Also increase the nonce value when sending the cache rejoin acks.
> >
> > Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx>
> > ---
> >  src/mds/MDCache.cc             | 35 +++++++++++++++++++++++------------
> >  src/messages/MMDSCacheRejoin.h | 11 +++++++----
> >  2 files changed, 30 insertions(+), 16 deletions(-)
> >
> > diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
> > index 008a8a2..8ba676e 100644
> > --- a/src/mds/MDCache.cc
> > +++ b/src/mds/MDCache.cc
> > @@ -3538,6 +3538,7 @@ void MDCache::rejoin_send_rejoins()
> >        if (p->first == 0 && root) {
> >         p->second->add_weak_inode(root->vino());
> >         p->second->add_strong_inode(root->vino(),
> > +                                   root->get_replica_nonce(),
> >                                     root->get_caps_wanted(),
> >                                     root->filelock.get_state(),
> >                                     root->nestlock.get_state(),
> > @@ -3551,6 +3552,7 @@ void MDCache::rejoin_send_rejoins()
> >        if (CInode *in = get_inode(MDS_INO_MDSDIR(p->first))) {
> >         p->second->add_weak_inode(in->vino());
> >         p->second->add_strong_inode(in->vino(),
> > +                                   in->get_replica_nonce(),
> >                                     in->get_caps_wanted(),
> >                                     in->filelock.get_state(),
> >                                     in->nestlock.get_state(),
> > @@ -3709,6 +3711,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
> >         CInode *in = dnl->get_inode();
> >         dout(15) << " add_strong_inode " << *in << dendl;
> >         rejoin->add_strong_inode(in->vino(),
> > +                                in->get_replica_nonce(),
> >                                  in->get_caps_wanted(),
> >                                  in->filelock.get_state(),
> >                                  in->nestlock.get_state(),
> > @@ -4248,7 +4251,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
> >         dir = rejoin_invent_dirfrag(p->first);
> >      }
> >      if (dir) {
> > -      dir->add_replica(from);
> > +      dir->add_replica(from, p->second.nonce);
> >        dir->dir_rep = p->second.dir_rep;
> >      } else {
> >        dout(10) << " frag " << p->first << " doesn't match dirfragtree " << *diri << dendl;
> > @@ -4263,7 +4266,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
> >           dir = rejoin_invent_dirfrag(p->first);
> >         else
> >           dout(10) << " have(approx) " << *dir << dendl;
> > -       dir->add_replica(from);
> > +       dir->add_replica(from, p->second.nonce);
> >         dir->dir_rep = p->second.dir_rep;
> >        }
> >        refragged = true;
> > @@ -4327,7 +4330,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
> >         mdr->locks.insert(&dn->lock);
> >        }
> >
> > -      dn->add_replica(from);
> > +      dn->add_replica(from, q->second.nonce);
> >        dout(10) << " have " << *dn << dendl;
> >
> >        // inode?
> > @@ -4412,7 +4415,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
> >           dout(10) << " sender has dentry but not inode, adding them as a replica" << dendl;
> >         }
> >
> > -       in->add_replica(from);
> > +       in->add_replica(from, p->second.nonce);
> >         dout(10) << " have " << *in << dendl;
> >        }
> >      }
> > @@ -5176,7 +5179,7 @@ void MDCache::rejoin_send_acks()
> >        for (map<int,int>::iterator r = dir->replicas_begin();
> >            r != dir->replicas_end();
> >            ++r)
> > -       ack[r->first]->add_strong_dirfrag(dir->dirfrag(), r->second, dir->dir_rep);
> > +       ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep);
> >
> >        for (CDir::map_t::iterator q = dir->items.begin();
> >            q != dir->items.end();
> > @@ -5192,7 +5195,7 @@ void MDCache::rejoin_send_acks()
> >                                            dnl->is_primary() ? dnl->get_inode()->ino():inodeno_t(0),
> >                                            dnl->is_remote() ? dnl->get_remote_ino():inodeno_t(0),
> >                                            dnl->is_remote() ? dnl->get_remote_d_type():0,
> > -                                          r->second,
> > +                                          ++r->second,
> >                                            dn->lock.get_replica_state());
> >
> >         if (!dnl->is_primary())
> > @@ -5205,7 +5208,7 @@ void MDCache::rejoin_send_acks()
> >              r != in->replicas_end();
> >              ++r) {
> >           ack[r->first]->add_inode_base(in);
> > -         ack[r->first]->add_inode_locks(in, r->second);
> > +         ack[r->first]->add_inode_locks(in, ++r->second);
> >         }
> >
> >         // subdirs in this subtree?
> > @@ -5220,14 +5223,14 @@ void MDCache::rejoin_send_acks()
> >          r != root->replicas_end();
> >          ++r) {
> >        ack[r->first]->add_inode_base(root);
> > -      ack[r->first]->add_inode_locks(root, r->second);
> > +      ack[r->first]->add_inode_locks(root, ++r->second);
> >      }
> >    if (myin)
> >      for (map<int,int>::iterator r = myin->replicas_begin();
> >          r != myin->replicas_end();
> >          ++r) {
> >        ack[r->first]->add_inode_base(myin);
> > -      ack[r->first]->add_inode_locks(myin, r->second);
> > +      ack[r->first]->add_inode_locks(myin, ++r->second);
> >      }
> >
> >    // include inode base for any inodes whose scatterlocks may have updated
> > @@ -5728,6 +5731,12 @@ void MDCache::send_expire_messages(map<int, MCacheExpire*>& expiremap)
> >    for (map<int, MCacheExpire*>::iterator it = expiremap.begin();
> >         it != expiremap.end();
> >         ++it) {
> > +    if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN ||
> > +       (mds->mdsmap->get_state(it->first) == MDSMap::STATE_REJOIN &&
> > +        rejoin_sent.count(it->first) == 0)) {
> > +      it->second->put();
> > +      continue;
> > +    }
> >      dout(7) << "sending cache_expire to " << it->first << dendl;
> >      mds->send_message_mds(it->second, it->first);
> >    }
> > @@ -9640,9 +9649,11 @@ void MDCache::handle_dentry_link(MDentryLink *m)
> >      CInode *in = add_replica_inode(p, NULL, finished);
> >      assert(in->get_num_ref() == 0);
> >      assert(in->get_parent_dn() == NULL);
> > -    MCacheExpire* expire = new MCacheExpire(mds->get_nodeid());
> > -    expire->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce());
> > -    mds->send_message_mds(expire, m->get_source().num());
> > +    map<int, MCacheExpire*> expiremap;
> > +    int from = m->get_source().num();
> > +    expiremap[from] = new MCacheExpire(mds->get_nodeid());
> > +    expiremap[from]->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce());
> > +    send_expire_messages(expiremap);
> >      remove_inode(in);
> >    }
> >
> > diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h
> > index 825400d..b88f551 100644
> > --- a/src/messages/MMDSCacheRejoin.h
> > +++ b/src/messages/MMDSCacheRejoin.h
> > @@ -43,19 +43,22 @@ class MMDSCacheRejoin : public Message {
> >
> >    // -- types --
> >    struct inode_strong {
> > +    int32_t nonce;
> >      int32_t caps_wanted;
> >      int32_t filelock, nestlock, dftlock;
> >      inode_strong() {}
> > -    inode_strong(int cw, int dl, int nl, int dftl) :
> > -      caps_wanted(cw),
> > +    inode_strong(int n, int cw, int dl, int nl, int dftl) :
> > +      nonce(n), caps_wanted(cw),
> >        filelock(dl), nestlock(nl), dftlock(dftl) { }
> >      void encode(bufferlist &bl) const {
> > +      ::encode(nonce, bl);
> >        ::encode(caps_wanted, bl);
> >        ::encode(filelock, bl);
> >        ::encode(nestlock, bl);
> >        ::encode(dftlock, bl);
> >      }
> >      void decode(bufferlist::iterator &bl) {
> > +      ::decode(nonce, bl);
> >        ::decode(caps_wanted, bl);
> >        ::decode(filelock, bl);
> >        ::decode(nestlock, bl);
> 
> This is a wire format change without any versioning to cover it —
> we're going to need to at a minimum add feature bits to cover this. It
> might be more appropriate to introduce proper versioning at the same
> time, though. You should find examples of everything you need in my
> recent encoding changes.

Since we're not too concerned about rolling upgrades for the mds cluster, 
we could just bump the CEPH_MDSC_PROTOCOL instead of spending another 
feature bit (we're halfway through them!).

...but we should also move to the new encoding macros opportunistically :)

s

> 
> The rest looks good.
> -Greg
> 
> > @@ -208,8 +211,8 @@ public:
> >    void add_weak_inode(vinodeno_t i) {
> >      weak_inodes.insert(i);
> >    }
> > -  void add_strong_inode(vinodeno_t i, int cw, int dl, int nl, int dftl) {
> > -    strong_inodes[i] = inode_strong(cw, dl, nl, dftl);
> > +  void add_strong_inode(vinodeno_t i, int n, int cw, int dl, int nl, int dftl) {
> > +    strong_inodes[i] = inode_strong(n, cw, dl, nl, dftl);
> >    }
> >    void add_inode_locks(CInode *in, __u32 nonce) {
> >      ::encode(in->inode.ino, inode_locks);
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux