From: "Yan, Zheng" <zheng.z.yan@xxxxxxxxx> The MDS may crash after journaling the new max size, but before sending the new max size to the client. Later when the MDS recovers, the client re-requests the new max size, but the MDS finds max size unchanged. So the client waits for the new max size forever. This issue can be avoided by checking the client cap's last_sent and sharing the inode max size if it is zero (a reconnected cap's last_sent is zero). Signed-off-by: Yan, Zheng <zheng.z.yan@xxxxxxxxx> --- src/mds/Locker.cc | 18 ++++++++++++++---- src/mds/Locker.h | 2 +- src/mds/MDCache.cc | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 0055a19..4d45f99 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2089,7 +2089,7 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, } -void Locker::share_inode_max_size(CInode *in) +void Locker::share_inode_max_size(CInode *in, Capability *only_cap) { /* * only share if currently issued a WR cap. if client doesn't have it, @@ -2097,9 +2097,12 @@ void Locker::share_inode_max_size(CInode *in) * the cap later. 
*/ dout(10) << "share_inode_max_size on " << *in << dendl; - for (map<client_t,Capability*>::iterator it = in->client_caps.begin(); - it != in->client_caps.end(); - ++it) { + map<client_t, Capability*>::iterator it; + if (only_cap) + it = in->client_caps.find(only_cap->get_client()); + else + it = in->client_caps.begin(); + for (; it != in->client_caps.end(); ++it) { const client_t client = it->first; Capability *cap = it->second; if (cap->is_suppress()) @@ -2115,6 +2118,8 @@ void Locker::share_inode_max_size(CInode *in) in->encode_cap_message(m, cap); mds->send_message_client_counted(m, client); } + if (only_cap) + break; } } @@ -2398,6 +2403,11 @@ void Locker::handle_client_caps(MClientCaps *m) bool did_issue = eval(in, CEPH_CAP_LOCKS); if (!did_issue && (cap->wanted() & ~cap->pending())) issue_caps(in, cap); + if (cap->get_last_seq() == 0 && + (cap->pending() & (CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER))) { + cap->issue_norevoke(cap->issued()); + share_inode_max_size(in, cap); + } } } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 3f79996..d98104f 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -276,7 +276,7 @@ public: void calc_new_client_ranges(CInode *in, uint64_t size, map<client_t, client_writeable_range_t>& new_ranges); bool check_inode_max_size(CInode *in, bool force_wrlock=false, bool update_size=false, uint64_t newsize=0, utime_t mtime=utime_t()); - void share_inode_max_size(CInode *in); + void share_inode_max_size(CInode *in, Capability *only_cap=0); private: friend class C_MDL_CheckMaxSize; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 194f983..459b400 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5073,6 +5073,8 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap) SnapRealm *realm = in->find_snaprealm(); if (realm->have_past_parents_open()) { dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl; + if (cap->get_last_seq() == 0) 
+ cap->issue_norevoke(cap->issued()); // reconnected cap cap->set_last_issue(); MClientCaps *reap = new MClientCaps(CEPH_CAP_OP_IMPORT, in->ino(), -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html