On Tue, May 28, 2019 at 9:53 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > On Thu, 2019-05-23 at 16:13 +0800, Yan, Zheng wrote: > > d_name_cmp() and update_dentry_lease() lock and unlock dentry->d_lock > > respectively. Dentry may get renamed between them. The fix is moving > > the dentry name compare into update_dentry_lease(). > > > > This patch introduce two version of update_dentry_lease(). One version > > is for the case that parent inode is locked. It does not need to check > > parent/target inode and dentry name. Another version is for the case > > that parent inode is not locked. It checks arent/target inode and dentry > > name after locking dentry->d_lock. > > > > Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx> > > --- > > fs/ceph/inode.c | 164 ++++++++++++++++++++++++++---------------------- > > 1 file changed, 88 insertions(+), 76 deletions(-) > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > > index 8cfece240ffe..e47a25495be5 100644 > > --- a/fs/ceph/inode.c > > +++ b/fs/ceph/inode.c > > @@ -1031,59 +1031,38 @@ static int fill_inode(struct inode *inode, struct page *locked_page, > > } > > > > /* > > - * caller should hold session s_mutex. > > + * caller should hold session s_mutex and dentry->d_lock. > > */ > > -static void update_dentry_lease(struct dentry *dentry, > > - struct ceph_mds_reply_lease *lease, > > - struct ceph_mds_session *session, > > - unsigned long from_time, > > - struct ceph_vino *tgt_vino, > > - struct ceph_vino *dir_vino) > > +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time, > > + struct ceph_mds_session **old_lease_session) > > { > > struct ceph_dentry_info *di = ceph_dentry(dentry); > > long unsigned duration = le32_to_cpu(lease->duration_ms); > > long unsigned ttl = from_time + (duration * HZ) / 1000; > > long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; > > - struct inode *dir; > > - struct ceph_mds_session *old_lease_session = NULL; > > - > > - /* > > - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that > > - * we expect a negative dentry. > > - */ > > - if (!tgt_vino && d_really_is_positive(dentry)) > > - return; > > - > > - if (tgt_vino && (d_really_is_negative(dentry) || > > - !ceph_ino_compare(d_inode(dentry), tgt_vino))) > > - return; > > > > - spin_lock(&dentry->d_lock); > > dout("update_dentry_lease %p duration %lu ms ttl %lu\n", > > dentry, duration, ttl); > > > > - dir = d_inode(dentry->d_parent); > > - > > - /* make sure parent matches dir_vino */ > > - if (!ceph_ino_compare(dir, dir_vino)) > > - goto out_unlock; > > - > > /* only track leases on regular dentries */ > > if (ceph_snap(dir) != CEPH_NOSNAP) > > - goto out_unlock; > > + return; > > > > di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); > > if (duration == 0) { > > __ceph_dentry_dir_lease_touch(di); > > - goto out_unlock; > > + return; > > } > > > > if (di->lease_gen == session->s_cap_gen && > > time_before(ttl, di->time)) > > - goto out_unlock; /* we already have a newer lease. */ > > + return; /* we already have a newer lease. */ > > > > if (di->lease_session && di->lease_session != session) { > > - old_lease_session = di->lease_session; > > + *old_lease_session = di->lease_session; > > di->lease_session = NULL; > > } > > > > @@ -1096,6 +1075,62 @@ static void update_dentry_lease(struct dentry *dentry, > > di->time = ttl; > > > > __ceph_dentry_lease_touch(di); > > +} > > + > > +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time) > > +{ > > + struct ceph_mds_session *old_lease_session = NULL; > > + spin_lock(&dentry->d_lock); > > + __update_dentry_lease(dir, dentry, lease, session, from_time, > > + &old_lease_session); > > + spin_unlock(&dentry->d_lock); > > + if (old_lease_session) > > + ceph_put_mds_session(old_lease_session); > > +} > > + > > +/* > > + * update dentry lease without having parent inode locked > > + */ > > +static void update_dentry_lease_careful(struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time, > > + char *dname, u32 dname_len, > > + struct ceph_vino *pdvino, > > + struct ceph_vino *ptvino) > > + > > This argument list is huge. I wonder if we'd be better off passing in a > pointer to "req" instead and getting some of the fields from that. For > instance, session, from_time, etc... > If we pass 'req' to update_dentry_lease_careful(), we need to re-define some local variables already exist in ceph_fill_trace(). It requires more code. Regards Yan, Zheng > > +{ > > + struct inode *dir; > > + struct ceph_mds_session *old_lease_session = NULL; > > + > > + spin_lock(&dentry->d_lock); > > + /* make sure dentry's name matches target */ > > + if (dentry->d_name.len != dname_len || > > + memcmp(dentry->d_name.name, dname, dname_len)) > > + goto out_unlock; > > + > > + dir = d_inode(dentry->d_parent); > > + /* make sure parent matches dvino */ > > + if (!ceph_ino_compare(dir, pdvino)) > > + goto out_unlock; > > + > > + /* make sure dentry's inode matches target. NULL ptvino means that > > + * we expect a negative dentry */ > > + if (ptvino) { > > + if (d_really_is_negative(dentry)) > > + goto out_unlock; > > + if (!ceph_ino_compare(d_inode(dentry), ptvino)) > > + goto out_unlock; > > + } else { > > + if (d_really_is_positive(dentry)) > > + goto out_unlock; > > + } > > + > > + __update_dentry_lease(dir, dentry, lease, session, > > + from_time, &old_lease_session); > > out_unlock: > > spin_unlock(&dentry->d_lock); > > if (old_lease_session) > > @@ -1160,19 +1195,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) > > return 0; > > } > > > > -static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) > > -{ > > - int ret; > > - > > - /* take d_lock to ensure dentry->d_name stability */ > > - spin_lock(&dentry->d_lock); > > - ret = dentry->d_name.len - len; > > - if (!ret) > > - ret = memcmp(dentry->d_name.name, name, len); > > - spin_unlock(&dentry->d_lock); > > - return ret; > > -} > > - > > /* > > * Incorporate results into the local cache. This is either just > > * one inode, or a directory, dentry, and possibly linked-to inode (e.g., > > @@ -1375,10 +1397,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > } else if (have_lease) { > > if (d_unhashed(dn)) > > d_add(dn, NULL); > > - update_dentry_lease(dn, rinfo->dlease, > > - session, > > - req->r_request_started, > > - NULL, &dvino); > > + update_dentry_lease(dir, dn, > > + rinfo->dlease, session, > > + req->r_request_started); > > } > > goto done; > > } > > @@ -1400,11 +1421,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > } > > > > if (have_lease) { > > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > - update_dentry_lease(dn, rinfo->dlease, session, > > - req->r_request_started, > > - &tvino, &dvino); > > + update_dentry_lease(dir, dn, > > + rinfo->dlease, session, > > + req->r_request_started); > > } > > dout(" final dn %p\n", dn); > > } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || > > @@ -1422,27 +1441,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > err = splice_dentry(&req->r_dentry, in); > > if (err < 0) > > goto done; > > - } else if (rinfo->head->is_dentry && > > - !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { > > + } else if (rinfo->head->is_dentry && req->r_dentry) { > > + /* parent inode is not locked, be carefull */ > > struct ceph_vino *ptvino = NULL; > > - > > - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || > > - le32_to_cpu(rinfo->dlease->duration_ms)) { > > - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > > - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > > - > > - if (rinfo->head->is_target) { > > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > - ptvino = &tvino; > > - } > > - > > - update_dentry_lease(req->r_dentry, rinfo->dlease, > > - session, req->r_request_started, ptvino, > > - &dvino); > > - } else { > > - dout("%s: no dentry lease or dir cap\n", __func__); > > + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > > + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > > + if (rinfo->head->is_target) { > > + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > + ptvino = &tvino; > > } > > + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, > > + session, req->r_request_started, > > + rinfo->dname, rinfo->dname_len, > > + &dvino, ptvino); > > } > > done: > > dout("fill_trace done err=%d\n", err); > > @@ -1604,7 +1616,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > /* FIXME: release caps/leases if error occurs */ > > for (i = 0; i < rinfo->dir_nr; i++) { > > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > > - struct ceph_vino tvino, dvino; > > + struct ceph_vino tvino; > > > > dname.name = rde->name; > > dname.len = rde->name_len; > > @@ -1705,9 +1717,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > > > ceph_dentry(dn)->offset = rde->offset; > > > > - dvino = ceph_vino(d_inode(parent)); > > - update_dentry_lease(dn, rde->lease, req->r_session, > > - req->r_request_started, &tvino, &dvino); > > + update_dentry_lease(d_inode(parent), dn, > > + rde->lease, req->r_session, > > + req->r_request_started); > > > > if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { > > ret = fill_readdir_cache(d_inode(parent), dn, > > That said... > > Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> >