From: Xiubo Li <xiubli@xxxxxxxxxx> commit aaf67de78807c59c35bafb5003d4fb457c764800 upstream. When trimming the caps and just after the 'session->s_cap_lock' is released in ceph_iterate_session_caps() the cap maybe removed by another thread, and when using the stale cap memory in the callbacks it will trigger use-after-free crash. We need to check the existence of the cap just after the 'ci->i_ceph_lock' being acquired. And do nothing if it's already removed. Cc: stable@xxxxxxxxxxxxxxx Link: https://tracker.ceph.com/issues/43272 Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> Reviewed-by: Luís Henriques <lhenriques@xxxxxxx> Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- fs/ceph/caps.c | 2 - fs/ceph/debugfs.c | 16 +++++++---- fs/ceph/mds_client.c | 72 ++++++++++++++++++++++++++++++++------------------- fs/ceph/mds_client.h | 3 -- fs/ceph/super.h | 2 + 5 files changed, 61 insertions(+), 34 deletions(-) --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -430,7 +430,7 @@ void ceph_reservation_status(struct ceph * * Called with i_ceph_lock held. */ -static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) +struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) { struct ceph_cap *cap; struct rb_node *n = ci->i_caps.rb_node; --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_ return 0; } -static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) +static int caps_show_cb(struct inode *inode, int mds, void *p) { + struct ceph_inode_info *ci = ceph_inode(inode); struct seq_file *s = p; + struct ceph_cap *cap; - seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), - cap->session->s_mds, - ceph_cap_string(cap->issued), - ceph_cap_string(cap->implemented)); + spin_lock(&ci->i_ceph_lock); + cap = __get_cap_for_mds(ci, mds); + if (cap) + seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), + cap->session->s_mds, + ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented)); + spin_unlock(&ci->i_ceph_lock); return 0; } --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1632,8 +1632,8 @@ static void cleanup_session_requests(str * Caller must hold session s_mutex. */ int ceph_iterate_session_caps(struct ceph_mds_session *session, - int (*cb)(struct inode *, struct ceph_cap *, - void *), void *arg) + int (*cb)(struct inode *, int mds, void *), + void *arg) { struct list_head *p; struct ceph_cap *cap; @@ -1645,6 +1645,8 @@ int ceph_iterate_session_caps(struct cep spin_lock(&session->s_cap_lock); p = session->s_caps.next; while (p != &session->s_caps) { + int mds; + cap = list_entry(p, struct ceph_cap, session_caps); inode = igrab(&cap->ci->netfs.inode); if (!inode) { @@ -1652,6 +1654,7 @@ int ceph_iterate_session_caps(struct cep continue; } session->s_cap_iterator = cap; + mds = cap->mds; spin_unlock(&session->s_cap_lock); if (last_inode) { @@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct cep old_cap = NULL; } - ret = cb(inode, cap, arg); + ret = cb(inode, mds, arg); last_inode = inode; spin_lock(&session->s_cap_lock); @@ -1696,20 +1699,25 @@ out: return ret; } -static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, - void *arg) +static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); bool invalidate = false; - int iputs; + struct ceph_cap *cap; + int iputs = 0; - dout("removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); spin_lock(&ci->i_ceph_lock); - iputs = ceph_purge_inode_cap(inode, cap, &invalidate); + cap = __get_cap_for_mds(ci, mds); + if (cap) { + dout(" removing cap %p, ci is %p, inode is %p\n", + cap, ci, &ci->netfs.inode); + + iputs = ceph_purge_inode_cap(inode, cap, &invalidate); + } spin_unlock(&ci->i_ceph_lock); - wake_up_all(&ci->i_cap_wq); + if (cap) + wake_up_all(&ci->i_cap_wq); if (invalidate) ceph_queue_invalidate(inode); while (iputs--) @@ -1780,8 +1788,7 @@ enum { * * caller must hold s_mutex. */ -static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, - void *arg) +static int wake_up_session_cb(struct inode *inode, int mds, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); unsigned long ev = (unsigned long)arg; @@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct ino ci->i_requested_max_size = 0; spin_unlock(&ci->i_ceph_lock); } else if (ev == RENEWCAPS) { - if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) { - /* mds did not re-issue stale cap */ - spin_lock(&ci->i_ceph_lock); + struct ceph_cap *cap; + + spin_lock(&ci->i_ceph_lock); + cap = __get_cap_for_mds(ci, mds); + /* mds did not re-issue stale cap */ + if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) cap->issued = cap->implemented = CEPH_CAP_PIN; - spin_unlock(&ci->i_ceph_lock); - } + spin_unlock(&ci->i_ceph_lock); } else if (ev == FORCE_RO) { } wake_up_all(&ci->i_cap_wq); @@ -1959,16 +1968,22 @@ out: * Yes, this is a bit sloppy. Our only real goal here is to respond to * memory pressure from the MDS, though, so it needn't be perfect. */ -static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) +static int trim_caps_cb(struct inode *inode, int mds, void *arg) { int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; + struct ceph_cap *cap; if (*remaining <= 0) return -1; spin_lock(&ci->i_ceph_lock); + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + spin_unlock(&ci->i_ceph_lock); + return 0; + } mine = cap->issued | cap->implemented; used = __ceph_caps_used(ci); wanted = __ceph_caps_file_wanted(ci); @@ -3911,26 +3926,22 @@ out_unlock: /* * Encode information about a cap for a reconnect with the MDS. */ -static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, - void *arg) +static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; } rec; - struct ceph_inode_info *ci = cap->ci; + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; + struct ceph_cap *cap; char *path; - int pathlen = 0, err; + int pathlen = 0, err = 0; u64 pathbase; u64 snap_follows; - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", - inode, ceph_vinop(inode), cap, cap->cap_id, - ceph_cap_string(cap->issued)); - dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ @@ -3947,6 +3958,15 @@ static int reconnect_caps_cb(struct inod } spin_lock(&ci->i_ceph_lock); + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + spin_unlock(&ci->i_ceph_lock); + goto out_err; + } + dout(" adding %p ino %llx.%llx cap %p %lld %s\n", + inode, ceph_vinop(inode), cap, cap->cap_id, + ceph_cap_string(cap->issued)); + cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ cap->mseq = 0; /* and migrate_seq */ --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -541,8 +541,7 @@ extern void ceph_flush_cap_releases(stru extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); extern int ceph_iterate_session_caps(struct ceph_mds_session *session, - int (*cb)(struct inode *, - struct ceph_cap *, void *), + int (*cb)(struct inode *, int mds, void *), void *arg); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1192,6 +1192,8 @@ extern void ceph_kick_flushing_caps(stru struct ceph_mds_session *session); void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, struct ceph_inode_info *ci); +extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, + int mds); extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds); extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,