On Sat, 2021-07-17 at 18:06 +0800, Xiyu Yang wrote: > refcount_t type and corresponding API can protect refcounters from > accidental underflow and overflow and further use-after-free situations. > > Signed-off-by: Xiyu Yang <xiyuyang19@xxxxxxxxxxxx> > Signed-off-by: Xin Tan <tanxin.ctf@xxxxxxxxx> > --- > fs/ceph/snap.c | 15 ++++++++------- > fs/ceph/super.h | 3 ++- > 2 files changed, 10 insertions(+), 8 deletions(-) > > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c > index 4ac0606dcbd4..d4ec9c5118bd 100644 > --- a/fs/ceph/snap.c > +++ b/fs/ceph/snap.c > @@ -68,14 +68,15 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, > lockdep_assert_held(&mdsc->snap_rwsem); > > dout("get_realm %p %d -> %d\n", realm, > - atomic_read(&realm->nref), atomic_read(&realm->nref)+1); > + refcount_read(&realm->nref), refcount_read(&realm->nref)+1); > /* > * since we _only_ increment realm refs or empty the empty > * list with snap_rwsem held, adjusting the empty list here is > * safe. we do need to protect against concurrent empty list > * additions, however. > */ > - if (atomic_inc_return(&realm->nref) == 1) { > + refcount_inc(&realm->nref); > + if (refcount_read(&realm->nref) == 1) { The above is potentially racy as you've turned a single atomic operation into two. Another task could come in and increment or decrement realm->nref just after your refcount_inc but before the refcount_read, and then the read would show the wrong result. 
FWIW, Yejune Deng (cc'ed) proposed a very similar patch a few months ago that caused this regression: https://tracker.ceph.com/issues/50281 > spin_lock(&mdsc->snap_empty_lock); > list_del_init(&realm->empty_item); > spin_unlock(&mdsc->snap_empty_lock); > @@ -121,7 +122,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( > if (!realm) > return ERR_PTR(-ENOMEM); > > - atomic_set(&realm->nref, 1); /* for caller */ > + refcount_set(&realm->nref, 1); /* for caller */ > realm->ino = ino; > INIT_LIST_HEAD(&realm->children); > INIT_LIST_HEAD(&realm->child_item); > @@ -209,8 +210,8 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, > lockdep_assert_held_write(&mdsc->snap_rwsem); > > dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, > - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); > - if (atomic_dec_and_test(&realm->nref)) > + refcount_read(&realm->nref), refcount_read(&realm->nref)-1); > + if (refcount_dec_and_test(&realm->nref)) > __destroy_snap_realm(mdsc, realm); > } > > @@ -221,8 +222,8 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, > struct ceph_snap_realm *realm) > { > dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, > - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); > - if (!atomic_dec_and_test(&realm->nref)) > + refcount_read(&realm->nref), refcount_read(&realm->nref)-1); > + if (!refcount_dec_and_test(&realm->nref)) > return; > > if (down_write_trylock(&mdsc->snap_rwsem)) { > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 6b6332a5c113..3abb00d7a0eb 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -2,6 +2,7 @@ > #ifndef _FS_CEPH_SUPER_H > #define _FS_CEPH_SUPER_H > > +#include <linux/refcount.h> > #include <linux/ceph/ceph_debug.h> > > #include <asm/unaligned.h> > @@ -859,7 +860,7 @@ struct ceph_readdir_cache_control { > struct ceph_snap_realm { > u64 ino; > struct inode *inode; > - atomic_t nref; > + refcount_t nref; > struct rb_node node; > > u64 created, seq; -- Jeff 
Layton <jlayton@xxxxxxxxxx>