On Mon, Apr 09, 2018 at 11:57:07AM +0200, Christoph Hellwig wrote: > The filestreams allocator stores an xfs_fstrm_item structure in the MRU to > cache inode number to agno mappings for a particular length of time. Each > xfs_fstrm_item contains the internal MRU structure, an inode pointer and > agno value. > > The inode pointer stored in the xfs_fstrm_item is not referenced, however, > which means the inode itself can be removed and reclaimed before the MRU > item is freed. If this occurs, xfs_fstrm_free_func() can access freed or > unrelated memory through xfs_fstrm_item->ip and crash. > > The obvious solution is to grab an inode reference for xfs_fstrm_item. > The filestream mechanism only actually uses the inode pointer as a means > to access the xfs_mount, however. Rather than add unnecessary > complexity, simplify the implementation to store an xfs_mount pointer in > struct xfs_mru_cache, and pass it to the free callback. This also > requires updates to the tracepoint class to provide the associated data > via parameters rather than the inode and a minor hack to peek at the MRU > key to establish the inode number at free time. > > Based on debugging work and an earlier patch from Brian Foster, who > also wrote most of this changelog. > > Reported-by: Brian Foster <bfoster@xxxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx> Looks ok, doesn't seem to cause any testing regressions, Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --D > --- > fs/xfs/xfs_filestream.c | 19 +++++++++---------- > fs/xfs/xfs_mru_cache.c | 8 +++++--- > fs/xfs/xfs_mru_cache.h | 8 ++++---- > fs/xfs/xfs_trace.h | 14 +++++++------- > 4 files changed, 25 insertions(+), 24 deletions(-) > > diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c > index 043ca3808ea2..5131a6e25fc9 100644 > --- a/fs/xfs/xfs_filestream.c > +++ b/fs/xfs/xfs_filestream.c > @@ -34,7 +34,6 @@ > > struct xfs_fstrm_item { > struct xfs_mru_cache_elem mru; > - struct xfs_inode *ip; > xfs_agnumber_t ag; /* AG in use for this directory */ > }; > > @@ -122,14 +121,15 @@ xfs_filestream_put_ag( > > static void > xfs_fstrm_free_func( > + void *data, > struct xfs_mru_cache_elem *mru) > { > + struct xfs_mount *mp = data; > struct xfs_fstrm_item *item = > container_of(mru, struct xfs_fstrm_item, mru); > > - xfs_filestream_put_ag(item->ip->i_mount, item->ag); > - > - trace_xfs_filestream_free(item->ip, item->ag); > + xfs_filestream_put_ag(mp, item->ag); > + trace_xfs_filestream_free(mp, mru->key, item->ag); > > kmem_free(item); > } > @@ -165,7 +165,7 @@ xfs_filestream_pick_ag( > trylock = XFS_ALLOC_FLAG_TRYLOCK; > > for (nscan = 0; 1; nscan++) { > - trace_xfs_filestream_scan(ip, ag); > + trace_xfs_filestream_scan(mp, ip->i_ino, ag); > > pag = xfs_perag_get(mp, ag); > > @@ -265,7 +265,6 @@ xfs_filestream_pick_ag( > goto out_put_ag; > > item->ag = *agp; > - item->ip = ip; > > err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); > if (err) { > @@ -333,7 +332,7 @@ xfs_filestream_lookup_ag( > ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; > xfs_mru_cache_done(mp->m_filestream); > > - trace_xfs_filestream_lookup(ip, ag); > + trace_xfs_filestream_lookup(mp, ip->i_ino, ag); > goto out; > } > > @@ -399,7 +398,7 @@ xfs_filestream_new_ag( > * Only free the item here so we skip over the old AG earlier. > */ > if (mru) > - xfs_fstrm_free_func(mru); > + xfs_fstrm_free_func(mp, mru); > > IRELE(pip); > exit: > @@ -426,8 +425,8 @@ xfs_filestream_mount( > * timer tunable to within about 10 percent. This requires at least 10 > * groups. > */ > - return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, > - 10, xfs_fstrm_free_func); > + return xfs_mru_cache_create(&mp->m_filestream, mp, > + xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); > } > > void > diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c > index f8a674d7f092..70eea7ae2876 100644 > --- a/fs/xfs/xfs_mru_cache.c > +++ b/fs/xfs/xfs_mru_cache.c > @@ -112,6 +112,7 @@ struct xfs_mru_cache { > xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ > struct delayed_work work; /* Workqueue data for reaping. */ > unsigned int queued; /* work has been queued */ > + void *data; > }; > > static struct workqueue_struct *xfs_mru_reap_wq; > @@ -259,7 +260,7 @@ _xfs_mru_cache_clear_reap_list( > > list_for_each_entry_safe(elem, next, &tmp, list_node) { > list_del_init(&elem->list_node); > - mru->free_func(elem); > + mru->free_func(mru->data, elem); > } > > spin_lock(&mru->lock); > @@ -326,6 +327,7 @@ xfs_mru_cache_uninit(void) > int > xfs_mru_cache_create( > struct xfs_mru_cache **mrup, > + void *data, > unsigned int lifetime_ms, > unsigned int grp_count, > xfs_mru_cache_free_func_t free_func) > @@ -369,7 +371,7 @@ xfs_mru_cache_create( > > mru->grp_time = grp_time; > mru->free_func = free_func; > - > + mru->data = data; > *mrup = mru; > > exit: > @@ -492,7 +494,7 @@ xfs_mru_cache_delete( > > elem = xfs_mru_cache_remove(mru, key); > if (elem) > - mru->free_func(elem); > + mru->free_func(mru->data, elem); > } > > /* > diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h > index fb5245ba5ff7..b3f3fbdfcc47 100644 > --- a/fs/xfs/xfs_mru_cache.h > +++ b/fs/xfs/xfs_mru_cache.h > @@ -26,13 +26,13 @@ struct xfs_mru_cache_elem { > }; > > /* Function pointer type for callback to free a client's data pointer. */ > -typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem); > +typedef void (*xfs_mru_cache_free_func_t)(void *, struct xfs_mru_cache_elem *); > > int xfs_mru_cache_init(void); > void xfs_mru_cache_uninit(void); > -int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, > - unsigned int grp_count, > - xfs_mru_cache_free_func_t free_func); > +int xfs_mru_cache_create(struct xfs_mru_cache **mrup, void *data, > + unsigned int lifetime_ms, unsigned int grp_count, > + xfs_mru_cache_free_func_t free_func); > void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); > int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, > struct xfs_mru_cache_elem *elem); > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index a982c0b623d0..8955254b900e 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -506,8 +506,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); > DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); > > DECLARE_EVENT_CLASS(xfs_filestream_class, > - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), > - TP_ARGS(ip, agno), > + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), > + TP_ARGS(mp, ino, agno), > TP_STRUCT__entry( > __field(dev_t, dev) > __field(xfs_ino_t, ino) > @@ -515,10 +515,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, > __field(int, streams) > ), > TP_fast_assign( > - __entry->dev = VFS_I(ip)->i_sb->s_dev; > - __entry->ino = ip->i_ino; > + __entry->dev = mp->m_super->s_dev; > + __entry->ino = ino; > __entry->agno = agno; > - __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); > + __entry->streams = xfs_filestream_peek_ag(mp, agno); > ), > TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", > MAJOR(__entry->dev), MINOR(__entry->dev), > @@ -528,8 +528,8 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, > ) > #define DEFINE_FILESTREAM_EVENT(name) \ > DEFINE_EVENT(xfs_filestream_class, name, \ > - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ > - TP_ARGS(ip, agno)) > + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \ > + TP_ARGS(mp, ino, agno)) > DEFINE_FILESTREAM_EVENT(xfs_filestream_free); > DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); > DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); > -- > 2.16.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html