For reference, here's the debugging code I'm using.  Enable the following:

	CONFIG_DEBUG_PAGE_REF
	CONFIG_DEBUG_PAGE_MARK
	CONFIG_DEBUG_PAGE_REF_ONLY_MARKED

and then enable the page_ref tracepoints:

	echo 1 >/sys/kernel/debug/tracing/events/page_ref/enable
	echo 1 >/sys/kernel/debug/tracing/events/block/bio/enable
	echo 1 >/sys/kernel/debug/tracing/events/block/bio_endio/enable

David
---
 block/bio.c                    | 50 ++++++++++++++++++++-
 fs/iomap/buffered-io.c         | 10 ++++
 fs/pipe.c                      |  1 
 fs/splice.c                    | 23 +++++++++
 include/linux/bio.h            |  4 -
 include/linux/blk_types.h      |  2 
 include/linux/page-flags.h     | 12 +++++
 include/linux/page_ref.h       | 34 ++++++++------
 include/linux/uio.h            |  1 
 include/trace/events/block.h   | 95 ++++++++++++++++++++++++++++++++++++++++
 include/trace/events/mmflags.h |  9 +++
 lib/iov_iter.c                 | 28 ++++++++++++
 mm/Kconfig.debug               | 17 +++++++
 mm/page_alloc.c                |  3 +
 mm/readahead.c                 |  8 +++
 15 files changed, 276 insertions(+), 21 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index fc57f0aa098e..ae0997688e08 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -20,6 +20,7 @@
 #include <linux/blk-crypto.h>
 #include <linux/xarray.h>
 
+#include <trace/events/page_ref.h>
 #include <trace/events/block.h>
 #include "blk.h"
 #include "blk-rq-qos.h"
@@ -214,6 +215,8 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
 
 void bio_uninit(struct bio *bio)
 {
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio(bio, bio_trace_where_uninit, 0);
 #ifdef CONFIG_BLK_CGROUP
 	if (bio->bi_blkg) {
 		blkg_put(bio->bi_blkg);
@@ -232,6 +235,9 @@ static void bio_free(struct bio *bio)
 	struct bio_set *bs = bio->bi_pool;
 	void *p = bio;
 
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio(bio, bio_trace_where_free, 0);
+
 	WARN_ON_ONCE(!bs);
 
 	bio_uninit(bio);
@@ -247,6 +253,9 @@
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	      unsigned short max_vecs, blk_opf_t opf)
 {
+	static atomic_t bio_debug_ids;
+
+	bio->bi_debug_id = atomic_inc_return(&bio_debug_ids);
 	bio->bi_next = NULL;
 	bio->bi_bdev = bdev;
 	bio->bi_opf = opf;
@@ -1110,6 +1119,13 @@ void __bio_add_page(struct bio *bio, struct page *page,
 {
 	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
 
+	if (PageDebugMark(page)) {
+		trace_page_ref_set(page, 666);
+		bio_set_flag(bio, BIO_TRACE);
+		trace_bio(bio, bio_trace_where_add_page,
+			  page_to_pfn(page));
+	}
+
 	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
 	WARN_ON_ONCE(bio_full(bio, len));
@@ -1172,12 +1188,23 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {
 	struct bvec_iter_all iter_all;
 	struct bio_vec *bvec;
+	unsigned int i = 0;
+
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio(bio, bio_trace_where_rel_pages, 0);
 
 	bio_for_each_segment_all(bvec, bio, iter_all) {
-		if (mark_dirty && !PageCompound(bvec->bv_page))
-			set_page_dirty_lock(bvec->bv_page);
-		bio_release_page(bio, bvec->bv_page);
+		if (PageDebugMark(bvec->bv_page))
+			trace_page_ref_set(bvec->bv_page, 980 + i++);
 	}
+
+	if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+	    bio_flagged(bio, BIO_PAGE_PINNED))
+		bio_for_each_segment_all(bvec, bio, iter_all) {
+			if (mark_dirty && !PageCompound(bvec->bv_page))
+				set_page_dirty_lock(bvec->bv_page);
+			bio_release_page(bio, bvec->bv_page);
+		}
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1445,6 +1472,9 @@ void bio_free_pages(struct bio *bio)
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
 
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio(bio, bio_trace_where_free_pages, 0);
+
 	bio_for_each_segment_all(bvec, bio, iter_all)
 		__free_page(bvec->bv_page);
 }
@@ -1534,6 +1564,8 @@ void bio_check_pages_dirty(struct bio *bio)
 	struct bvec_iter_all iter_all;
 
 	bio_for_each_segment_all(bvec, bio, iter_all) {
+		if (PageDebugMark(bvec->bv_page))
+			trace_page_ref_set(bvec->bv_page, 654);
 		if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
 			goto defer;
 	}
@@ -1583,6 +1615,8 @@ static inline bool bio_remaining_done(struct bio *bio)
  **/
 void bio_endio(struct bio *bio)
 {
+	struct bvec_iter_all iter_all;
+	struct bio_vec *bvec;
 again:
 	if (!bio_remaining_done(bio))
 		return;
@@ -1591,6 +1625,14 @@ void bio_endio(struct bio *bio)
 
 	rq_qos_done_bio(bio);
 
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio(bio, bio_trace_where_endio, 0);
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		if (PageDebugMark(bvec->bv_page))
+			trace_page_ref_set(bvec->bv_page, 623);
+	}
+
 	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
 		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -1612,6 +1654,8 @@ void bio_endio(struct bio *bio)
 	blk_throtl_bio_endio(bio);
 	/* release cgroup info */
 	bio_uninit(bio);
+	if (bio_flagged(bio, BIO_TRACE))
+		trace_bio_endio(bio);
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio);
 }
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 356193e44cf0..21790ce471d3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -18,6 +18,7 @@
 #include <linux/sched/signal.h>
 #include <linux/migrate.h>
 #include "trace.h"
+#include <trace/events/block.h>
 
 #include "../internal.h"
@@ -619,6 +620,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 		goto out_no_page;
 	}
 
+#if 0
+#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
+	if (folio->mapping->host->i_sb->s_magic == XFS_SUPER_MAGIC)
+		folio_set_debug_mark(folio);
+#endif
+
 	/*
 	 * Now we have a locked folio, before we do anything with it we need to
 	 * check that the iomap we have cached is not stale. The inode extent
@@ -1311,6 +1318,9 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 		next = bio->bi_private;
 
 		/* walk all folios in bio, ending page IO on them */
+		if (bio_flagged(bio, BIO_TRACE))
+			trace_bio(bio, bio_trace_where_ioend, 0);
+
 		bio_for_each_folio_all(fi, bio) {
 			iomap_finish_folio_write(inode, fi.folio, fi.length,
 					error);
diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..56293d706ef3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -208,6 +208,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
 			      struct pipe_buffer *buf)
 {
 	put_page(buf->page);
+	buf->page = (void *)0xaa55aa55aa55aa55UL;
 }
 EXPORT_SYMBOL(generic_pipe_buf_release);
diff --git a/fs/splice.c b/fs/splice.c
index 5969b7a1d353..fc59b5038f2e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -34,6 +34,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/sched/signal.h>
+#include <trace/events/page_ref.h>
 
 #include "internal.h"
@@ -304,6 +305,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 	int ret;
 
 	iov_iter_pipe(&to, ITER_DEST, pipe, len);
+	to.debug = true;
 	init_sync_kiocb(&kiocb, in);
 	kiocb.ki_pos = *ppos;
 	ret = call_read_iter(in, &kiocb, &to);
@@ -597,6 +599,9 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
+static struct page *splice_tmp;
+static DEFINE_MUTEX(splice_tmp_lock);
+
 /**
  * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
@@ -626,6 +631,19 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 					GFP_KERNEL);
 	ssize_t ret;
 
+	mutex_lock(&splice_tmp_lock);
+	if (!splice_tmp) {
+		pr_notice("alloc splice_tmp\n");
+		splice_tmp = alloc_page(GFP_USER);
+		if (splice_tmp) {
+			SetPageDebugMark(splice_tmp);
+			page_ref_add(splice_tmp, 100);
+		}
+	}
+	mutex_unlock(&splice_tmp_lock);
+	if (!splice_tmp)
+		return -ENOMEM;
+
 	if (unlikely(!array))
 		return -ENOMEM;
@@ -675,7 +693,12 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 				goto done;
 			}
 
+			if (PageDebugMark(buf->page))
+				trace_page_ref_set(buf->page, 888);
+
 			array[n].bv_page = buf->page;
+			//array[n].bv_page = splice_tmp;
+			//trace_page_ref_set(splice_tmp, 887);
 			array[n].bv_len = this_len;
 			array[n].bv_offset = buf->offset;
 			left -= this_len;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b2c09997d79c..cafa26637067 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -484,8 +484,8 @@ void zero_fill_bio(struct bio *bio);
 
 static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
 {
-	if (bio_flagged(bio, BIO_PAGE_REFFED) ||
-	    bio_flagged(bio, BIO_PAGE_PINNED))
+	//if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+	//    bio_flagged(bio, BIO_PAGE_PINNED))
 		__bio_release_pages(bio, mark_dirty);
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a0e339ff3d09..b4e563595a5a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -302,6 +302,7 @@ struct bio {
 	struct bio_vec *bi_io_vec;	/* the actual vec list */
 
 	struct bio_set *bi_pool;
+	unsigned int bi_debug_id;	/* Tracing debug ID */
 
 	/*
 	 * We can inline a number of vecs at the end of the bio, to avoid
@@ -334,6 +335,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_TRACE,		/* Trace bio lifetime */
 	BIO_FLAG_LAST
 };
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 69e93a0c1277..80cbf784239e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -138,6 +138,9 @@ enum pageflags {
 #endif
 #ifdef CONFIG_KASAN_HW_TAGS
 	PG_skip_kasan_poison,
+#endif
+#ifdef CONFIG_DEBUG_PAGE_MARK
+	PG_debug_mark,
 #endif
 	__NR_PAGEFLAGS,
@@ -694,6 +697,15 @@ static __always_inline bool PageKsm(struct page *page)
 TESTPAGEFLAG_FALSE(Ksm, ksm)
 #endif
 
+#ifdef CONFIG_DEBUG_PAGE_MARK
+/*
+ * Debug marks are just used for page_ref tracepoint control and display.
+ */
+PAGEFLAG(DebugMark, debug_mark, PF_ANY)
+#else
+TESTPAGEFLAG_FALSE(DebugMark, debug_mark)
+#endif
+
 u64 stable_page_flags(struct page *page);
 
 /**
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index d7c2d33baa7f..7bc1a94d9cbb 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -24,7 +24,11 @@ DECLARE_TRACEPOINT(page_ref_unfreeze);
  *
 * See trace_##name##_enabled(void) in include/linux/tracepoint.h
 */
-#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
+#ifndef CONFIG_DEBUG_PAGE_REF_ONLY_MARKED
+#define page_ref_tracepoint_active(p, t) tracepoint_enabled(t)
+#else
+#define page_ref_tracepoint_active(p, t) (tracepoint_enabled(t) && PageDebugMark(p))
+#endif
 
 extern void __page_ref_set(struct page *page, int v);
 extern void __page_ref_mod(struct page *page, int v);
@@ -36,7 +40,7 @@ extern void __page_ref_unfreeze(struct page *page, int v);
 
 #else
 
-#define page_ref_tracepoint_active(t) false
+#define page_ref_tracepoint_active(page, t) false
 
 static inline void __page_ref_set(struct page *page, int v)
 {
@@ -97,7 +101,7 @@ static inline int page_count(const struct page *page)
 
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_refcount, v);
-	if (page_ref_tracepoint_active(page_ref_set))
+	if (page_ref_tracepoint_active(page, page_ref_set))
 		__page_ref_set(page, v);
 }
@@ -118,7 +122,7 @@ static inline void init_page_count(struct page *page)
 
 static inline void page_ref_add(struct page *page, int nr)
 {
 	atomic_add(nr, &page->_refcount);
-	if (page_ref_tracepoint_active(page_ref_mod))
+	if (page_ref_tracepoint_active(page, page_ref_mod))
 		__page_ref_mod(page, nr);
 }
@@ -130,7 +134,7 @@ static inline void folio_ref_add(struct folio *folio, int nr)
 
 static inline void page_ref_sub(struct page *page, int nr)
 {
 	atomic_sub(nr, &page->_refcount);
-	if (page_ref_tracepoint_active(page_ref_mod))
+	if (page_ref_tracepoint_active(page, page_ref_mod))
 		__page_ref_mod(page, -nr);
 }
@@ -143,7 +147,7 @@ static inline int page_ref_sub_return(struct page *page, int nr)
 {
 	int ret = atomic_sub_return(nr, &page->_refcount);
 
-	if (page_ref_tracepoint_active(page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -nr, ret);
 	return ret;
 }
@@ -156,7 +160,7 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
 
 static inline void page_ref_inc(struct page *page)
 {
 	atomic_inc(&page->_refcount);
-	if (page_ref_tracepoint_active(page_ref_mod))
+	if (page_ref_tracepoint_active(page, page_ref_mod))
 		__page_ref_mod(page, 1);
 }
@@ -168,7 +172,7 @@ static inline void folio_ref_inc(struct folio *folio)
 
 static inline void page_ref_dec(struct page *page)
 {
 	atomic_dec(&page->_refcount);
-	if (page_ref_tracepoint_active(page_ref_mod))
+	if (page_ref_tracepoint_active(page, page_ref_mod))
 		__page_ref_mod(page, -1);
 }
@@ -181,7 +185,7 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
 {
 	int ret = atomic_sub_and_test(nr, &page->_refcount);
 
-	if (page_ref_tracepoint_active(page_ref_mod_and_test))
+	if (page_ref_tracepoint_active(page, page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -nr, ret);
 	return ret;
 }
@@ -195,7 +199,7 @@ static inline int page_ref_inc_return(struct page *page)
 {
 	int ret = atomic_inc_return(&page->_refcount);
 
-	if (page_ref_tracepoint_active(page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, 1, ret);
 	return ret;
 }
@@ -209,7 +213,7 @@ static inline int page_ref_dec_and_test(struct page *page)
 {
 	int ret = atomic_dec_and_test(&page->_refcount);
 
-	if (page_ref_tracepoint_active(page_ref_mod_and_test))
+	if (page_ref_tracepoint_active(page, page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -1, ret);
 	return ret;
 }
@@ -223,7 +227,7 @@ static inline int page_ref_dec_return(struct page *page)
 {
 	int ret = atomic_dec_return(&page->_refcount);
 
-	if (page_ref_tracepoint_active(page_ref_mod_and_return))
+	if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
 		__page_ref_mod_and_return(page, -1, ret);
 	return ret;
 }
@@ -237,7 +241,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
 	bool ret = atomic_add_unless(&page->_refcount, nr, u);
 
-	if (page_ref_tracepoint_active(page_ref_mod_unless))
+	if (page_ref_tracepoint_active(page, page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
 	return ret;
 }
@@ -317,7 +321,7 @@ static inline int page_ref_freeze(struct page *page, int count)
 {
 	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
 
-	if (page_ref_tracepoint_active(page_ref_freeze))
+	if (page_ref_tracepoint_active(page, page_ref_freeze))
 		__page_ref_freeze(page, count, ret);
 	return ret;
 }
@@ -333,7 +337,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 	VM_BUG_ON(count == 0);
 
 	atomic_set_release(&page->_refcount, count);
-	if (page_ref_tracepoint_active(page_ref_unfreeze))
+	if (page_ref_tracepoint_active(page, page_ref_unfreeze))
 		__page_ref_unfreeze(page, count);
 }
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 514e3b7b06b8..89272c05d74d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -45,6 +45,7 @@ struct iov_iter {
 	bool nofault;
 	bool data_source;
 	bool user_backed;
+	bool debug;
 	union {
 		size_t iov_offset;
 		int last_offset;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 7f4dfbdf12a6..7eabf99b4317 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -12,6 +12,56 @@
 
 #define RWBS_LEN	8
 
+/*
+ * Declare tracing information enums and their string mappings for display.
+ */
+#define bio_trace_wheres					\
+	EM(bio_trace_where_add_page,	"ADD-PG")		\
+	EM(bio_trace_where_endio,	"END-IO")		\
+	EM(bio_trace_where_free,	"FREE  ")		\
+	EM(bio_trace_where_free_pages,	"FREEPG")		\
+	EM(bio_trace_where_init,	"INIT  ")		\
+	EM(bio_trace_where_ioend,	"IOEND ")		\
+	EM(bio_trace_where_rel_pages,	"REL-PG")		\
+	E_(bio_trace_where_uninit,	"UNINIT")
+
+/*
+ * Generate enums for tracing information.
+ */
+#ifndef __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#undef EM
+#undef E_
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum bio_trace_where { bio_trace_wheres } __mode(byte);
+
+#endif /* end __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY */
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+
+#ifndef BIO_TRACE_ONLY_DEFINE_ENUMS
+
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+bio_trace_wheres;
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b)	{ a, b },
+#define E_(a, b)	{ a, b }
+
 DECLARE_EVENT_CLASS(block_buffer,
 
 	TP_PROTO(struct buffer_head *bh),
@@ -552,6 +602,51 @@ TRACE_EVENT(block_rq_remap,
 		  (unsigned long long)__entry->old_sector, __entry->nr_bios)
 );
 
+TRACE_EVENT(bio,
+	    TP_PROTO(struct bio *bio, enum bio_trace_where where,
+		     unsigned long info),
+
+	    TP_ARGS(bio, where, info),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		bi_debug_id	)
+		    __field(enum bio_trace_where,	where		)
+		    __field(unsigned long,		info		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->bi_debug_id = bio->bi_debug_id;
+		    __entry->where = where;
+		    __entry->info = info;
+			   ),
+
+	    TP_printk("bio=%08x %s I=%lx",
+		      __entry->bi_debug_id,
+		      __print_symbolic(__entry->where, bio_trace_wheres),
+		      __entry->info)
+	    );
+
+TRACE_EVENT(bio_endio,
+	    TP_PROTO(struct bio *bio),
+
+	    TP_ARGS(bio),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		bi_debug_id	)
+		    __field(const void *,		bi_end_io	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->bi_debug_id = bio->bi_debug_id;
+		    __entry->bi_end_io = bio->bi_end_io;
+			   ),
+
+	    TP_printk("bio=%08x %pSR",
+		      __entry->bi_debug_id,
+		      __entry->bi_end_io)
+	    );
+
+#endif /* BIO_TRACE_ONLY_DEFINE_ENUMS */
 #endif /* _TRACE_BLOCK_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 412b5a46374c..5f3b9b0e4b53 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -103,6 +103,12 @@
 #define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string)
 #endif
 
+#ifdef CONFIG_DEBUG_PAGE_MARK
+#define IF_HAVE_PG_DEBUG_MARK(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_DEBUG_MARK(flag,string)
+#endif
+
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
 	{1UL << PG_waiters,		"waiters"	},		\
@@ -132,7 +138,8 @@ IF_HAVE_PG_IDLE(PG_young,		"young"		)	\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)	\
 IF_HAVE_PG_ARCH_X(PG_arch_2,		"arch_2"	)	\
 IF_HAVE_PG_ARCH_X(PG_arch_3,		"arch_3"	)	\
-IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
+IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") \
+IF_HAVE_PG_DEBUG_MARK(PG_debug_mark,	"debug_mark"	)
 
 #define show_page_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d69a05950555..b3b2f1e6dc1b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -10,9 +10,11 @@
 #include <linux/vmalloc.h>
 #include <linux/splice.h>
 #include <linux/compat.h>
+#include <linux/page-flags.h>
 #include <net/checksum.h>
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
+#include <trace/events/page_ref.h>
 
 #define PIPE_PARANOIA /* for now */
@@ -1331,6 +1333,10 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
 		struct page *page = append_pipe(i, left, &off);
 		if (!page)
 			break;
+		if (i->debug && !PageDebugMark(page)) {
+			//SetPageDebugMark(page);
+			//get_page(page);
+		}
 		chunk = min_t(size_t, left, PAGE_SIZE - off);
 		get_page(*p++ = page);
@@ -1917,6 +1923,9 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 	i->nr_segs = state->nr_segs;
 }
 
+static struct page *extract_tmp;
+static DEFINE_MUTEX(extract_tmp_lock);
+
 /*
  * Extract a list of contiguous pages from an ITER_PIPE iterator. This does
  * not get references of its own on the pages, nor does it get a pin on them.
@@ -1936,6 +1945,19 @@ static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
 	struct page **p;
 	size_t left;
 
+	mutex_lock(&extract_tmp_lock);
+	if (!extract_tmp) {
+		pr_notice("alloc extract_tmp\n");
+		extract_tmp = alloc_page(GFP_USER);
+		if (extract_tmp) {
+			SetPageDebugMark(extract_tmp);
+			page_ref_add(extract_tmp, 200);
+		}
+	}
+	mutex_unlock(&extract_tmp_lock);
+	if (!extract_tmp)
+		return -ENOMEM;
+
 	if (!sanity(i))
 		return -EFAULT;
@@ -1955,9 +1977,15 @@ static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
 		struct page *page = append_pipe(i, left, &offset);
 		if (!page)
 			break;
+		if (i->debug && !PageDebugMark(page)) {
+			SetPageDebugMark(page);
+			trace_page_ref_set(page, 777);
+			//get_page(page);
+		}
 		chunk = min_t(size_t, left, PAGE_SIZE - offset);
 		left -= chunk;
 		*p++ = page;
+		//*p++ = extract_tmp;
 	}
 	if (!j)
 		return -EFAULT;
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index fca699ad1fb0..111a946a676f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -149,6 +149,23 @@ config DEBUG_PAGE_REF
 	  kernel code.  However the runtime performance overhead is virtually
 	  nil until the tracepoints are actually enabled.
 
+config DEBUG_PAGE_MARK
+	bool "Reserve a page bit to mark pages to be debugged"
+	depends on DEBUG_PAGE_REF
+	help
+	  This option adds an extra page flag that can be used to mark pages
+	  for debugging.  The mark can be observed in the page_ref tracepoints.
+	  The mark isn't set on any pages without alteration of the code.  This
+	  is intended for filesystem debugging and code to set the mark must be
+	  added manually into the source.
+
+config DEBUG_PAGE_REF_ONLY_MARKED
+	bool "Only trace marked pages"
+	depends on DEBUG_PAGE_REF && DEBUG_PAGE_MARK
+	help
+	  This option restricts the page_ref tracepoints to only track marked
+	  pages.
+
 config DEBUG_RODATA_TEST
 	bool "Testcase for the marking rodata read-only"
 	depends on STRICT_KERNEL_RWX
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0745aedebb37..37f146e5b2eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1102,6 +1102,9 @@ static inline void __free_one_page(struct page *page,
 	VM_BUG_ON(!zone_is_initialized(zone));
 	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 
+#ifdef CONFIG_DEBUG_PAGE_MARK
+	ClearPageDebugMark(page);
+#endif
 	VM_BUG_ON(migratetype == -1);
 	if (likely(!is_migrate_isolate(migratetype)))
diff --git a/mm/readahead.c b/mm/readahead.c
index b10f0cf81d80..458559fd0e67 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -248,6 +248,12 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		folio = filemap_alloc_folio(gfp_mask, 0);
 		if (!folio)
 			break;
+#if 0
+#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
+		if (mapping->host->i_sb->s_magic == XFS_SUPER_MAGIC)
+			folio_set_debug_mark(folio);
+#endif
+
 		if (filemap_add_folio(mapping, folio, index + i,
 					gfp_mask) < 0) {
 			folio_put(folio);
@@ -809,6 +815,7 @@ void readahead_expand(struct readahead_control *ractl,
 		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return;
+		//SetPageDebugMark(page);
 		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
 			put_page(page);
 			return;
@@ -832,6 +839,7 @@ void readahead_expand(struct readahead_control *ractl,
 		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return;
+		//SetPageDebugMark(page);
 		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
 			put_page(page);
 			return;
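
To actually see the output, read the trace buffer back through the
normal tracefs interface (this is just standard ftrace usage, nothing
the patch adds), e.g.:

	# Watch events as they arrive
	cat /sys/kernel/debug/tracing/trace_pipe

	# Or snapshot the buffer after reproducing the problem
	cat /sys/kernel/debug/tracing/trace >/tmp/page_ref.log

The magic numbers passed to trace_page_ref_set() above (623, 654, 666,
777, 888, 980+n) just identify which instrumentation site emitted an
event, so they can be grepped for in the log.  Note that with
CONFIG_DEBUG_PAGE_REF_ONLY_MARKED=y nothing is emitted until something
actually marks a page: as the Kconfig help says, SetPageDebugMark() or
folio_set_debug_mark() calls have to be added to the code under test by
hand (the "#if 0" blocks in iomap_write_begin() and
page_cache_ra_unbounded() are the sort of place they'd go).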