[PATCH] trace: Add tracepoints to fs subsystem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jiaying Zhang <jiayingz@xxxxxxxxxx>

These few fs tracepoints are useful when debugging latency issues in
filesystems; they were used specifically to debug various writeback
subsystem issues. This patch adds entry and exit tracepoints for the
following functions (mpage_readpages gets a single tracepoint, emitted
for the first page it processes), viz.:
wait_on_buffer
block_write_full_page
mpage_readpages
file_read

Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@xxxxxxxxxx>
---
 fs/buffer.c               |   10 +++
 fs/mpage.c                |    3 +
 include/trace/events/fs.h |  162 +++++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c              |    4 +-
 4 files changed, 178 insertions(+), 1 deletions(-)
 create mode 100644 include/trace/events/fs.h

diff --git a/fs/buffer.c b/fs/buffer.c
index a08bb8e..1c118f4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -42,6 +42,9 @@
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs.h>
+
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -82,7 +85,9 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
+	trace_fs_buffer_wait_enter(bh);
 	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
+	trace_fs_buffer_wait_exit(bh);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -1647,6 +1652,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	head = page_buffers(page);
 	bh = head;
 
+	trace_block_write_full_page_enter(inode, block, last_block);
+
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
 	 * handle any aliases from the underlying blockdev's mapping.
@@ -1736,6 +1743,9 @@ done:
 		 * here on.
 		 */
 	}
+
+	trace_block_write_full_page_exit(inode, nr_underway, err);
+
 	return err;
 
 recover:
diff --git a/fs/mpage.c b/fs/mpage.c
index 0afc809..1c3b8e1 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -28,6 +28,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 
+#include <trace/events/fs.h>
 /*
  * I/O completion handler for multipage BIOs.
  *
@@ -373,6 +374,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
+		if (page_idx == 0)
+			trace_mpage_readpages(page, mapping, nr_pages);
 		prefetchw(&page->flags);
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
new file mode 100644
index 0000000..95f7bc8
--- /dev/null
+++ b/include/trace/events/fs.h
@@ -0,0 +1,162 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs
+
+#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(fs_buffer_wait,
+	/* Template for the buffer-wait events: logs the buffer_head address. */
+	TP_PROTO(struct buffer_head *bh),
+
+	TP_ARGS(bh),
+
+	TP_STRUCT__entry(
+		__field(	void *,	bh	)
+	),
+
+	TP_fast_assign(
+		__entry->bh = bh;
+	),
+
+	TP_printk("bh %p", __entry->bh)
+);
+
+DEFINE_EVENT(fs_buffer_wait, fs_buffer_wait_enter,
+	/* Emitted by __wait_on_buffer() just before it waits on BH_Lock. */
+	TP_PROTO(struct buffer_head *bh),
+
+	TP_ARGS(bh)
+);
+
+DEFINE_EVENT(fs_buffer_wait, fs_buffer_wait_exit,
+	/* Emitted by __wait_on_buffer() right after the BH_Lock wait returns. */
+	TP_PROTO(struct buffer_head *bh),
+
+	TP_ARGS(bh)
+);
+
+TRACE_EVENT(block_write_full_page_enter,
+	/* Emitted by __block_write_full_page() before buffers are mapped. */
+	TP_PROTO(struct inode *inode, sector_t block, sector_t last_block),
+
+	TP_ARGS(inode, block, last_block),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	unsigned long,	ino		)
+		__field(	sector_t,	block		)
+		__field(	sector_t,	last_block	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->block		= block;
+		__entry->last_block	= last_block;
+	),
+	/* sector_t may be 64-bit on 32-bit kernels: print it as %llu. */
+	TP_printk("dev %d,%d ino %lu block %llu last block %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  (unsigned long long)__entry->block,
+		  (unsigned long long)__entry->last_block)
+);
+
+TRACE_EVENT(block_write_full_page_exit,
+	/* Emitted by __block_write_full_page() just before it returns err. */
+	TP_PROTO(struct inode *inode, int nr_underway, int err),
+
+	TP_ARGS(inode, nr_underway, err),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	unsigned long,	ino		)
+		__field(	int,		nr_underway	)
+		__field(	int,		err		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->nr_underway	= nr_underway;
+		__entry->err		= err;
+	),
+
+	TP_printk("dev %d,%d ino %lu nr_underway %d err %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino, __entry->nr_underway, __entry->err)
+);
+
+DECLARE_EVENT_CLASS(file_read,
+	TP_PROTO(struct inode *inode, loff_t pos, size_t len),
+	/* Template for file_read_enter/exit: device, inode, offset, length. */
+	TP_ARGS(inode, pos, len),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	loff_t,	pos			)
+		__field(	size_t,	len			)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pos	= pos;
+		__entry->len	= len;
+	),
+	/* loff_t is signed; size_t is not unsigned long everywhere: %lld/%zu. */
+	TP_printk("dev %d,%d ino %lu pos %lld len %zu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->pos, __entry->len)
+);
+
+DEFINE_EVENT(file_read, file_read_enter,
+	/* Entry of do_generic_file_read(): logs *ppos and desc->count. */
+	TP_PROTO(struct inode *inode, loff_t pos, size_t len),
+
+	TP_ARGS(inode, pos, len)
+);
+
+DEFINE_EVENT(file_read, file_read_exit,
+	/* Exit of do_generic_file_read(): pre-update *ppos, desc->written. */
+	TP_PROTO(struct inode *inode, loff_t pos, size_t len),
+
+	TP_ARGS(inode, pos, len)
+);
+
+TRACE_EVENT(mpage_readpages,
+	TP_PROTO(struct page *page, struct address_space *mapping,
+		 unsigned nr_pages),
+	/* Emitted for the first page that mpage_readpages() processes. */
+	TP_ARGS(page, mapping, nr_pages),
+
+	TP_STRUCT__entry(
+		__field(	pgoff_t, index			)
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	unsigned,	nr_pages	)
+
+	),
+
+	TP_fast_assign(
+		__entry->index	= page->index;
+		__entry->ino	= mapping->host->i_ino;
+		__entry->dev	= mapping->host->i_sb->s_dev;
+		__entry->nr_pages	= nr_pages;
+	),
+
+	TP_printk("dev %d,%d ino %lu page_index %lu nr_pages %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->index, __entry->nr_pages)
+);
+
+#endif /* _TRACE_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/mm/filemap.c b/mm/filemap.c
index c641edf..94e549c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,7 +42,7 @@
 #include <linux/buffer_head.h> /* for try_to_free_buffers */
 
 #include <asm/mman.h>
-
+#include <trace/events/fs.h>
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -1054,6 +1054,7 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
 	unsigned int prev_offset;
 	int error;
 
+	trace_file_read_enter(inode, *ppos, desc->count);
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
 	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
@@ -1254,6 +1255,7 @@ out:
 	ra->prev_pos <<= PAGE_CACHE_SHIFT;
 	ra->prev_pos |= prev_offset;
 
+	trace_file_read_exit(inode, *ppos, desc->written);
 	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
 	file_accessed(filp);
 }
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux