Hi,
I am trying to duplicate the purpose of /proc/pid/io for a homework to understand how procfs / seq_file works. Although there is no mandate to use seq_file but it seems very interesting to me.
The aim is to print an output similar to /proc/pid/io but for all processes at once using a custom proc file name "/proc/proc_io".
The project is organized as follows:
main.c -> It is responsible for kernel module init / deinit
sequence.c -> This is where I have put in the processing logic
utils.h -> bunch of macros for debugging
The debugging logs come as follows:
=========
[ +4.814129] <pl_open:15>
[ +0.000017] <pl_seq_start:53>
[ +0.000003] <pl_seq_start:55> init_task: [ffffffff81e11500] pos: [0]
[ +0.000001] <pl_seq_show:103>
[ +0.000004] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a28000]
[ +0.000002] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a28dc0]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a29b80]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a2b700]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a2d280]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a2e040]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a2ee00]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a78000]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a7ee00]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aa8000]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aa8dc0]
[ +0.000001] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aaa940]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aab700]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aac4c0]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aad280]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aaee00]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225048000]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225048dc0]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225049b80]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff88022504b700]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff88022504c4c0]
[ +0.000001] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff88022504d280]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff88022504e040]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f0000]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f0dc0]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f1b80]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f2940]
[ +0.000001] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f44c0]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f5280]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f6040]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f6e00]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_next:72>
[ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802251a0dc0]
[ +0.000002] <pl_seq_show:103>
[ +0.000002] <pl_seq_next:72>
[ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802251a1b80]
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_stop:92>
[ +0.000276] <pl_seq_start:53>
[ +0.000002] <pl_seq_start:55> init_task: [ffffffff81e11500] pos: [1]
[ +0.000001] <pl_seq_stop:92>
=========
Towards the end, the sequence is as follows:
[ +0.000001] <pl_seq_show:103>
[ +0.000003] <pl_seq_stop:92>
i.e. stop() is called after show().
This is making my output truncated to only first few processes.
Although I always return 0 from show(), I fail to understand why stop() is being invoked just after show().
Can you please help me or redirect me somewhere to understand why this is happening?
Thank You.
Gaurav
#pragma once #include <linux/kernel.h> #define DEBUG_ENABLE /* uncomment to enable debugging logs */ #ifdef DEBUG_ENABLE #define dbg(fmt,args...) \ do { \ printk(KERN_DEBUG "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \ } while (0) #else #define dbg(fmt,args...) #endif #define err(fmt,args...) \ do { \ printk(KERN_ERR "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \ } while (0) #define info(fmt,args...) \ do { \ printk(KERN_INFO "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \ } while (0)
#pragma once #include <linux/seq_file.h> /* return sequence operations */ inline struct seq_operations * get_sequence_ops(void);
#include "sequence.h" #include "utils.h" #include <linux/module.h> #include <linux/proc_fs.h> #define PROC_NAME "proc_io" static struct proc_dir_entry *pl; static int pl_open(struct inode *inode, struct file *file) { struct seq_operations *sops; dbg(""); /* initialize sequential file, register operations * Ref: https://www.kernel.org/doc/htmldocs/filesystems/API-seq-open.html */ sops = get_sequence_ops(); return seq_open(file, sops); } /* file operations */ static struct file_operations fops = { .owner = THIS_MODULE, .open = pl_open, /* read method for sequential files */ .read = seq_read, /* llseek method for sequential files */ .llseek = seq_lseek, /* free the structures associated with sequential file */ .release = seq_release, }; static void _pl_module_exit(void) { dbg(""); if (pl != NULL) proc_remove(pl); } static int __init pl_module_init(void) { dbg(""); /* create /proc/proc_io */ pl = proc_create(PROC_NAME, 0, NULL, &fops); if (pl == NULL) { err("Failed to create proc_io"); goto error; } return 0; error: _pl_module_exit(); return -1; } static void __exit pl_module_exit(void) { _pl_module_exit(); } module_init(pl_module_init); module_exit(pl_module_exit); MODULE_AUTHOR("Gaurav Kalra"); MODULE_DESCRIPTION("PR02 per Process I/O Usage"); MODULE_LICENSE("GPL");
Attachment:
Makefile
Description: Binary data
#include "sequence.h" #include "utils.h" #include <linux/sched.h> #include <linux/task_io_accounting_ops.h> /* Organization of task information in kernel: struct task_struct { ... pid_t pid; ... struct list_head tasks; ... char comm[TASK_COMM_LEN]; ... struct task_io_accounting ioac; }; struct list_head { struct list_head *next, *prev; }; struct task_io_accounting { #ifdef CONFIG_TASK_XACCT u64 rchar; //bytes read u64 wchar; //bytes written u64 syscr; //# of read syscalls u64 syscw; //# of write syscalls #endif #ifdef CONFIG_TASK_IO_ACCOUNTING //The number of bytes which this task has caused to be read from storage. u64 read_bytes; //The number of bytes which this task has caused, or shall cause to be written to disk. u64 write_bytes; //A task can cause "negative" IO too. If this task truncates some //dirty pagecache, some IO which another task has been accounted for //(in its write_bytes) will not be happening. We _could_ just //subtract that from the truncating task's write_bytes, but there is //information loss in doing that. u64 cancelled_write_bytes; #endif }; */ static void * pl_seq_start(struct seq_file *m, loff_t *pos) { dbg(""); dbg("init_task: [%p] pos: [%lld]", &init_task, *pos); /* new sequence, return init_task */ if (*pos == 0) { return &init_task; } /* sequence end, terminate */ else { *pos = 0; return NULL; } } static void * pl_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct task_struct *n_tsk, *c_tsk; dbg(""); /* set current task */ c_tsk = v; /* return next task */ if ((n_tsk = next_task(c_tsk)) != &init_task) { dbg("n_tsk: [%p]", n_tsk); return n_tsk; } dbg("init_task: [%p] c_tsk: [%p] n_tsk: [%p] pos: [%lld]", &init_task, c_tsk, n_tsk, *pos); /* if next task == init_task, terminate */ return NULL; } static void pl_seq_stop(struct seq_file *m, void *v) { dbg(""); return; /* nop */ } static int pl_seq_show(struct seq_file *m, void *v) { struct task_struct *tsk = v, *t; char buf[TASK_COMM_LEN]; struct task_io_accounting acct = tsk->ioac; /* initialize accounting data */ dbg(""); /* account each thread * Ref: https://github.com/torvalds/linux/blob/master/fs/proc/base.c * Function: do_io_accounting() */ t = tsk; task_io_accounting_add(&acct, &tsk->signal->ioac); while_each_thread(tsk, t) task_io_accounting_add(&acct, &t->ioac); /* print information */ seq_printf(m, "%s [PID: %u]\n" "\trchar: %llu\n" "\twchar: %llu\n" "\tsyscr: %llu\n" "\tsyscw: %llu\n" "\tread_bytes: %llu\n" "\twrite_bytes: %llu\n" "\tcancelled_write_bytes: %llu\n\n", get_task_comm(buf, tsk), task_pid_nr(tsk), (unsigned long long)acct.rchar, (unsigned long long)acct.wchar, (unsigned long long)acct.syscr, (unsigned long long)acct.syscw, (unsigned long long)acct.read_bytes, (unsigned long long)acct.write_bytes, (unsigned long long)acct.cancelled_write_bytes); /* return success */ return 0; } /* sequence operations */ static struct seq_operations sops = { /* sets the iterator up and returns the first element of sequence */ .start = pl_seq_start, /* returns the next element of sequence */ .next = pl_seq_next, /* shuts it down */ .stop = pl_seq_stop, /* prints element into the buffer */ .show = pl_seq_show, }; inline struct seq_operations * get_sequence_ops(void) { return &sops; }
_______________________________________________ Kernelnewbies mailing list Kernelnewbies@xxxxxxxxxxxxxxxxx https://lists.kernelnewbies.org/mailman/listinfo/kernelnewbies