One of the downsides of epoll is that if you have a hung epoll-based program, you can't track down which fds it's waiting for; those fds are hidden inside the epoll structures in the kernel and you can't see them. At all. This patch exposes this information in /proc/<pid>/fdinfo/<fd>, so lsof can show it, or coders can just cat it during debugging. It has a few stylistic issues and the minor detail of not having been tested; but other than that, what do you think? All those event-driven programs out there need debugging. If lsof is good, so is this... Signed-Off-By: Chip Salzenberg <chip@xxxxxxxxx> fs/eventpoll.c | 46 ++++++++++++++++++++++++++++++++------------- fs/proc/fd.c | 7 ++++++- include/linux/eventpoll.h | 19 +++++++++++++++++++ 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cd96649..9212503 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -297,13 +298,6 @@ ctl_table epoll_table[] = { }; #endif /* CONFIG_SYSCTL */ -static const struct file_operations eventpoll_fops; - -static inline int is_file_epoll(struct file *f) -{ - return f->f_op == &eventpoll_fops; -} - /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) @@ -784,7 +778,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) } /* File callbacks that implement the eventpoll file behaviour */ -static const struct file_operations eventpoll_fops = { +const struct file_operations eventpoll_fops = { .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, @@ -1065,7 +1059,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) list_for_each_entry(epi, &file->f_ep_links, fllink) { child_file = 
epi->ep->file; - if (is_file_epoll(child_file)) { + if (is_file_eventpoll(child_file)) { if (list_empty(&child_file->f_ep_links)) { if (path_count_inc(call_nests)) { error = -1; @@ -1551,7 +1545,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) list_add(&ep->visited_list_link, &visited_list); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); - if (unlikely(is_file_epoll(epi->ffd.file))) { + if (unlikely(is_file_eventpoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->visited) continue; @@ -1719,7 +1713,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * adding an epoll file descriptor inside itself. */ error = -EINVAL; - if (file == tfile || !is_file_epoll(file)) + if (file == tfile || !is_file_eventpoll(file)) goto error_tgt_fput; /* @@ -1745,7 +1739,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, did_lock_epmutex = 1; } if (op == EPOLL_CTL_ADD) { - if (is_file_epoll(tfile)) { + if (is_file_eventpoll(tfile)) { error = -ELOOP; if (ep_loop_check(ep, tfile) != 0) { clear_tfile_check_list(); @@ -1831,7 +1825,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, * the user passed to us _is_ an eventpoll file. 
*/ error = -EINVAL; - if (!is_file_epoll(f.file)) + if (!is_file_eventpoll(f.file)) goto error_fput; /* @@ -1892,6 +1886,32 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return error; } +#ifdef CONFIG_PROC_FS + +/* Emit, under ep->mtx, one line "eventpoll: <fd>:<events hex>:<data hex> ..." covering every watched fd, for /proc/#/fdinfo/#; emits nothing when no fd is watched */ +void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file) { + struct eventpoll *ep; + struct epitem *epi = NULL; + struct rb_node *rbp; + + BUG_ON(!is_file_eventpoll(file)); + ep = file->private_data; + mutex_lock_nested(&ep->mtx, 0); + + for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + if (!epi) /* first item: emit the header once */ + seq_puts(m, "eventpoll:"); + epi = rb_entry(rbp, struct epitem, rbn); + seq_printf(m, " %d:%x:%llx", epi->ffd.fd, epi->event.events, (unsigned long long)epi->event.data); + } + if (epi) + seq_putc(m, '\n'); + + mutex_unlock(&ep->mtx); +} + +#endif /* CONFIG_PROC_FS */ + static int __init eventpoll_init(void) { struct sysinfo si; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index f28a875..ba2d6dd 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -8,6 +8,9 @@ #include <linux/security.h> #include <linux/file.h> #include <linux/seq_file.h> +#ifdef CONFIG_EPOLL +#include <linux/eventpoll.h> +#endif #include <linux/proc_fs.h> @@ -48,8 +51,10 @@ static int seq_show(struct seq_file *m, void *v) } if (!ret) { - seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", (long long)file->f_pos, f_flags); + if (unlikely(is_file_eventpoll(file))) + eventpoll_proc_fdinfo(m, file); fput(file); } diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6daf6d4..b179ab4 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -18,10 +18,13 @@ /* Forward declarations to avoid compiler errors */ struct file; +struct seq_file; #ifdef CONFIG_EPOLL +#include <linux/fs.h> + /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { @@ -61,11 +64,27 @@ 
static inline void eventpoll_release(struct file *file) eventpoll_release_file(file); } +extern const struct file_operations eventpoll_fops; + +static inline bool is_file_eventpoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} + #else static inline void eventpoll_init_file(struct file *file) {} static inline void eventpoll_release(struct file *file) {} +static inline bool is_file_eventpoll(struct file *f) { return false; } #endif + +#if defined(CONFIG_EPOLL) && defined(CONFIG_PROC_FS) +void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file); +#else +static inline void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file) {} +#endif + + #endif /* #ifndef _LINUX_EVENTPOLL_H */ -- Chip Salzenberg
Signed-Off-By: Chip Salzenberg <chip@xxxxxxxxx> fs/eventpoll.c | 46 ++++++++++++++++++++++++++++++++------------- fs/proc/fd.c | 7 ++++++- include/linux/eventpoll.h | 19 +++++++++++++++++++ 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cd96649..9212503 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -297,13 +298,6 @@ ctl_table epoll_table[] = { }; #endif /* CONFIG_SYSCTL */ -static const struct file_operations eventpoll_fops; - -static inline int is_file_epoll(struct file *f) -{ - return f->f_op == &eventpoll_fops; -} - /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) @@ -784,7 +778,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) } /* File callbacks that implement the eventpoll file behaviour */ -static const struct file_operations eventpoll_fops = { +const struct file_operations eventpoll_fops = { .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, @@ -1065,7 +1059,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) list_for_each_entry(epi, &file->f_ep_links, fllink) { child_file = epi->ep->file; - if (is_file_epoll(child_file)) { + if (is_file_eventpoll(child_file)) { if (list_empty(&child_file->f_ep_links)) { if (path_count_inc(call_nests)) { error = -1; @@ -1551,7 +1545,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) list_add(&ep->visited_list_link, &visited_list); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); - if (unlikely(is_file_epoll(epi->ffd.file))) { + if (unlikely(is_file_eventpoll(epi->ffd.file))) { ep_tovisit = 
epi->ffd.file->private_data; if (ep_tovisit->visited) continue; @@ -1719,7 +1713,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * adding an epoll file descriptor inside itself. */ error = -EINVAL; - if (file == tfile || !is_file_epoll(file)) + if (file == tfile || !is_file_eventpoll(file)) goto error_tgt_fput; /* @@ -1745,7 +1739,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, did_lock_epmutex = 1; } if (op == EPOLL_CTL_ADD) { - if (is_file_epoll(tfile)) { + if (is_file_eventpoll(tfile)) { error = -ELOOP; if (ep_loop_check(ep, tfile) != 0) { clear_tfile_check_list(); @@ -1831,7 +1825,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; - if (!is_file_epoll(f.file)) + if (!is_file_eventpoll(f.file)) goto error_fput; /* @@ -1892,6 +1886,32 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return error; } +#ifdef CONFIG_PROC_FS + +/* Emit, under ep->mtx, one line "eventpoll: <fd>:<events hex>:<data hex> ..." covering every watched fd, for /proc/#/fdinfo/#; emits nothing when no fd is watched */ +void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file) { + struct eventpoll *ep; + struct epitem *epi = NULL; + struct rb_node *rbp; + + BUG_ON(!is_file_eventpoll(file)); + ep = file->private_data; + mutex_lock_nested(&ep->mtx, 0); + + for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + if (!epi) /* first item: emit the header once */ + seq_puts(m, "eventpoll:"); + epi = rb_entry(rbp, struct epitem, rbn); + seq_printf(m, " %d:%x:%llx", epi->ffd.fd, epi->event.events, (unsigned long long)epi->event.data); + } + if (epi) + seq_putc(m, '\n'); + + mutex_unlock(&ep->mtx); +} + +#endif /* CONFIG_PROC_FS */ + static int __init eventpoll_init(void) { struct sysinfo si; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index f28a875..ba2d6dd 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -8,6 +8,9 @@ #include <linux/security.h> #include <linux/file.h> #include <linux/seq_file.h> +#ifdef CONFIG_EPOLL +#include <linux/eventpoll.h> +#endif #include <linux/proc_fs.h> @@ 
-48,8 +51,10 @@ static int seq_show(struct seq_file *m, void *v) } if (!ret) { - seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", (long long)file->f_pos, f_flags); + if (unlikely(is_file_eventpoll(file))) + eventpoll_proc_fdinfo(m, file); fput(file); } diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6daf6d4..b179ab4 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -18,10 +18,13 @@ /* Forward declarations to avoid compiler errors */ struct file; +struct seq_file; #ifdef CONFIG_EPOLL +#include <linux/fs.h> + /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { @@ -61,11 +64,27 @@ static inline void eventpoll_release(struct file *file) eventpoll_release_file(file); } +extern const struct file_operations eventpoll_fops; + +static inline bool is_file_eventpoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} + #else static inline void eventpoll_init_file(struct file *file) {} static inline void eventpoll_release(struct file *file) {} +static inline bool is_file_eventpoll(struct file *f) { return false; } #endif + +#if defined(CONFIG_EPOLL) && defined(CONFIG_PROC_FS) +void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file); +#else +static inline void eventpoll_proc_fdinfo(struct seq_file *m, struct file *file) {} +#endif + + #endif /* #ifndef _LINUX_EVENTPOLL_H */