On Sat, 20 Jun 2009, Davide Libenzi wrote: > On Sat, 20 Jun 2009, Davide Libenzi wrote: > > > How about the one below? > > Maybe with an interface that can be undone w/out a file* :) This is another alternative, based on a low-carb diet of your notifier patch. Same concept of de-coupling VFS refcount from eventfd memory context, and allowing a poll callback register/unregister. AFAICS, based on my limited knowledge of the IRQfd policies, your ->release() path needs to call eventfd_pollcb_unregister() and wait for all pending work items to complete. - Davide --- fs/eventfd.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/eventfd.h | 23 ++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) Index: linux-2.6.mod/fs/eventfd.c =================================================================== --- linux-2.6.mod.orig/fs/eventfd.c 2009-06-20 16:25:45.000000000 -0700 +++ linux-2.6.mod/fs/eventfd.c 2009-06-20 16:35:22.000000000 -0700 @@ -17,8 +17,10 @@ #include <linux/eventfd.h> #include <linux/syscalls.h> #include <linux/module.h> +#include <linux/kref.h> struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the @@ -59,9 +61,29 @@ int eventfd_signal(struct file *file, in } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +static void eventfd_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); +} + +static void eventfd_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_put(ctx); return 0; } @@ -217,6 +239,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); 
ctx->count = count; ctx->flags = flags; @@ -237,3 +260,47 @@ SYSCALL_DEFINE1(eventfd, unsigned int, c return sys_eventfd2(count, 0); } +static void eventfd_pollcb_ptqueue(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct eventfd_pollcb *ecb; + + ecb = container_of(pt, struct eventfd_pollcb, pt); + + add_wait_queue(wqh, &ecb->wait); +} + +int eventfd_pollcb_register(struct file *file, struct eventfd_pollcb *ecb, + wait_queue_func_t cbf) +{ + struct eventfd_ctx *ctx; + unsigned int events; + + if (file->f_op != &eventfd_fops) + return -EINVAL; + + ctx = file->private_data; + + /* + * Install our own custom wake-up handling so we are notified via + * a callback whenever someone signals the underlying eventfd. + */ + init_waitqueue_func_entry(&ecb->wait, cbf); + init_poll_funcptr(&ecb->pt, eventfd_pollcb_ptqueue); + + events = file->f_op->poll(file, &ecb->pt); + + eventfd_get(ctx); + ecb->ctx = ctx; + + return (events & POLLIN) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(eventfd_pollcb_register); + +void eventfd_pollcb_unregister(struct eventfd_pollcb *ecb) +{ + remove_wait_queue(&ecb->ctx->wqh, &ecb->wait); + eventfd_put(ecb->ctx); +} +EXPORT_SYMBOL_GPL(eventfd_pollcb_unregister); + Index: linux-2.6.mod/include/linux/eventfd.h =================================================================== --- linux-2.6.mod.orig/include/linux/eventfd.h 2009-06-20 16:25:45.000000000 -0700 +++ linux-2.6.mod/include/linux/eventfd.h 2009-06-20 16:38:20.000000000 -0700 @@ -8,6 +8,20 @@ #ifndef _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/list.h> +#include <linux/kref.h> + +struct eventfd_ctx; + +struct eventfd_pollcb { + poll_table pt; + struct eventfd_ctx *ctx; + wait_queue_t wait; +}; + #ifdef CONFIG_EVENTFD /* For O_CLOEXEC and O_NONBLOCK */ @@ -29,12 +43,21 @@ struct file *eventfd_fget(int fd); int eventfd_signal(struct file *file, int n); +int eventfd_pollcb_register(struct file 
*file, struct eventfd_pollcb *ecb, + wait_queue_func_t cbf); +void eventfd_pollcb_unregister(struct eventfd_pollcb *ecb); #else /* CONFIG_EVENTFD */ #define eventfd_fget(fd) ERR_PTR(-ENOSYS) static inline int eventfd_signal(struct file *file, int n) { return 0; } +static inline int eventfd_pollcb_register(struct file *file, + struct eventfd_pollcb *ecb, + wait_queue_func_t cbf) +{ return -ENOSYS; } +static inline void eventfd_pollcb_unregister(struct eventfd_pollcb *ecb) +{ } #endif /* CONFIG_EVENTFD */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html