This is a backport of commit: 03db343a6320f780937078433fa7d8da955e6fce modified in a way that introduces some code duplication on the one hand, but reduces the risk of regressing existing eventfd users on the other hand. KVM needs a wait to atomically remove themselves from the eventfd ->poll() wait queue head, in order to handle correctly their IRQfd deassign operation. This patch introduces such API, plus a way to read an eventfd from its context. Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- Avi, Davidel, how about only including the following part for -stable then? Reason is, I still would like to be able to use irqfd there, and getting spurious interrupts 100% of times unmask is done isn't a very good idea IMO ... fs/eventfd.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/eventfd.h | 9 +++++++++ 2 files changed, 44 insertions(+), 0 deletions(-) diff --git a/fs/eventfd.c b/fs/eventfd.c index 8b47e42..ea9c18a 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -135,6 +135,41 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) return events; } +static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= *cnt; +} + +/** + * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. + * @ctx: [in] Pointer to eventfd context. + * @wait: [in] Wait queue to be removed. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked. + * + * This is used to atomically remove a wait queue entry from the eventfd wait + * queue head, and read/reset the counter value. + */ +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + eventfd_ctx_do_read(ctx, cnt); + __remove_wait_queue(&ctx->wqh, wait); + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, POLLOUT); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return *cnt != 0 ? 0 : -EAGAIN; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); + static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 94dd103..85eac48 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -10,6 +10,7 @@ #include <linux/fcntl.h> #include <linux/file.h> +#include <linux/wait.h> /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -34,6 +35,8 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); int eventfd_signal(struct eventfd_ctx *ctx, int n); +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -61,6 +64,12 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) } +static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, + wait_queue_t *wait, __u64 *cnt) +{ + return -ENOSYS; +} + #endif #endif /* _LINUX_EVENTFD_H */ -- 1.6.6.144.g5c3af -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html