This is generic ring buffer code that might be useful for bsg, SCSI target infrastructure, kevent, etc. I named it "event channel", but there are probably better names.

The user interface is pretty similar to kevent's, but there are some differences:

- I added the sigmask parameter to the system call that waits for events from the kernel (this has been one of the big topics in the kevent threads; Ulrich wants it though Evgeniy doesn't).

- kevent needs fake file descriptors to talk with user space, while bsg has its own character devices. So while sys_kevent_init creates a file descriptor, sys_ec_init binds an existing file descriptor to ring buffers (that is, a process tells sys_ec_init to set up ring buffers and bind them to this file descriptor).

- bsg and SCSI target infrastructure need a bi-directional interface, while kevent only needs notification from the kernel to user space. A process can tell the kernel to perform the ready requests in a ring buffer via the ec_send system call.

I have not started to convert kevent to use this yet. It's doable, though lots of modifications are necessary, and it will surely be tricky to do cleanly.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>
---
 include/asm-i386/unistd.h       |    6 +-
 include/asm-x86_64/unistd.h     |   12 +-
 include/linux/eventchannel.h    |   36 ++++
 include/linux/eventchannel_if.h |   15 ++
 include/linux/syscalls.h        |    5 +
 init/Kconfig                    |    7 +
 kernel/Makefile                 |    1 +
 kernel/eventchannel.c           |  466 +++++++++++++++++++++++++++++++++++++++
 8 files changed, 545 insertions(+), 3 deletions(-)

diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 833fa17..32a0d4d 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -325,10 +325,14 @@ #define __NR_vmsplice		316
 #define __NR_move_pages		317
 #define __NR_getcpu		318
 #define __NR_epoll_pwait	319
+#define __NR_ec_wait		320
+#define __NR_ec_commit		321
+#define __NR_ec_send		322
+#define __NR_ec_init		323
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 320
+#define NR_syscalls 324
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index c5f596e..8922da3 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,8 +619,16 @@ #define __NR_vmsplice		278
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages		279
 __SYSCALL(__NR_move_pages, sys_move_pages)
-
-#define __NR_syscall_max __NR_move_pages
+#define __NR_ec_wait		280
+__SYSCALL(__NR_ec_wait, sys_ec_wait)
+#define __NR_ec_commit		281
+__SYSCALL(__NR_ec_commit, sys_ec_commit)
+#define __NR_ec_send		282
+__SYSCALL(__NR_ec_send, sys_ec_send)
+#define __NR_ec_init		283
+__SYSCALL(__NR_ec_init, sys_ec_init)
+
+#define __NR_syscall_max __NR_ec_init
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/eventchannel.h b/include/linux/eventchannel.h
new file mode 100644
index 0000000..e67b707
--- /dev/null
+++ b/include/linux/eventchannel.h
@@ -0,0 +1,36 @@
+#ifndef __EVENTCHANNEL_H
+#define __EVENTCHANNEL_H
+
+struct ec_ring_info {
+	struct mutex ring_lock;
+	unsigned int kidx, uidx, full, ring_size, ring_over;
+	struct ec_ring __user *pring;
+
+	wait_queue_head_t *wq;
+	int *nr_ready_event;
+};
+
+struct ec_info {
+	struct ec_ring_info kuring, ukring;
+};
+
+struct ec_operations {
+	int (*ec_init)(struct file *, struct ec_ring *, struct ec_ring *,
+		       unsigned int, unsigned int);
+	struct ec_info *(*file_to_ecinfo)(struct file *);
+	int (*prepare_send_event_to_user)(struct file *);
+	int (*send_event_to_user)(struct file *, char *);
+	int (*prepare_recv_event_from_user)(struct file *);
+	int (*recv_event_from_user)(struct file *, char *);
+};
+
+extern struct ec_info *
+ec_info_alloc(struct ec_ring __user *kupring, wait_queue_head_t *kuwq,
+	      int *nr_kuevent, struct ec_ring __user *ukpring,
+	      wait_queue_head_t *ukwq, int *nr_ukevent,
+	      unsigned int num, unsigned int flags);
+extern void ec_info_free(struct ec_info *eci);
+extern int ec_register(int type, int event_size, struct ec_operations *ec_op,
+		       struct file_operations *f_op);
+
+#endif
diff --git a/include/linux/eventchannel_if.h b/include/linux/eventchannel_if.h
new file mode 100644
index 0000000..ea00a18
--- /dev/null
+++ b/include/linux/eventchannel_if.h
@@ -0,0 +1,15 @@
+#ifndef __EVENTCHANNEL_IF_H
+#define __EVENTCHANNEL_IF_H
+
+#define EC_TYPE_BSG		0
+#define EC_TYPE_SCSI_TGT	1
+#define EC_TYPE_KEVENT		2
+#define EC_TYPE_MAX		EC_TYPE_KEVENT
+
+struct ec_ring {
+	unsigned int ring_kidx;
+	unsigned int ring_over;
+	unsigned long event[0];
+};
+
+#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1912c6c..15567c3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -605,4 +605,9 @@ asmlinkage long sys_getcpu(unsigned __us
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
+asmlinkage long sys_ec_wait(int efd, unsigned int num, unsigned int old_uidx,
+			    struct timespec __user *ts, struct siginfo __user *si,
+			    unsigned int flags);
+asmlinkage long sys_ec_commit(int efd, unsigned int new_uidx, unsigned int over);
+asmlinkage long sys_ec_send(int efd, unsigned int num, unsigned int over);
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index a3f83e2..cdddb18 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -437,6 +437,13 @@ config EPOLL
 	  Disabling this option will cause the kernel to be built without
 	  support for epoll family of system calls.
 
+config EVENT_CHANNEL
+	bool "Enable event channel support"
+	default y
+	help
+	  Disabling this option will cause the kernel to be built without
+	  support for event channel family of system calls.
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y
diff --git a/kernel/Makefile b/kernel/Makefile
index 14f4d45..ed577c5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_EVENT_CHANNEL) += eventchannel.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@xxxxxxxxxxxxxxxx>, the -fno-omit-frame-pointer is
diff --git a/kernel/eventchannel.c b/kernel/eventchannel.c
new file mode 100644
index 0000000..33f5741
--- /dev/null
+++ b/kernel/eventchannel.c
@@ -0,0 +1,466 @@
+/*
+ * Event Channel functions
+ *
+ * 2006 Copyright (c) Evgeniy Polyakov <johnpol@xxxxxxxxxxx>
+ *
+ * Copyright (C) 2007 FUJITA Tomonori <tomof@xxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/eventchannel.h>
+#include <linux/eventchannel_if.h>
+#include <asm/uaccess.h>
+
+#undef EC_DEBUG
+
+#ifdef EC_DEBUG
+#define dprintk(fmt, args...) printk(KERN_ERR "%s %d: " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define dprintk(fmt, args...)
+#endif
+
+/* Per-type registration record, filled in by ec_register(). */
+struct ec_type_table {
+	int event_size;
+	struct ec_operations *ec_op;
+	struct file_operations *f_op;
+};
+
+/*
+ * Indexed by EC_TYPE_*.  EC_TYPE_MAX is the highest valid type, not the
+ * number of types, so the table needs EC_TYPE_MAX + 1 slots.
+ */
+struct ec_type_table ec_table[EC_TYPE_MAX + 1];
+
+/*
+ * Number of ring slots between @fore and @rear.  Equal indices mean the
+ * whole ring (ring_size slots) unless ri->full is set, in which case the
+ * result is 0.
+ */
+static int ec_ring_space(struct ec_ring_info *ri,
+			 unsigned int fore, unsigned int rear)
+{
+	if (ri->full)
+		return 0;
+
+	return (rear > fore) ?
+		rear - fore : ri->ring_size - (fore - rear);
+}
+
+/* Advance *idx by one slot, wrapping to 0 at @size. */
+static void ec_ring_idx_inc(unsigned int *idx, unsigned int size)
+{
+	if (++*idx >= size)
+		*idx = 0;
+}
+
+/*
+ * Look up @efd and check that it belongs to a registered event channel
+ * type.  On success the file is returned with a reference held (the
+ * caller must fput() it) and *type is set to its ec_table index;
+ * otherwise NULL is returned and no reference is held.
+ */
+static struct file *ec_fget(int efd, int *type)
+{
+	int i;
+	struct file *file;
+
+	file = fget(efd);
+	if (!file)
+		return NULL;
+
+	for (i = 0; i <= EC_TYPE_MAX; i++) {
+		/* skip slots nobody has registered */
+		if (!ec_table[i].f_op)
+			continue;
+		if (file->f_op == ec_table[i].f_op) {
+			*type = i;
+			return file;
+		}
+	}
+
+	dprintk("this descriptor is not event channel\n");
+	fput(file);
+	return NULL;
+}
+
+/*
+ * Copy up to @num ready events from the kernel into the kernel-to-user
+ * ring of @efd.  If @ts is given, first sleep (interruptibly, for at most
+ * @ts) until an event is ready and the ring has room.  Returns the number
+ * of events stored or a negative errno.  @old_uidx, @si and @flags are
+ * currently unused.
+ *
+ * TODO: absolute timeout, signal, etc
+ * NOTE(review): without @ts no wait happens at all, although timeout is
+ * initialised to MAX_SCHEDULE_TIMEOUT; confirm that this is intended.
+ */
+asmlinkage long
+sys_ec_wait(int efd, unsigned int num, unsigned int old_uidx,
+	    struct timespec __user *ts, struct siginfo __user *si,
+	    unsigned int flags)
+{
+	int ret, count, type;
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timespec t;
+	struct file *file;
+	struct ec_info *eci;
+	struct ec_ring_info *ri;
+	struct ec_operations *ec_op;
+
+	file = ec_fget(efd, &type);
+	if (!file)
+		return -EBADF;
+
+	ec_op = ec_table[type].ec_op;
+	eci = ec_op->file_to_ecinfo(file);
+	if (!eci) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	ri = &eci->kuring;
+
+	if (ts) {
+		if (copy_from_user(&t, ts, sizeof(t))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (!timespec_valid(&t)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		timeout = timespec_to_jiffies(&t);
+		ret = wait_event_interruptible_timeout(*ri->wq,
+				(*ri->nr_ready_event &&
+				 ec_ring_space(ri, ri->kidx, ri->uidx)),
+				timeout);
+		/* interrupted: drop the file reference on the way out */
+		if (ret < 0)
+			goto out;
+	}
+
+	if (ec_op->prepare_send_event_to_user)
+		ec_op->prepare_send_event_to_user(file);
+
+	for (count = 0; count < num; count++) {
+		char *buf;
+
+		mutex_lock(&ri->ring_lock);
+
+		dprintk("%d %d %u %u\n", count, num, ri->kidx, ri->uidx);
+
+		ret = ec_ring_space(ri, ri->kidx, ri->uidx);
+		if (!ret) {
+			mutex_unlock(&ri->ring_lock);
+			break;
+		}
+
+		/*
+		 * event[] is an array of unsigned long, so the byte offset
+		 * has to be added to a char pointer (as sys_ec_send does).
+		 */
+		buf = (char *) ri->pring->event +
+			ri->kidx * ec_table[type].event_size;
+
+		ret = ec_op->send_event_to_user(file, buf);
+		if (ret) {
+			mutex_unlock(&ri->ring_lock);
+			break;
+		}
+
+		ec_ring_idx_inc(&ri->kidx, ri->ring_size);
+
+		if (ri->kidx == ri->uidx)
+			ri->full = 1;
+
+		dprintk("%u %u %u\n", ri->kidx, ri->uidx, ri->full);
+
+		if (put_user(ri->kidx, &ri->pring->ring_kidx)) {
+			mutex_unlock(&ri->ring_lock);
+			ret = -EFAULT;
+			goto out;
+		}
+
+		mutex_unlock(&ri->ring_lock);
+	}
+	ret = count;
+out:
+	fput(file);
+	return ret;
+}
+
+/*
+ * Userspace reports that it has consumed kernel-to-user events up to
+ * @new_uidx.  @over is the caller's view of the wrap counter and must be
+ * ring_over or ring_over - 1.  Returns the number of slots released, or
+ * a negative errno.
+ */
+asmlinkage long
+sys_ec_commit(int efd, unsigned int new_uidx, unsigned int over)
+{
+	int type, ret = -EINVAL;
+	struct file *file;
+	struct ec_info *eci;
+	struct ec_ring_info *ri;
+
+	file = ec_fget(efd, &type);
+	if (!file)
+		return -EBADF;
+
+	eci = ec_table[type].ec_op->file_to_ecinfo(file);
+	if (!eci) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	ri = &eci->kuring;
+
+	mutex_lock(&ri->ring_lock);
+
+	dprintk("%u %u %u\n", new_uidx, ri->kidx, ri->uidx);
+
+	if (new_uidx >= ri->ring_size)
+		goto out;
+
+	if ((over != ri->ring_over - 1) && (over != ri->ring_over))
+		goto out;
+
+	if (ri->uidx < ri->kidx && ri->kidx < new_uidx)
+		goto out;
+
+	if (new_uidx > ri->uidx) {
+		if (over != ri->ring_over)
+			goto out;
+
+		ret = new_uidx - ri->uidx;
+		ri->uidx = new_uidx;
+		ri->full = 0;
+	} else if (new_uidx < ri->uidx) {
+		/* the consumer index wrapped past the end of the ring */
+		ret = ri->ring_size - (ri->uidx - new_uidx);
+		ri->uidx = new_uidx;
+		ri->ring_over++;
+		ri->full = 0;
+
+		if (put_user(ri->ring_over, &ri->pring->ring_over)) {
+			ret = -EFAULT;
+			goto out;
+		}
+	} else
+		ret = 0;
+
+out:
+	mutex_unlock(&ri->ring_lock);
+
+	fput(file);
+	return ret;
+}
+
+/*
+ * Ask the kernel to consume up to @num requests that userspace has
+ * placed in the user-to-kernel ring of @efd.  Returns the number of
+ * requests consumed or a negative errno.  @over is currently unused.
+ * NOTE(review): ri->full is set and then cleared unconditionally below.
+ */
+asmlinkage long
+sys_ec_send(int efd, unsigned int num, unsigned int over)
+{
+	int type, ret = -EINVAL, i;
+	struct file *file;
+	struct ec_info *eci;
+	struct ec_ring_info *ri;
+	struct ec_operations *ec_op;
+
+	file = ec_fget(efd, &type);
+	if (!file)
+		return -EBADF;
+
+	ec_op = ec_table[type].ec_op;
+	eci = ec_op->file_to_ecinfo(file);
+	if (!eci) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	ri = &eci->ukring;
+
+	if (ec_op->prepare_recv_event_from_user)
+		ec_op->prepare_recv_event_from_user(file);
+
+	mutex_lock(&ri->ring_lock);
+
+	if (num > ri->ring_size)
+		goto out;
+
+	ret = ec_ring_space(ri, ri->uidx, ri->kidx);
+	if (!ret)
+		goto out;
+
+	if (num >= ret) {
+		num = ret;
+		ri->full = 1;
+	}
+
+	/*
+	 * TODO: kernel threads can work for some people (not bsg now)
+	 * However, we need poll for ukring for it.
+	 */
+
+	for (i = 0; i < num; i++) {
+		char *buf = (char *) ri->pring->event +
+			ri->kidx * ec_table[type].event_size;
+
+		dprintk("%u %u %u\n", num, ri->kidx, ri->uidx);
+		ret = ec_op->recv_event_from_user(file, buf);
+		if (ret)
+			break;
+		ec_ring_idx_inc(&ri->kidx, ri->ring_size);
+	}
+	ret = i;
+	ri->full = 0;
+
+	if (put_user(ri->kidx, &ri->pring->ring_kidx))
+		ret = -EFAULT;
+
+out:
+	mutex_unlock(&ri->ring_lock);
+
+	fput(file);
+	return ret;
+}
+
+/* Initialise one direction of an event channel with @num ring slots. */
+static void ec_ring_init(struct ec_ring_info *ri, int num,
+			 struct ec_ring __user *pring, wait_queue_head_t *wq,
+			 int *nr_event)
+{
+	ri->wq = wq;
+	ri->nr_ready_event = nr_event;
+	mutex_init(&ri->ring_lock);
+	ri->ring_size = num;
+	ri->pring = pring;
+}
+
+/*
+ * Allocate an ec_info whose two rings both have @num slots.  Returns
+ * NULL if the allocation fails; release with ec_info_free().  @flags is
+ * currently unused.
+ */
+struct ec_info *
+ec_info_alloc(struct ec_ring __user *kupring, wait_queue_head_t *kuwq,
+	      int *nr_kuevent, struct ec_ring __user *ukpring,
+	      wait_queue_head_t *ukwq, int *nr_ukevent,
+	      unsigned int num, unsigned int flags)
+{
+	struct ec_info *eci;
+
+	eci = kzalloc(sizeof(*eci), GFP_KERNEL);
+	if (!eci)
+		return NULL;
+
+	ec_ring_init(&eci->kuring, num, kupring, kuwq, nr_kuevent);
+	ec_ring_init(&eci->ukring, num, ukpring, ukwq, nr_ukevent);
+
+	return eci;
+}
+EXPORT_SYMBOL_GPL(ec_info_alloc);
+
+/* Free an ec_info obtained from ec_info_alloc(). */
+void ec_info_free(struct ec_info *eci)
+{
+	kfree(eci);
+}
+EXPORT_SYMBOL_GPL(ec_info_free);
+
+/*
+ * Bind @efd to the rings @kupring and @ukpring by calling the ec_init
+ * hook registered for @type.  Returns the hook's result, -EBADF for a bad
+ * descriptor, or -EINVAL if @type is unknown, unregistered or does not
+ * match the file behind @efd.
+ */
+asmlinkage long
+sys_ec_init(int efd, int type, struct ec_ring __user *kupring,
+	    struct ec_ring __user *ukpring, unsigned int num, unsigned int flags)
+{
+	int ret;
+	struct file *file;
+	struct ec_operations *ec_op;
+
+	if (type < 0 || type > EC_TYPE_MAX)
+		return -EINVAL;
+
+	/* nothing registered for this type, or no init hook */
+	ec_op = ec_table[type].ec_op;
+	if (!ec_op || !ec_op->ec_init)
+		return -EINVAL;
+
+	file = fget(efd);
+	if (!file)
+		return -EBADF;
+
+	/* only files of the matching registered type may be bound */
+	if (file->f_op != ec_table[type].f_op) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	ret = ec_op->ec_init(file, kupring, ukpring, num, flags);
+
+	dprintk("%d %p %p %d\n", type, kupring, ukpring, num);
+
+	fput(file);
+
+	return ret;
+}
+
+/*
+ * Register the operations and file_operations for event channel @type.
+ * @event_size is the byte stride of one ring entry.  Returns 0 on
+ * success and 1 if @type is out of range or already registered, or if
+ * @ec_op or @f_op is NULL.
+ */
+int ec_register(int type, int event_size, struct ec_operations *ec_op,
+		struct file_operations *f_op)
+{
+	if (type < 0 || type > EC_TYPE_MAX)
+		return 1;
+
+	if (!ec_op || !f_op)
+		return 1;
+
+	/* refuse to overwrite an existing registration for this type */
+	if (ec_table[type].ec_op)
+		return 1;
+
+	ec_table[type].ec_op = ec_op;
+	ec_table[type].f_op = f_op;
+	ec_table[type].event_size = event_size;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ec_register);
-- 
1.4.3.2

-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html