André Almeida <andrealmeid@xxxxxxxxxxxxx> writes: > Create a new set of futex syscalls known as futex2. This new interface > is aimed to implement a more maintainable code, while removing obsolete > features and expanding it with new functionalities. Hi André. Some comments below > +/* kernel/futex2.c */ > +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned int val, > + unsigned int flags, > + struct __kernel_timespec __user __user *timo); Duplicated __user attribute > +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned int nr_wake, > + unsigned int flags); > + > /* kernel/hrtimer.c */ > asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, > struct __kernel_timespec __user *rmtp); > diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h > index 728752917785..57e19200f7e4 100644 > --- a/include/uapi/asm-generic/unistd.h > +++ b/include/uapi/asm-generic/unistd.h > @@ -862,8 +862,14 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) > #define __NR_epoll_pwait2 441 > __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) > > +#define __NR_futex_wait 442 > +__SYSCALL(__NR_futex_wait, sys_futex_wait) > + > +#define __NR_futex_wake 443 > +__SYSCALL(__NR_futex_wake, sys_futex_wake) > + > #undef __NR_syscalls > -#define __NR_syscalls 442 > +#define __NR_syscalls 444 > > /* > * 32 bit systems traditionally used different > diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h > index a89eb0accd5e..9fbdaaf4f254 100644 > --- a/include/uapi/linux/futex.h > +++ b/include/uapi/linux/futex.h > @@ -41,6 +41,62 @@ > #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ > FUTEX_PRIVATE_FLAG) > > +/* Size argument to futex2 syscall */ > +#define FUTEX_8 0 > +#define FUTEX_16 1 > +#define FUTEX_32 2 > + > +#define FUTEX_SIZE_MASK 0x3 > + > +#define FUTEX_SHARED_FLAG 8 > + > +#define FUTEX_NUMA_FLAG 16 > + > +/** > + * struct futexXX_numa - struct for NUMA-aware futex operation > + * @value: futex value > + * @hint: node id to operate > + */ > + > +struct futex8_numa { > + __u8 value; > + __s8 hint; > +}; > + > +struct futex16_numa { > + __u16 value; > + __s16 hint; > +}; > + > +struct futex32_numa { > + __u32 value; > + __s32 hint; > +}; This patchset doesn't use these structures as far as I can see. Maybe these should be on a later patchset, when there is actual support for numa awareness ? > + > +#define FUTEX_WAITV_MAX 128 > + > +/** > + * struct futex_waitv - A waiter for vectorized wait > + * @uaddr: User address to wait on > + * @val: Expected value at uaddr > + * @flags: Flags for this waiter > + */ > +struct futex_waitv { > + void *uaddr; > + unsigned int val; > + unsigned int flags; > +}; Shouldn't this be in patch 3? > + > +/** > + * struct futex_requeue - Define an address and its flags for requeue operation > + * @uaddr: User address of one of the requeue arguments > + * @flags: Flags for this address > + */ > +struct futex_requeue { > + void *uaddr; > + unsigned int flags; > +}; Shouldn't this be in patch 4? > +/** > + * struct futexv_head - List of futexes to be waited > + * @task: Task to be awaken > + * @hint: Was someone on this list awakened? > + * @objects: List of futexes > + */ > +struct futexv_head { > + struct task_struct *task; > + bool hint; > + struct futex_waiter objects[0]; > +}; this structure is also used for a single futex. maybe struct futex_waiter_head? > + > +/** > + * struct futex_bucket - A bucket of futex's hash table > + * @waiters: Number of waiters in the bucket > + * @lock: Bucket lock > + * @list: List of waiters on this bucket > + */ > +struct futex_bucket { > + atomic_t waiters; > + spinlock_t lock; > + struct list_head list; > +}; > + > +/** > + * struct futex_single_waiter - Wrapper for a futexv_head of one element > + * @futexv: Single futexv element > + * @waiter: Single waiter element > + */ > +struct futex_single_waiter { > + struct futexv_head futexv; > + struct futex_waiter waiter; > +} __packed; Is this struct necessary? can't you just allocate the necessary space, i.e. a struct futexv_head with 1 futexv_head->object? > + > +/* Mask for futex2 flag operations */ > +#define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG | \ > + FUTEX_CLOCK_REALTIME) SHARED_FLAG should be in patch 2 > + > +/* Mask for sys_futex_waitv flag */ > +#define FUTEXV_MASK (FUTEX_CLOCK_REALTIME) > + > +/* Mask for each futex in futex_waitv list */ > +#define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG) > + > +struct futex_bucket *futex_table; > +unsigned int futex2_hashsize; > + > +/* > + * Reflects a new waiter being added to the waitqueue. > + */ > +static inline void bucket_inc_waiters(struct futex_bucket *bucket) > +{ > +#ifdef CONFIG_SMP > + atomic_inc(&bucket->waiters); > + /* > + * Issue a barrier after adding so futex_wake() will see that the > + * value had increased > + */ > + smp_mb__after_atomic(); > +#endif > +} > + > +/* > + * Reflects a waiter being removed from the waitqueue by wakeup > + * paths. > + */ > +static inline void bucket_dec_waiters(struct futex_bucket *bucket) > +{ > +#ifdef CONFIG_SMP > + atomic_dec(&bucket->waiters); > +#endif > +} > + > +/* > + * Get the number of waiters in a bucket > + */ > +static inline int bucket_get_waiters(struct futex_bucket *bucket) > +{ > +#ifdef CONFIG_SMP > + /* > + * Issue a barrier before reading so we get an updated value from > + * futex_wait() > + */ > + smp_mb(); > + return atomic_read(&bucket->waiters); > +#else > + return 1; > +#endif > +} > + > +/** > + * futex_get_bucket - Check if the user address is valid, prepare internal > + * data and calculate the hash > + * @uaddr: futex user address > + * @key: data that uniquely identifies a futex > + * > + * Return: address of bucket on success, error code otherwise > + */ > +static struct futex_bucket *futex_get_bucket(void __user *uaddr, > + struct futex_key *key) > +{ > + uintptr_t address = (uintptr_t)uaddr; > + u32 hash_key; > + > + /* Checking if uaddr is valid and accessible */ > + if (unlikely(!IS_ALIGNED(address, sizeof(u32)))) > + return ERR_PTR(-EINVAL); > + if (unlikely(!access_ok(address, sizeof(u32)))) > + return ERR_PTR(-EFAULT); This says the code only supports 32-bit. So, maybe drop the other FUTEX_SIZE defines for now > + > + key->offset = address % PAGE_SIZE; > + address -= key->offset; > + key->pointer = (u64)address; > + key->index = (unsigned long)current->mm; Why split the key in offset and pointer and waste 1/3 more space to store each key? > + > + /* Generate hash key for this futex using uaddr and current->mm */ > + hash_key = jhash2((u32 *)key, sizeof(*key) / sizeof(u32), 0); > + > + /* Since HASH_SIZE is 2^n, subtracting 1 makes a perfect bit mask */ > + return &futex_table[hash_key & (futex2_hashsize - 1)]; If someone inadvertely changes futex2_hashsize to something not 2^n this will silently break. futex2_hashsize should be constant and you need a BUILD_BUG_ON(). > +static int futex_enqueue(struct futexv_head *futexv, unsigned int nr_futexes, > + int *awakened) > +{ > + int i, ret; > + u32 uval, *uaddr, val; > + struct futex_bucket *bucket; > + > +retry: > + set_current_state(TASK_INTERRUPTIBLE); > + > + for (i = 0; i < nr_futexes; i++) { > + uaddr = (u32 * __user)futexv->objects[i].uaddr; > + val = (u32)futexv->objects[i].val; > + > + bucket = futexv->objects[i].bucket; > + > + bucket_inc_waiters(bucket); > + spin_lock(&bucket->lock); > + > + ret = futex_get_user(&uval, uaddr); > + > + if (unlikely(ret)) { > + spin_unlock(&bucket->lock); > + > + bucket_dec_waiters(bucket); > + __set_current_state(TASK_RUNNING); > + *awakened = futex_dequeue_multiple(futexv, i); > + > + if (__get_user(uval, uaddr)) > + return -EFAULT; > + > + if (*awakened >= 0) > + return 1; If you are awakened, you don't need to waste time with trying to get the next key. > +/** > + * futex_wait - Setup the timer (if there's one) and wait on a list of futexes > + * @futexv: List of futexes > + * @nr_futexes: Length of futexv > + * @timo: Timeout > + * @flags: Timeout flags > + * > + * Return: > + * * 0 >= - Hint of which futex woke us > + * * 0 < - Error code > + */ > +static int futex_set_timer_and_wait(struct futexv_head *futexv, > + unsigned int nr_futexes, > + struct __kernel_timespec __user *timo, > + unsigned int flags) > +{ > + struct hrtimer_sleeper timeout; > + int ret; > + > + if (timo) { > + ret = futex_setup_time(timo, &timeout, flags); > + if (ret) > + return ret; > + } > + > + ret = __futex_wait(futexv, nr_futexes, timo ? &timeout : NULL); > + > + if (timo) > + hrtimer_cancel(&timeout.timer); > + > + return ret; > +} I'm having a hard time understanding why this function exists. part of the futex is set up outside of it, part inside. Not sure if this isn't just part of sys_futex_wait. Thanks, -- Gabriel Krisman Bertazi