On Fri, Jul 21 2023 at 12:22, Peter Zijlstra wrote:
>  struct futex_hash_bucket *futex_hash(union futex_key *key)
>  {
> -	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
> +	u32 hash = jhash2((u32 *)key,
> +			  offsetof(typeof(*key), both.offset) / sizeof(u32),
>  			  key->both.offset);
> +	int node = key->both.node;
>
> -	return &futex_queues[hash & (futex_hashsize - 1)];
> +	if (node == -1) {
> +		/*
> +		 * In case of !FLAGS_NUMA, use some unused hash bits to pick a
> +		 * node -- this ensures regular futexes are interleaved across
> +		 * the nodes and avoids having to allocate multiple
> +		 * hash-tables.
> +		 *
> +		 * NOTE: this isn't perfectly uniform, but it is fast and
> +		 * handles sparse node masks.
> +		 */
> +		node = (hash >> futex_hashshift) % nr_node_ids;

Is nr_node_ids guaranteed to be stable after init? It's marked
__read_mostly, but not __ro_after_init.

> +		if (!node_possible(node)) {
> +			node = find_next_bit_wrap(node_possible_map.bits,
> +						  nr_node_ids, node);
> +		}
> +	}
> +
> +	return &futex_queues[node][hash & (futex_hashsize - 1)];
>  }

>  	fshared = flags & FLAGS_SHARED;
> +	size = futex_size(flags);
>
>  	/*
>  	 * The futex address must be "naturally" aligned.
>  	 */
>  	key->both.offset = address % PAGE_SIZE;
> -	if (unlikely((address % sizeof(u32)) != 0))
> +	if (unlikely((address % size) != 0))
>  		return -EINVAL;

Hmm. Shouldn't that have changed with the allowance of the 1 and 2 byte
futexes?

>  	address -= key->both.offset;
>
> -	if (unlikely(!access_ok(uaddr, sizeof(u32))))
> +	if (flags & FLAGS_NUMA)
> +		size *= 2;
> +
> +	if (unlikely(!access_ok(uaddr, size)))
>  		return -EFAULT;
>
>  	if (unlikely(should_fail_futex(fshared)))
>  		return -EFAULT;
>
> +	key->both.node = -1;

Please put this into an else path.

> +	if (flags & FLAGS_NUMA) {
> +		void __user *naddr = uaddr + size/2;

  size / 2;

> +
> +		if (futex_get_value(&node, naddr, flags))
> +			return -EFAULT;
> +
> +		if (node == -1) {
> +			node = numa_node_id();
> +			if (futex_put_value(node, naddr, flags))
> +				return -EFAULT;
> +		}
> +
> +		if (node >= MAX_NUMNODES || !node_possible(node))
> +			return -EINVAL;

That's clearly an else path too. No point in checking whether
numa_node_id() is valid.

> +		key->both.node = node;
> +	}
>
> +static inline unsigned int futex_size(unsigned int flags)
> +{
> +	unsigned int size = flags & FLAGS_SIZE_MASK;
> +	return 1 << size; /* {0,1,2,3} -> {1,2,4,8} */
> +}
> +
>  static inline bool futex_flags_valid(unsigned int flags)
>  {
>  	/* Only 64bit futexes for 64bit code */
> @@ -77,13 +83,19 @@ static inline bool futex_flags_valid(uns
>  	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
>  		return false;
>
> -	return true;
> -}
> +	/*
> +	 * Must be able to represent both NUMA_NO_NODE and every valid nodeid
> +	 * in a futex word.
> +	 */
> +	if (flags & FLAGS_NUMA) {
> +		int bits = 8 * futex_size(flags);
> +		u64 max = ~0ULL;
> +		max >>= 64 - bits;

Your newline key is broken, right?

> +		if (nr_node_ids >= max)
> +			return false;
> +	}

Thanks,

        tglx