On Mon, Jul 19, 2021 at 11:13 AM Thierry Delisle <tdelisle@xxxxxxxxxxxx> wrote: > > > Latency/efficiency: on worker wakeup an idle server can be picked from > > the list and context-switched into synchronously, on the same CPU. > > Using FDs and select/poll/epoll will add extra layers of abstractions; > > synchronous context-switches (not yet fully implemented in UMCG) will > > most likely be impossible. This patchset seems much more efficient and > > lightweight than whatever can be built on top of FDs. > > I can believe that. > > Are you planning to support separate scheduling instances within a > single user > space? That is having multiple sets of server threads and workers can > only run > within a specific set. Yes, this is naturally supported in the current patchset on the kernel side, and is supported in libumcg (to be posted, later when the kernel side is settled); internally at Google, some applications use different "groups" of workers/servers per NUMA node. > > I believe the problem with the idle_servers_ptr as specified is that it > is not > possible to reclaim used nodes safely. I don't see any indication of which > nodes the kernel can concurrently access and on which some memory > reclamation > scheme could be based. Please see the attached atomic_stack.h file - I use it in my tests, things seem to be working. Specifically, atomic_stack_gc does the cleanup. For the kernel side of things, see the third patch in this patchset. > > What is the benefit of having users maintain themselves a list of idle > servers > rather than each servers marking themselves as 'out of work' and having the > kernel maintain the list? To keep the kernel side light and simple. To also protect the kernel from spinning if userspace misbehaves. Basically, the overall approach is to delegate most of the work to the userspace, and keep the bare minimum in the kernel.
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ATOMIC_STACK_H #define __ATOMIC_STACK_H #include <assert.h> #include <stdatomic.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> /* * Atomic Stack: a sigle-linked stack that allows fully concurrent * push/pop operations. * * The stack is simple: head->node->node->NULL. * * * Sample usage: * * struct my_stack_node { * type1 val1; * type2 val2; * uint64_t next; * } __attribute__((aligned(8))); * * uint64_t head = 0UL; * struct my_stack_node *node; * * node = malloc(sizeof(struct my_stack_node)); * node->val1 = xxx; * node->val2 = yyy; * atomic_stack_push(&head, &node->next); * * assert(node == container_of(atomic_stack_pop(&head), * struct my_stack_node, next)); */ /** * atomic_stack_is_empty - check if the stack is empty * @head - a pointer to the head of the stack. */ static inline bool atomic_stack_is_empty(uint64_t *head) { uint64_t curr = atomic_load_explicit(head, memory_order_acquire); do { uint64_t next; if (!curr) return true; assert(!(curr & 1UL)); next = atomic_load_explicit((uint64_t *)curr, memory_order_acquire); if (!(next & 1UL)) return false; /* found a non-deleted node */ curr = next & ~1UL; } while (true); } /** * atomic_stack_push - push a node onto the stack * @head - a pointer to the head of the stack; * @node - a pointer to the node to push. */ static inline void atomic_stack_push(uint64_t *head, uint64_t *node) { while (true) { uint64_t first = atomic_load_explicit(head, memory_order_acquire); atomic_store_explicit(node, first, memory_order_release); if (atomic_compare_exchange_weak_explicit(head, &first, (uint64_t)node, memory_order_acq_rel, memory_order_relaxed)) return; } } /** * atomic_stack_pop - pop a node from the stack * @head - a pointer to the head of the stack. * * Returns a pointer to the popped node (to be used in container_of), or NULL * if the stack is empty. */ static inline uint64_t* atomic_stack_pop(uint64_t *head) { uint64_t *curr = (uint64_t *)atomic_load_explicit(head, memory_order_acquire); do { uint64_t next; if (!curr) return NULL; assert(!((uint64_t)curr & 1UL)); next = atomic_load_explicit(curr, memory_order_acquire); if (next & 1UL) { /* curr is deleted */ curr = (uint64_t *)(next & ~1UL); continue; } if (atomic_compare_exchange_strong_explicit(curr, &next, next | 1UL, memory_order_release, memory_order_relaxed)) return curr; curr = (uint64_t *)(next & ~1UL); } while (true); } static inline bool atomic_stack_remove(uint64_t *head, uint64_t *node) { uint64_t *curr = (uint64_t *)atomic_load_explicit(head, memory_order_acquire); do { uint64_t next; if (!curr) return false; next = atomic_load_explicit(curr, memory_order_acquire); if (next & 1UL) { /* curr is deleted */ if (curr == node) return false; curr = (uint64_t *)(next & ~1UL); continue; } if (curr == node) return atomic_compare_exchange_strong_explicit( curr, &next, next | 1UL, memory_order_release, memory_order_relaxed); curr = (uint64_t *)(next & ~1UL); } while (true); } /** * atomic_stack_pop_all - pop all nodes from the stack * @head - a pointer to the head of the stack. * * Returns a pointer to the first node (to be used in container_of), or NULL * if the stack is empty. */ static inline uint64_t* atomic_stack_pop_all(uint64_t *head) { while (true) { bool ok; uint64_t first = atomic_load_explicit(head, memory_order_acquire); if (!first) return NULL; ok = atomic_compare_exchange_strong_explicit(head, &first, 0UL, memory_order_release, memory_order_relaxed); assert(ok); return (uint64_t *)(first); } } /** * atomic_stack_gc - remove popped/deleted elements from the stack * @head - a pointer to the head of the stack. * * Note: it is most likely unsafe to run several instances * of atomic_stack_gc() concurrently, but a single instance * should be safe to run concurrently with push/pop/remove. */ static inline void atomic_stack_gc(uint64_t *head) { uint64_t *prev = head; while (true) { uint64_t curr, next; assert (!(1UL & (uint64_t)prev)); curr = atomic_load_explicit(prev, memory_order_acquire); if (!curr) return; if (curr & 1UL) { prev = head; /* prev marked deleted; restart */ continue; } next = atomic_load_explicit((uint64_t *)curr, memory_order_acquire); if (!next) return; if (next & 1UL) { /* curr marked deleted */ if (atomic_compare_exchange_strong_explicit(prev, &curr, next & ~1UL, memory_order_release, memory_order_relaxed)) { continue; } prev = head; /* prev marked as deleted; restart */ continue; } prev = (uint64_t *)curr; } } static inline void atomic_stack_print(uint64_t *head, const char *ctx) { uint64_t curr = (uint64_t)head; int cnt = 0; fprintf(stderr, "%s stack: ", ctx); do { if (++cnt > 6) break; fprintf(stderr, "0x%lx => ", curr); if (curr & 1UL) curr &= ~1UL; if (!curr) break; curr = *(uint64_t *)curr; } while (curr); fprintf(stderr, " (nil)\n"); } #endif /* __ATOMIC_STACK_H */