From: Jyri Sarha <jyri.sarha@xxxxxxxxx> The old free list implementation used objects in FIFO style. This is bad because it tries keep all the objects ever used alive and in memory. This minimizes the changes that an allocated object is already in cache. When there is shortage of physical memory this may also increase change that newly allocated object is swapped out. LIFO (e.g. stack) style free list should help these issues. Like the old one the new implementation is also lock free. This version (v2.1) of the patch has a potential weakness fixed. The previous version (2.0) did segfault when popping from empty flist, this does not. --- src/pulsecore/flist.c | 213 ++++++++++++++----------------------------------- 1 files changed, 59 insertions(+), 154 deletions(-) diff --git a/src/pulsecore/flist.c b/src/pulsecore/flist.c index 7e5ee24..193a7cb 100644 --- a/src/pulsecore/flist.c +++ b/src/pulsecore/flist.c @@ -1,7 +1,9 @@ /*** This file is part of PulseAudio. - Copyright 2006-2008 Lennart Poettering + Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). + + Contact: Jyri Sarha <Jyri.Sarha at nokia.com> PulseAudio is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as @@ -34,198 +36,101 @@ #include "flist.h" -/* Algorithm is not perfect, in a few corner cases it will fail to pop - * from the flist although it isn't empty, and fail to push into the - * flist, although it isn't full. - * - * We keep a fixed size array of entries, each item is an atomic - * pointer. - * - * To accelerate finding of used/unused cells we maintain a read and a - * write index which is used like a ring buffer. After each push we - * increase the write index and after each pop we increase the read - * index. - * - * The indexes are incremented atomically and are never truncated to - * the buffer size. Instead we assume that the buffer size is a power - * of two and that the truncation can thus be done by applying a - * simple AND on read. - * - * To make sure that we do not look for empty cells indefinitely we - * maintain a length value which stores the number of used cells. From - * this value the number of unused cells is easily calculated. Please - * note that the length value is not updated atomically with the read - * and write index and might thus be a few cells off the real - * value. To deal with this we always look for N_EXTRA_SCAN extra - * cells when pushing/popping entries. - * - * It might make sense to replace this implementation with a link list - * stack or queue, which however requires DCAS to be simple. Patches - * welcome. - * - * Please note that this algorithm is home grown.*/ - #define FLIST_SIZE 128 -#define N_EXTRA_SCAN 3 -/* For debugging purposes we can define _Y to put and extra thread - * yield between each operation. */ +struct pa_flist_elem { + pa_atomic_ptr_t next; + pa_atomic_ptr_t ptr; +}; -#ifdef PROFILE -#define _Y pa_thread_yield() -#else -#define _Y do { } while(0) -#endif +typedef struct pa_flist_elem pa_flist_elem; struct pa_flist { unsigned size; - pa_atomic_t length; - pa_atomic_t read_idx; - pa_atomic_t write_idx; + pa_atomic_ptr_t stored; + pa_atomic_ptr_t empty; + pa_flist_elem table[0]; }; -#define PA_FLIST_CELLS(x) ((pa_atomic_ptr_t*) ((uint8_t*) (x) + PA_ALIGN(sizeof(struct pa_flist)))) +static pa_flist_elem *stack_pop(pa_atomic_ptr_t *list) { + pa_flist_elem *poped; + pa_assert(list); + + do { + poped = (pa_flist_elem *) pa_atomic_ptr_load(list); + } while (poped != NULL && !pa_atomic_ptr_cmpxchg(list, poped, pa_atomic_ptr_load(&poped->next))); + + return poped; +} + +static void stack_push(pa_atomic_ptr_t *list, pa_flist_elem *new_elem) { + pa_flist_elem *next; + pa_assert(list); + + do { + next = pa_atomic_ptr_load(list); + pa_atomic_ptr_store(&new_elem->next, next); + } while (!pa_atomic_ptr_cmpxchg(list, next, new_elem)); +} pa_flist *pa_flist_new(unsigned size) { pa_flist *l; + unsigned i; if (!size) size = FLIST_SIZE; - pa_assert(pa_is_power_of_two(size)); - - l = pa_xmalloc0(PA_ALIGN(sizeof(pa_flist)) + (sizeof(pa_atomic_ptr_t) * size)); + l = pa_xmalloc0(sizeof(pa_flist) + sizeof(pa_flist_elem) * size); l->size = size; - - pa_atomic_store(&l->read_idx, 0); - pa_atomic_store(&l->write_idx, 0); - pa_atomic_store(&l->length, 0); - + pa_atomic_ptr_store(&l->stored, NULL); + pa_atomic_ptr_store(&l->empty, NULL); + for (i=0; i < size; i++) { + stack_push(&l->empty, &l->table[i]); + } return l; } -static unsigned reduce(pa_flist *l, unsigned value) { - return value & (l->size - 1); -} - void pa_flist_free(pa_flist *l, pa_free_cb_t free_cb) { pa_assert(l); if (free_cb) { - pa_atomic_ptr_t*cells; - unsigned idx; - - cells = PA_FLIST_CELLS(l); - - for (idx = 0; idx < l->size; idx ++) { - void *p; - - if ((p = pa_atomic_ptr_load(&cells[idx]))) - free_cb(p); - } + pa_flist_elem *elem; + while((elem = stack_pop(&l->stored))) + free_cb(pa_atomic_ptr_load(&elem->ptr)); } pa_xfree(l); } -int pa_flist_push(pa_flist*l, void *p) { - unsigned idx, n; - pa_atomic_ptr_t*cells; -#ifdef PROFILE - unsigned len; -#endif - +int pa_flist_push(pa_flist *l, void *p) { + pa_flist_elem *elem; pa_assert(l); pa_assert(p); - cells = PA_FLIST_CELLS(l); - - n = l->size + N_EXTRA_SCAN - (unsigned) pa_atomic_load(&l->length); - -#ifdef PROFILE - len = n; -#endif - - _Y; - idx = reduce(l, (unsigned) pa_atomic_load(&l->write_idx)); - - for (; n > 0 ; n--) { - - _Y; - - if (pa_atomic_ptr_cmpxchg(&cells[idx], NULL, p)) { - - _Y; - pa_atomic_inc(&l->write_idx); - - _Y; - pa_atomic_inc(&l->length); - - return 0; - } - - _Y; - idx = reduce(l, idx + 1); + elem = stack_pop(&l->empty); + if (elem == NULL) { + pa_log_warn("flist is full"); + return -1; } + pa_atomic_ptr_store(&elem->ptr, p); + stack_push(&l->stored, elem); -#ifdef PROFILE - if (len > N_EXTRA_SCAN) - pa_log_warn("Didn't find free cell after %u iterations.", len); -#endif - - return -1; + return 0; } -void* pa_flist_pop(pa_flist*l) { - unsigned idx, n; - pa_atomic_ptr_t *cells; -#ifdef PROFILE - unsigned len; -#endif - +void* pa_flist_pop(pa_flist *l) { + pa_flist_elem *elem; + void *ptr; pa_assert(l); - cells = PA_FLIST_CELLS(l); - - n = (unsigned) pa_atomic_load(&l->length) + N_EXTRA_SCAN; + elem = stack_pop(&l->stored); + if (elem == NULL) + return NULL; -#ifdef PROFILE - len = n; -#endif - - _Y; - idx = reduce(l, (unsigned) pa_atomic_load(&l->read_idx)); - - for (; n > 0 ; n--) { - void *p; - - _Y; - p = pa_atomic_ptr_load(&cells[idx]); - - if (p) { - - _Y; - if (!pa_atomic_ptr_cmpxchg(&cells[idx], p, NULL)) - continue; + ptr = pa_atomic_ptr_load(&elem->ptr); - _Y; - pa_atomic_inc(&l->read_idx); - - _Y; - pa_atomic_dec(&l->length); - - return p; - } - - _Y; - idx = reduce(l, idx+1); - } - -#ifdef PROFILE - if (len > N_EXTRA_SCAN) - pa_log_warn("Didn't find used cell after %u iterations.", len); -#endif + stack_push(&l->empty, elem); - return NULL; + return ptr; } -- 1.6.3.3