On Wed, 11 Jan 2017 13:03:06 -0500 Dan Streetman <ddstreet@xxxxxxxx> wrote: <snip> > >>> provided that I take the lock for the headless case above. That will > >>> work won't it? > >> > >> in this specific case - since every single kref_put in the driver is > >> protected by the pool lock - yeah, you can do that, since you know > >> that specific kref_put didn't free the page and no other kref_put > >> could happen since you're holding the pool lock. > >> > >> but that specific requirement isn't made obvious by the code, and I > >> can see how a future patch could release the pool lock between the > >> kref_put and lru list add, without realizing that introduces a bug. > >> isn't it better to just add it to the lru list before you put the > >> kref? just a suggestion. > > > > That would require to add it to the lru separately for headless pages > > above, I don't think it makes things better or less error prone. > > I would rather add a comment before list_add. > > > >> i'd also suggest the pages_nr dec go into the page release function, > >> instead of checking every kref_put return value; that's easier and > >> less prone to forgetting to check one of the kref_puts. > > > > That will mean z3fold_header should contain a pointer to its pool. I'm > > not sure it is worth it but I can do that :) > > the header's already rounded up to chunk size, so if there's room then > it won't take any extra memory. but it works either way. So let's have it like this then: With both coming and already present locking optimizations, introducing kref to reference-count z3fold objects is the right thing to do. Moreover, it makes buddied list no longer necessary, and allows for a simpler handling of headless pages. Signed-off-by: Vitaly Wool <vitalywool@xxxxxxxxx> --- mm/z3fold.c | 151 ++++++++++++++++++++++++++---------------------------------- 1 file changed, 66 insertions(+), 85 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 4325bde..f0d2eaf 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -52,6 +52,7 @@ enum buddy { * z3fold page, except for HEADLESS pages * @buddy: links the z3fold page into the relevant list in the pool * @page_lock: per-page lock + * @refcount: reference cound for the z3fold page * @first_chunks: the size of the first buddy in chunks, 0 if free * @middle_chunks: the size of the middle buddy in chunks, 0 if free * @last_chunks: the size of the last buddy in chunks, 0 if free @@ -60,6 +61,7 @@ enum buddy { struct z3fold_header { struct list_head buddy; spinlock_t page_lock; + struct kref refcount; unsigned short first_chunks; unsigned short middle_chunks; unsigned short last_chunks; @@ -95,8 +97,6 @@ struct z3fold_header { * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies; * the lists each z3fold page is added to depends on the size of * its free region. - * @buddied: list tracking the z3fold pages that contain 3 buddies; - * these z3fold pages are full * @lru: list tracking the z3fold pages in LRU order by most recently * added buddy. * @pages_nr: number of z3fold pages in the pool. @@ -109,7 +109,6 @@ struct z3fold_header { struct z3fold_pool { spinlock_t lock; struct list_head unbuddied[NCHUNKS]; - struct list_head buddied; struct list_head lru; atomic64_t pages_nr; const struct z3fold_ops *ops; @@ -121,8 +120,7 @@ struct z3fold_pool { * Internal z3fold page flags */ enum z3fold_page_flags { - UNDER_RECLAIM = 0, - PAGE_HEADLESS, + PAGE_HEADLESS = 0, MIDDLE_CHUNK_MAPPED, }; @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page) struct z3fold_header *zhdr = page_address(page); INIT_LIST_HEAD(&page->lru); - clear_bit(UNDER_RECLAIM, &page->private); clear_bit(PAGE_HEADLESS, &page->private); clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); spin_lock_init(&zhdr->page_lock); + kref_init(&zhdr->refcount); zhdr->first_chunks = 0; zhdr->middle_chunks = 0; zhdr->last_chunks = 0; @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page) } /* Resets the struct page fields and frees the page */ -static void free_z3fold_page(struct z3fold_header *zhdr) +static void free_z3fold_page(struct page *page) { - __free_page(virt_to_page(zhdr)); + __free_page(page); +} + +static void release_z3fold_page(struct kref *ref) +{ + struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, + refcount); + struct page *page = virt_to_page(zhdr); + if (!list_empty(&zhdr->buddy)) + list_del(&zhdr->buddy); + if (!list_empty(&page->lru)) + list_del(&page->lru); + free_z3fold_page(page); } /* Lock a z3fold page */ @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp, spin_lock_init(&pool->lock); for_each_unbuddied_list(i, 0) INIT_LIST_HEAD(&pool->unbuddied[i]); - INIT_LIST_HEAD(&pool->buddied); INIT_LIST_HEAD(&pool->lru); atomic64_set(&pool->pages_nr, 0); pool->ops = ops; @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_unlock(&pool->lock); continue; } + kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); spin_unlock(&pool->lock); @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, else if (zhdr->middle_chunks == 0) bud = MIDDLE; else { + z3fold_page_unlock(zhdr); spin_lock(&pool->lock); - list_add(&zhdr->buddy, &pool->buddied); + if (kref_put(&zhdr->refcount, + release_z3fold_page)) + atomic64_dec(&pool->pages_nr); spin_unlock(&pool->lock); - z3fold_page_unlock(zhdr); pr_err("No free chunks in unbuddied\n"); WARN_ON(1); continue; @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, /* Add to unbuddied list */ freechunks = num_free_chunks(zhdr); list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); - } else { - /* Add to buddied list */ - list_add(&zhdr->buddy, &pool->buddied); } headless: @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) } } - if (test_bit(UNDER_RECLAIM, &page->private)) { - /* z3fold page is under reclaim, reclaim will free */ - if (bud != HEADLESS) - z3fold_page_unlock(zhdr); - return; - } - - /* Remove from existing buddy list */ - if (bud != HEADLESS) { - spin_lock(&pool->lock); - /* - * this object may have been removed from its list by - * z3fold_alloc(). In that case we just do nothing, - * z3fold_alloc() will allocate an object and add the page - * to the relevant list. - */ - if (!list_empty(&zhdr->buddy)) { - list_del(&zhdr->buddy); - } else { - spin_unlock(&pool->lock); - z3fold_page_unlock(zhdr); - return; - } - spin_unlock(&pool->lock); - } - - if (bud == HEADLESS || - (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 && - zhdr->last_chunks == 0)) { - /* z3fold page is empty, free */ + if (bud == HEADLESS) { spin_lock(&pool->lock); list_del(&page->lru); spin_unlock(&pool->lock); - clear_bit(PAGE_HEADLESS, &page->private); - if (bud != HEADLESS) - z3fold_page_unlock(zhdr); - free_z3fold_page(zhdr); + free_z3fold_page(page); atomic64_dec(&pool->pages_nr); } else { - z3fold_compact_page(zhdr); - /* Add to the unbuddied list */ + if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 || + zhdr->last_chunks != 0) { + z3fold_compact_page(zhdr); + /* Add to the unbuddied list */ + spin_lock(&pool->lock); + if (!list_empty(&zhdr->buddy)) + list_del(&zhdr->buddy); + freechunks = num_free_chunks(zhdr); + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); + spin_unlock(&pool->lock); + } + z3fold_page_unlock(zhdr); spin_lock(&pool->lock); - freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); + if (kref_put(&zhdr->refcount, release_z3fold_page)) + atomic64_dec(&pool->pages_nr); spin_unlock(&pool->lock); - z3fold_page_unlock(zhdr); } } @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) return -EINVAL; } page = list_last_entry(&pool->lru, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); - /* Protect z3fold page against free */ - set_bit(UNDER_RECLAIM, &page->private); zhdr = page_address(page); if (!test_bit(PAGE_HEADLESS, &page->private)) { - list_del(&zhdr->buddy); + if (!list_empty(&zhdr->buddy)) + list_del_init(&zhdr->buddy); + kref_get(&zhdr->refcount); spin_unlock(&pool->lock); z3fold_page_lock(zhdr); /* @@ -655,30 +641,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: - if (!test_bit(PAGE_HEADLESS, &page->private)) - z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); - if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) || - (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 && - zhdr->middle_chunks == 0)) { - /* - * All buddies are now free, free the z3fold page and - * return success. - */ - if (!test_and_clear_bit(PAGE_HEADLESS, &page->private)) - z3fold_page_unlock(zhdr); - free_z3fold_page(zhdr); - atomic64_dec(&pool->pages_nr); - return 0; - } else if (!test_bit(PAGE_HEADLESS, &page->private)) { - if (zhdr->first_chunks != 0 && - zhdr->last_chunks != 0 && - zhdr->middle_chunks != 0) { - /* Full, add to buddied list */ - spin_lock(&pool->lock); - list_add(&zhdr->buddy, &pool->buddied); - spin_unlock(&pool->lock); + if (test_bit(PAGE_HEADLESS, &page->private)) { + if (ret == 0) { + free_z3fold_page(page); + return 0; } else { + spin_lock(&pool->lock); + } + } else { + z3fold_page_lock(zhdr); + if ((zhdr->first_chunks || zhdr->last_chunks || + zhdr->middle_chunks) && + !(zhdr->first_chunks && zhdr->last_chunks && + zhdr->middle_chunks)) { z3fold_compact_page(zhdr); /* add to unbuddied list */ spin_lock(&pool->lock); @@ -687,13 +662,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) &pool->unbuddied[freechunks]); spin_unlock(&pool->lock); } - } - - if (!test_bit(PAGE_HEADLESS, &page->private)) z3fold_page_unlock(zhdr); + spin_lock(&pool->lock); + if (kref_put(&zhdr->refcount, release_z3fold_page)) + atomic64_dec(&pool->pages_nr); + return 0; + } + } - spin_lock(&pool->lock); - /* add to beginning of LRU */ + /* + * Add to the beginning of LRU. + * Pool lock has to be kept here to ensure the page has + * not already been released + */ list_add(&page->lru, &pool->lru); } spin_unlock(&pool->lock); -- 2.4.2 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>