On Wed, Jan 11, 2017 at 3:51 PM, Vitaly Wool <vitalywool@xxxxxxxxx> wrote: > On Wed, 11 Jan 2017 13:03:06 -0500 > Dan Streetman <ddstreet@xxxxxxxx> wrote: > > <snip> >> >>> provided that I take the lock for the headless case above. That will >> >>> work won't it? >> >> >> >> in this specific case - since every single kref_put in the driver is >> >> protected by the pool lock - yeah, you can do that, since you know >> >> that specific kref_put didn't free the page and no other kref_put >> >> could happen since you're holding the pool lock. >> >> >> >> but that specific requirement isn't made obvious by the code, and I >> >> can see how a future patch could release the pool lock between the >> >> kref_put and lru list add, without realizing that introduces a bug. >> >> isn't it better to just add it to the lru list before you put the >> >> kref? just a suggestion. >> > >> > That would require to add it to the lru separately for headless pages >> > above, I don't think it makes things better or less error prone. >> > I would rather add a comment before list_add. >> > >> >> i'd also suggest the pages_nr dec go into the page release function, >> >> instead of checking every kref_put return value; that's easier and >> >> less prone to forgetting to check one of the kref_puts. >> > >> > That will mean z3fold_header should contain a pointer to its pool. I'm >> > not sure it is worth it but I can do that :) >> >> the header's already rounded up to chunk size, so if there's room then >> it won't take any extra memory. but it works either way. > > So let's have it like this then: > > > With both coming and already present locking optimizations, > introducing kref to reference-count z3fold objects is the right > thing to do. Moreover, it makes buddied list no longer necessary, > and allows for a simpler handling of headless pages. > > Signed-off-by: Vitaly Wool <vitalywool@xxxxxxxxx> Reviewed-by: Dan Streetman <ddstreet@xxxxxxxx> > --- > mm/z3fold.c | 151 ++++++++++++++++++++++++++---------------------------------- > 1 file changed, 66 insertions(+), 85 deletions(-) > > diff --git a/mm/z3fold.c b/mm/z3fold.c > index 4325bde..f0d2eaf 100644 > --- a/mm/z3fold.c > +++ b/mm/z3fold.c > @@ -52,6 +52,7 @@ enum buddy { > * z3fold page, except for HEADLESS pages > * @buddy: links the z3fold page into the relevant list in the pool > * @page_lock: per-page lock > + * @refcount: reference cound for the z3fold page > * @first_chunks: the size of the first buddy in chunks, 0 if free > * @middle_chunks: the size of the middle buddy in chunks, 0 if free > * @last_chunks: the size of the last buddy in chunks, 0 if free > @@ -60,6 +61,7 @@ enum buddy { > struct z3fold_header { > struct list_head buddy; > spinlock_t page_lock; > + struct kref refcount; > unsigned short first_chunks; > unsigned short middle_chunks; > unsigned short last_chunks; > @@ -95,8 +97,6 @@ struct z3fold_header { > * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies; > * the lists each z3fold page is added to depends on the size of > * its free region. > - * @buddied: list tracking the z3fold pages that contain 3 buddies; > - * these z3fold pages are full > * @lru: list tracking the z3fold pages in LRU order by most recently > * added buddy. > * @pages_nr: number of z3fold pages in the pool. > @@ -109,7 +109,6 @@ struct z3fold_header { > struct z3fold_pool { > spinlock_t lock; > struct list_head unbuddied[NCHUNKS]; > - struct list_head buddied; > struct list_head lru; > atomic64_t pages_nr; > const struct z3fold_ops *ops; > @@ -121,8 +120,7 @@ struct z3fold_pool { > * Internal z3fold page flags > */ > enum z3fold_page_flags { > - UNDER_RECLAIM = 0, > - PAGE_HEADLESS, > + PAGE_HEADLESS = 0, > MIDDLE_CHUNK_MAPPED, > }; > > @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page) > struct z3fold_header *zhdr = page_address(page); > > INIT_LIST_HEAD(&page->lru); > - clear_bit(UNDER_RECLAIM, &page->private); > clear_bit(PAGE_HEADLESS, &page->private); > clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); > > spin_lock_init(&zhdr->page_lock); > + kref_init(&zhdr->refcount); > zhdr->first_chunks = 0; > zhdr->middle_chunks = 0; > zhdr->last_chunks = 0; > @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page) > } > > /* Resets the struct page fields and frees the page */ > -static void free_z3fold_page(struct z3fold_header *zhdr) > +static void free_z3fold_page(struct page *page) > { > - __free_page(virt_to_page(zhdr)); > + __free_page(page); > +} > + > +static void release_z3fold_page(struct kref *ref) > +{ > + struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, > + refcount); > + struct page *page = virt_to_page(zhdr); > + if (!list_empty(&zhdr->buddy)) > + list_del(&zhdr->buddy); > + if (!list_empty(&page->lru)) > + list_del(&page->lru); > + free_z3fold_page(page); > } > > /* Lock a z3fold page */ > @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp, > spin_lock_init(&pool->lock); > for_each_unbuddied_list(i, 0) > INIT_LIST_HEAD(&pool->unbuddied[i]); > - INIT_LIST_HEAD(&pool->buddied); > INIT_LIST_HEAD(&pool->lru); > atomic64_set(&pool->pages_nr, 0); > pool->ops = ops; > @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, > spin_unlock(&pool->lock); > continue; > } > + kref_get(&zhdr->refcount); > list_del_init(&zhdr->buddy); > spin_unlock(&pool->lock); > > @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, > else if (zhdr->middle_chunks == 0) > bud = MIDDLE; > else { > + z3fold_page_unlock(zhdr); > spin_lock(&pool->lock); > - list_add(&zhdr->buddy, &pool->buddied); > + if (kref_put(&zhdr->refcount, > + release_z3fold_page)) > + atomic64_dec(&pool->pages_nr); > spin_unlock(&pool->lock); > - z3fold_page_unlock(zhdr); > pr_err("No free chunks in unbuddied\n"); > WARN_ON(1); > continue; > @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, > /* Add to unbuddied list */ > freechunks = num_free_chunks(zhdr); > list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); > - } else { > - /* Add to buddied list */ > - list_add(&zhdr->buddy, &pool->buddied); > } > > headless: > @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) > } > } > > - if (test_bit(UNDER_RECLAIM, &page->private)) { > - /* z3fold page is under reclaim, reclaim will free */ > - if (bud != HEADLESS) > - z3fold_page_unlock(zhdr); > - return; > - } > - > - /* Remove from existing buddy list */ > - if (bud != HEADLESS) { > - spin_lock(&pool->lock); > - /* > - * this object may have been removed from its list by > - * z3fold_alloc(). In that case we just do nothing, > - * z3fold_alloc() will allocate an object and add the page > - * to the relevant list. > - */ > - if (!list_empty(&zhdr->buddy)) { > - list_del(&zhdr->buddy); > - } else { > - spin_unlock(&pool->lock); > - z3fold_page_unlock(zhdr); > - return; > - } > - spin_unlock(&pool->lock); > - } > - > - if (bud == HEADLESS || > - (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 && > - zhdr->last_chunks == 0)) { > - /* z3fold page is empty, free */ > + if (bud == HEADLESS) { > spin_lock(&pool->lock); > list_del(&page->lru); > spin_unlock(&pool->lock); > - clear_bit(PAGE_HEADLESS, &page->private); > - if (bud != HEADLESS) > - z3fold_page_unlock(zhdr); > - free_z3fold_page(zhdr); > + free_z3fold_page(page); > atomic64_dec(&pool->pages_nr); > } else { > - z3fold_compact_page(zhdr); > - /* Add to the unbuddied list */ > + if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 || > + zhdr->last_chunks != 0) { > + z3fold_compact_page(zhdr); > + /* Add to the unbuddied list */ > + spin_lock(&pool->lock); > + if (!list_empty(&zhdr->buddy)) > + list_del(&zhdr->buddy); > + freechunks = num_free_chunks(zhdr); > + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); > + spin_unlock(&pool->lock); > + } > + z3fold_page_unlock(zhdr); > spin_lock(&pool->lock); > - freechunks = num_free_chunks(zhdr); > - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); > + if (kref_put(&zhdr->refcount, release_z3fold_page)) > + atomic64_dec(&pool->pages_nr); > spin_unlock(&pool->lock); > - z3fold_page_unlock(zhdr); > } > > } > @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) > return -EINVAL; > } > page = list_last_entry(&pool->lru, struct page, lru); > - list_del(&page->lru); > + list_del_init(&page->lru); > > - /* Protect z3fold page against free */ > - set_bit(UNDER_RECLAIM, &page->private); > zhdr = page_address(page); > if (!test_bit(PAGE_HEADLESS, &page->private)) { > - list_del(&zhdr->buddy); > + if (!list_empty(&zhdr->buddy)) > + list_del_init(&zhdr->buddy); > + kref_get(&zhdr->refcount); > spin_unlock(&pool->lock); > z3fold_page_lock(zhdr); > /* > @@ -655,30 +641,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) > goto next; > } > next: > - if (!test_bit(PAGE_HEADLESS, &page->private)) > - z3fold_page_lock(zhdr); > - clear_bit(UNDER_RECLAIM, &page->private); > - if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) || > - (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 && > - zhdr->middle_chunks == 0)) { > - /* > - * All buddies are now free, free the z3fold page and > - * return success. > - */ > - if (!test_and_clear_bit(PAGE_HEADLESS, &page->private)) > - z3fold_page_unlock(zhdr); > - free_z3fold_page(zhdr); > - atomic64_dec(&pool->pages_nr); > - return 0; > - } else if (!test_bit(PAGE_HEADLESS, &page->private)) { > - if (zhdr->first_chunks != 0 && > - zhdr->last_chunks != 0 && > - zhdr->middle_chunks != 0) { > - /* Full, add to buddied list */ > - spin_lock(&pool->lock); > - list_add(&zhdr->buddy, &pool->buddied); > - spin_unlock(&pool->lock); > + if (test_bit(PAGE_HEADLESS, &page->private)) { > + if (ret == 0) { > + free_z3fold_page(page); > + return 0; > } else { > + spin_lock(&pool->lock); > + } > + } else { > + z3fold_page_lock(zhdr); > + if ((zhdr->first_chunks || zhdr->last_chunks || > + zhdr->middle_chunks) && > + !(zhdr->first_chunks && zhdr->last_chunks && > + zhdr->middle_chunks)) { > z3fold_compact_page(zhdr); > /* add to unbuddied list */ > spin_lock(&pool->lock); > @@ -687,13 +662,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) > &pool->unbuddied[freechunks]); > spin_unlock(&pool->lock); > } > - } > - > - if (!test_bit(PAGE_HEADLESS, &page->private)) > z3fold_page_unlock(zhdr); > + spin_lock(&pool->lock); > + if (kref_put(&zhdr->refcount, release_z3fold_page)) > + atomic64_dec(&pool->pages_nr); > + return 0; > + } > + } > > - spin_lock(&pool->lock); > - /* add to beginning of LRU */ > + /* > + * Add to the beginning of LRU. > + * Pool lock has to be kept here to ensure the page has > + * not already been released > + */ > list_add(&page->lru, &pool->lru); > } > spin_unlock(&pool->lock); > -- > 2.4.2 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>