On Tue, Sep 22, 2015 at 11:49 PM, Dan Streetman <ddstreet@xxxxxxxx> wrote: > On Tue, Sep 22, 2015 at 8:17 AM, Vitaly Wool <vitalywool@xxxxxxxxx> wrote: >> Currently zbud is only capable of allocating not more than >> PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE. This is okay as >> long as only zswap is using it, but other users of zbud may >> (and likely will) want to allocate up to PAGE_SIZE. This patch >> addresses that by skipping the creation of zbud internal >> structure in the beginning of an allocated page (such pages are >> then called 'headless'). >> >> As a zbud page is no longer guaranteed to contain zbud header, the >> following changes had to be applied throughout the code: >> * page->lru to be used for zbud page lists >> * page->private to hold 'under_reclaim' flag >> >> page->private will also be used to indicate if this page contains >> a zbud header in the beginning or not ('headless' flag). >> >> Signed-off-by: Vitaly Wool <vitalywool@xxxxxxxxx> >> --- >> mm/zbud.c | 194 +++++++++++++++++++++++++++++++++++++++++--------------------- >> 1 file changed, 128 insertions(+), 66 deletions(-) >> >> diff --git a/mm/zbud.c b/mm/zbud.c >> index fa48bcdf..7b51eb6 100644 >> --- a/mm/zbud.c >> +++ b/mm/zbud.c >> @@ -105,18 +105,25 @@ struct zbud_pool { >> >> /* >> * struct zbud_header - zbud page metadata occupying the first chunk of each >> - * zbud page. >> + * zbud page, except for HEADLESS pages > > hmm, personally I like "FULL" better than HEADLESS...it's only > headless because it's a full page. Yep, but OTOH, a page with two buddies is also full, so I just wanted to avoid ambiguity here. I have no personal naming preference TBO. :) >> * @buddy: links the zbud page into the unbuddied/buddied lists in the pool >> - * @lru: links the zbud page into the lru list in the pool >> * @first_chunks: the size of the first buddy in chunks, 0 if free >> * @last_chunks: the size of the last buddy in chunks, 0 if free >> */ >> struct zbud_header { >> struct list_head buddy; >> - struct list_head lru; >> unsigned int first_chunks; >> unsigned int last_chunks; >> - bool under_reclaim; >> +}; >> + >> +/* >> + * struct zbud_page_priv - zbud flags to be stored in page->private >> + * @under_reclaim: if a zbud page is under reclaim >> + * @headless: indicates a page where zbud header didn't fit >> + */ >> +struct zbud_page_priv { >> + bool under_reclaim:1; >> + bool headless:1; >> }; > > Hmm, this is just my personal opinion, but I'm not a fan of casting > ->private as a struct, if we're only using it as a bitmap. I'd > suggest just defining bits as an enum (like page flags), e.g. > > enum zbud_flags { > ZBUD_UNDER_RECLAIM, > ZBUD_FULL_PAGE, > }; Agreed, will rework that. > or some names similar to that. then it can be checked with a simple > test_bit() call, and set_bit()/clear_bit(). > > alternately, there are the already-existing PG_private and > PG_private_2 bits in the page flags...but unless we need ->private for > something else, it probably makes more sense to just use it instead of > the PG_private flags. IIRC I used PG_private in my first patch but the thing also is, it's safer to have under_reclaim flag in struct page, too, so page->private seems to be a better fit. >> >> /***************** >> @@ -221,6 +228,7 @@ MODULE_ALIAS("zpool-zbud"); >> *****************/ >> /* Just to make the code easier to read */ >> enum buddy { >> + HEADLESS, >> FIRST, >> LAST >> }; >> @@ -237,12 +245,15 @@ static int size_to_chunks(size_t size) >> /* Initializes the zbud header of a newly allocated zbud page */ >> static struct zbud_header *init_zbud_page(struct page *page) >> { >> + struct zbud_page_priv *ppriv = (struct zbud_page_priv *)page->private; >> struct zbud_header *zhdr = page_address(page); >> + >> + INIT_LIST_HEAD(&page->lru); >> + ppriv->under_reclaim = 0; > > don't forget to initialize the headless/full bit too. (I'll mention > that the page allocation code does clear ->private before handing it > to us, so it should already be 0. but let's not rely on that) > >> + >> zhdr->first_chunks = 0; >> zhdr->last_chunks = 0; >> INIT_LIST_HEAD(&zhdr->buddy); >> - INIT_LIST_HEAD(&zhdr->lru); >> - zhdr->under_reclaim = 0; >> return zhdr; >> } >> >> @@ -267,11 +278,22 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) >> * over the zbud header in the first chunk. >> */ >> handle = (unsigned long)zhdr; >> - if (bud == FIRST) >> + switch (bud) { >> + case FIRST: >> /* skip over zbud header */ >> handle += ZHDR_SIZE_ALIGNED; >> - else /* bud == LAST */ >> + break; >> + case LAST: >> handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); >> + break; >> + case HEADLESS: >> + break; >> + default: >> + /* this should never happen */ >> + pr_err("zbud: invalid buddy value %d\n", bud); >> + handle = 0; >> + break; >> + } >> return handle; >> } >> >> @@ -287,6 +309,7 @@ static int num_free_chunks(struct zbud_header *zhdr) >> /* >> * Rather than branch for different situations, just use the fact that >> * free buddies have a length of zero to simplify everything. >> + * NB: can't be used with HEADLESS pages. >> */ >> return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; >> } >> @@ -353,31 +376,40 @@ void zbud_destroy_pool(struct zbud_pool *pool) >> int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, >> unsigned long *handle) >> { >> - int chunks, i, freechunks; >> + int chunks = 0, i, freechunks; >> struct zbud_header *zhdr = NULL; >> + struct zbud_page_priv *ppriv; >> enum buddy bud; >> struct page *page; >> >> if (!size || (gfp & __GFP_HIGHMEM)) >> return -EINVAL; >> - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) >> + >> + if (size > PAGE_SIZE) >> return -ENOSPC; >> - chunks = size_to_chunks(size); >> - spin_lock(&pool->lock); >> >> - /* First, try to find an unbuddied zbud page. */ >> - zhdr = NULL; >> - for_each_unbuddied_list(i, chunks) { >> - if (!list_empty(&pool->unbuddied[i])) { >> - zhdr = list_first_entry(&pool->unbuddied[i], >> - struct zbud_header, buddy); >> - list_del(&zhdr->buddy); >> - if (zhdr->first_chunks == 0) >> - bud = FIRST; >> - else >> - bud = LAST; >> - goto found; >> + if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) >> + bud = HEADLESS; >> + else { >> + chunks = size_to_chunks(size); >> + spin_lock(&pool->lock); >> + >> + /* First, try to find an unbuddied zbud page. */ >> + zhdr = NULL; >> + for_each_unbuddied_list(i, chunks) { >> + if (!list_empty(&pool->unbuddied[i])) { >> + zhdr = list_first_entry(&pool->unbuddied[i], >> + struct zbud_header, buddy); >> + list_del(&zhdr->buddy); >> + page = virt_to_page(zhdr); >> + if (zhdr->first_chunks == 0) >> + bud = FIRST; >> + else >> + bud = LAST; >> + goto found; >> + } >> } >> + bud = FIRST; >> } >> >> /* Couldn't find unbuddied zbud page, create new one */ >> @@ -388,27 +420,31 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, >> spin_lock(&pool->lock); >> pool->pages_nr++; >> zhdr = init_zbud_page(page); >> - bud = FIRST; >> >> found: >> - if (bud == FIRST) >> - zhdr->first_chunks = chunks; >> - else >> - zhdr->last_chunks = chunks; >> + ppriv = (struct zbud_page_priv *)page->private; >> + if (bud != HEADLESS) { > > another personal opinion...it might look simpler if you just do: > > if (bud == FULL) { > set_bit(ZBUD_FULL_PAGE, &page->private); > goto full; > } Agreed here too. >> + if (bud == FIRST) >> + zhdr->first_chunks = chunks; >> + else >> + zhdr->last_chunks = chunks; >> + >> + if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { >> + /* Add to unbuddied list */ >> + freechunks = num_free_chunks(zhdr); >> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); >> + } else { >> + /* Add to buddied list */ >> + list_add(&zhdr->buddy, &pool->buddied); >> + } >> >> - if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { >> - /* Add to unbuddied list */ >> - freechunks = num_free_chunks(zhdr); >> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); >> - } else { >> - /* Add to buddied list */ >> - list_add(&zhdr->buddy, &pool->buddied); >> - } >> + /* Add/move zbud page to beginning of LRU */ >> + if (!list_empty(&page->lru)) >> + list_del(&page->lru); >> + } else >> + ppriv->headless = true; >> > > insert label here... > > full: > >> - /* Add/move zbud page to beginning of LRU */ >> - if (!list_empty(&zhdr->lru)) >> - list_del(&zhdr->lru); > > no matter the case, this is safe; leave it here for simplicity. a > full page will have a empty list head. > >> - list_add(&zhdr->lru, &pool->lru); >> + list_add(&page->lru, &pool->lru); >> >> *handle = encode_handle(zhdr, bud); >> spin_unlock(&pool->lock); >> @@ -430,28 +466,41 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) >> { >> struct zbud_header *zhdr; >> int freechunks; >> + struct page *page; >> + enum buddy bud; >> + struct zbud_page_priv *ppriv; >> >> spin_lock(&pool->lock); >> zhdr = handle_to_zbud_header(handle); >> + page = virt_to_page(zhdr); >> + ppriv = (struct zbud_page_priv *)page->private; >> >> - /* If first buddy, handle will be page aligned */ >> - if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) >> + if (!(handle & ~PAGE_MASK)) /* HEADLESS page stored */ >> + bud = HEADLESS; >> + else if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) { >> + bud = LAST; >> zhdr->last_chunks = 0; >> - else >> + } else { >> + /* If first buddy, handle will be page aligned */ >> + bud = FIRST; >> zhdr->first_chunks = 0; >> + } >> >> - if (zhdr->under_reclaim) { >> + if (ppriv->under_reclaim) { >> /* zbud page is under reclaim, reclaim will free */ >> spin_unlock(&pool->lock); >> return; >> } >> >> - /* Remove from existing buddy list */ >> - list_del(&zhdr->buddy); >> + if (bud != HEADLESS) { >> + /* Remove from existing buddy list */ >> + list_del(&zhdr->buddy); >> + } >> >> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { >> + if (bud == HEADLESS || >> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) { >> /* zbud page is empty, free */ >> - list_del(&zhdr->lru); >> + list_del(&page->lru); >> free_zbud_page(zhdr); >> pool->pages_nr--; >> } else { >> @@ -505,6 +554,8 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> { >> int i, ret, freechunks; >> struct zbud_header *zhdr; >> + struct page *page; >> + struct zbud_page_priv *ppriv; >> unsigned long first_handle = 0, last_handle = 0; >> >> spin_lock(&pool->lock); >> @@ -514,21 +565,31 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> return -EINVAL; >> } >> for (i = 0; i < retries; i++) { >> - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); >> - list_del(&zhdr->lru); >> - list_del(&zhdr->buddy); >> + page = list_tail_entry(&pool->lru, struct page, lru); >> + ppriv = (struct zbud_page_priv *)page->private; >> + list_del(&page->lru); >> + >> /* Protect zbud page against free */ >> - zhdr->under_reclaim = true; >> - /* >> - * We need encode the handles before unlocking, since we can >> - * race with free that will set (first|last)_chunks to 0 >> - */ >> - first_handle = 0; >> - last_handle = 0; >> - if (zhdr->first_chunks) >> - first_handle = encode_handle(zhdr, FIRST); >> - if (zhdr->last_chunks) >> - last_handle = encode_handle(zhdr, LAST); >> + ppriv->under_reclaim = true; >> + zhdr = page_address(page); >> + if (!ppriv->headless) { >> + list_del(&zhdr->buddy); >> + /* >> + * We need encode the handles before unlocking, since >> + * we can race with free that will set >> + * (first|last)_chunks to 0 >> + */ >> + first_handle = 0; >> + last_handle = 0; >> + if (zhdr->first_chunks) >> + first_handle = encode_handle(zhdr, FIRST); >> + if (zhdr->last_chunks) >> + last_handle = encode_handle(zhdr, LAST); >> + } else { >> + first_handle = encode_handle(zhdr, HEADLESS); >> + last_handle = 0; >> + } >> + >> spin_unlock(&pool->lock); >> >> /* Issue the eviction callback(s) */ >> @@ -544,8 +605,9 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> } >> next: >> spin_lock(&pool->lock); >> - zhdr->under_reclaim = false; >> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { >> + ppriv->under_reclaim = false; >> + if (ppriv->headless || >> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) { > > oops, this isn't right. you're assuming ->headless means the eviction > succeeded. However if pool->ops->evict() failed, that isn't true. Aha, good catch, thanks. Then it looks like we need separate handling for the 'headless' case here, e. g. if (ppriv->headless) { if (ret == 0) { free_zbud_page(zhdr); pool->pages_nr--; spin_unlock(&pool->lock); } } else if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { ... The block under 'if (ret == 0)' may also become a static inline to avoid code duplication. ~vitaly On Tue, Sep 22, 2015 at 11:49 PM, Dan Streetman <ddstreet@xxxxxxxx> wrote: > On Tue, Sep 22, 2015 at 8:17 AM, Vitaly Wool <vitalywool@xxxxxxxxx> wrote: >> Currently zbud is only capable of allocating not more than >> PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE. This is okay as >> long as only zswap is using it, but other users of zbud may >> (and likely will) want to allocate up to PAGE_SIZE. This patch >> addresses that by skipping the creation of zbud internal >> structure in the beginning of an allocated page (such pages are >> then called 'headless'). >> >> As a zbud page is no longer guaranteed to contain zbud header, the >> following changes had to be applied throughout the code: >> * page->lru to be used for zbud page lists >> * page->private to hold 'under_reclaim' flag >> >> page->private will also be used to indicate if this page contains >> a zbud header in the beginning or not ('headless' flag). >> >> Signed-off-by: Vitaly Wool <vitalywool@xxxxxxxxx> >> --- >> mm/zbud.c | 194 +++++++++++++++++++++++++++++++++++++++++--------------------- >> 1 file changed, 128 insertions(+), 66 deletions(-) >> >> diff --git a/mm/zbud.c b/mm/zbud.c >> index fa48bcdf..7b51eb6 100644 >> --- a/mm/zbud.c >> +++ b/mm/zbud.c >> @@ -105,18 +105,25 @@ struct zbud_pool { >> >> /* >> * struct zbud_header - zbud page metadata occupying the first chunk of each >> - * zbud page. >> + * zbud page, except for HEADLESS pages > > hmm, personally I like "FULL" better than HEADLESS...it's only > headless because it's a full page. > >> * @buddy: links the zbud page into the unbuddied/buddied lists in the pool >> - * @lru: links the zbud page into the lru list in the pool >> * @first_chunks: the size of the first buddy in chunks, 0 if free >> * @last_chunks: the size of the last buddy in chunks, 0 if free >> */ >> struct zbud_header { >> struct list_head buddy; >> - struct list_head lru; >> unsigned int first_chunks; >> unsigned int last_chunks; >> - bool under_reclaim; >> +}; >> + >> +/* >> + * struct zbud_page_priv - zbud flags to be stored in page->private >> + * @under_reclaim: if a zbud page is under reclaim >> + * @headless: indicates a page where zbud header didn't fit >> + */ >> +struct zbud_page_priv { >> + bool under_reclaim:1; >> + bool headless:1; >> }; > > Hmm, this is just my personal opinion, but I'm not a fan of casting > ->private as a struct, if we're only using it as a bitmap. I'd > suggest just defining bits as an enum (like page flags), e.g. > > enum zbud_flags { > ZBUD_UNDER_RECLAIM, > ZBUD_FULL_PAGE, > }; > > or some names similar to that. then it can be checked with a simple > test_bit() call, and set_bit()/clear_bit(). > > alternately, there are the already-existing PG_private and > PG_private_2 bits in the page flags...but unless we need ->private for > something else, it probably makes more sense to just use it instead of > the PG_private flags. > >> >> /***************** >> @@ -221,6 +228,7 @@ MODULE_ALIAS("zpool-zbud"); >> *****************/ >> /* Just to make the code easier to read */ >> enum buddy { >> + HEADLESS, >> FIRST, >> LAST >> }; >> @@ -237,12 +245,15 @@ static int size_to_chunks(size_t size) >> /* Initializes the zbud header of a newly allocated zbud page */ >> static struct zbud_header *init_zbud_page(struct page *page) >> { >> + struct zbud_page_priv *ppriv = (struct zbud_page_priv *)page->private; >> struct zbud_header *zhdr = page_address(page); >> + >> + INIT_LIST_HEAD(&page->lru); >> + ppriv->under_reclaim = 0; > > don't forget to initialize the headless/full bit too. (I'll mention > that the page allocation code does clear ->private before handing it > to us, so it should already be 0. but let's not rely on that) > >> + >> zhdr->first_chunks = 0; >> zhdr->last_chunks = 0; >> INIT_LIST_HEAD(&zhdr->buddy); >> - INIT_LIST_HEAD(&zhdr->lru); >> - zhdr->under_reclaim = 0; >> return zhdr; >> } >> >> @@ -267,11 +278,22 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud) >> * over the zbud header in the first chunk. >> */ >> handle = (unsigned long)zhdr; >> - if (bud == FIRST) >> + switch (bud) { >> + case FIRST: >> /* skip over zbud header */ >> handle += ZHDR_SIZE_ALIGNED; >> - else /* bud == LAST */ >> + break; >> + case LAST: >> handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); >> + break; >> + case HEADLESS: >> + break; >> + default: >> + /* this should never happen */ >> + pr_err("zbud: invalid buddy value %d\n", bud); >> + handle = 0; >> + break; >> + } >> return handle; >> } >> >> @@ -287,6 +309,7 @@ static int num_free_chunks(struct zbud_header *zhdr) >> /* >> * Rather than branch for different situations, just use the fact that >> * free buddies have a length of zero to simplify everything. >> + * NB: can't be used with HEADLESS pages. >> */ >> return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; >> } >> @@ -353,31 +376,40 @@ void zbud_destroy_pool(struct zbud_pool *pool) >> int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, >> unsigned long *handle) >> { >> - int chunks, i, freechunks; >> + int chunks = 0, i, freechunks; >> struct zbud_header *zhdr = NULL; >> + struct zbud_page_priv *ppriv; >> enum buddy bud; >> struct page *page; >> >> if (!size || (gfp & __GFP_HIGHMEM)) >> return -EINVAL; >> - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) >> + >> + if (size > PAGE_SIZE) >> return -ENOSPC; >> - chunks = size_to_chunks(size); >> - spin_lock(&pool->lock); >> >> - /* First, try to find an unbuddied zbud page. */ >> - zhdr = NULL; >> - for_each_unbuddied_list(i, chunks) { >> - if (!list_empty(&pool->unbuddied[i])) { >> - zhdr = list_first_entry(&pool->unbuddied[i], >> - struct zbud_header, buddy); >> - list_del(&zhdr->buddy); >> - if (zhdr->first_chunks == 0) >> - bud = FIRST; >> - else >> - bud = LAST; >> - goto found; >> + if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) >> + bud = HEADLESS; >> + else { >> + chunks = size_to_chunks(size); >> + spin_lock(&pool->lock); >> + >> + /* First, try to find an unbuddied zbud page. */ >> + zhdr = NULL; >> + for_each_unbuddied_list(i, chunks) { >> + if (!list_empty(&pool->unbuddied[i])) { >> + zhdr = list_first_entry(&pool->unbuddied[i], >> + struct zbud_header, buddy); >> + list_del(&zhdr->buddy); >> + page = virt_to_page(zhdr); >> + if (zhdr->first_chunks == 0) >> + bud = FIRST; >> + else >> + bud = LAST; >> + goto found; >> + } >> } >> + bud = FIRST; >> } >> >> /* Couldn't find unbuddied zbud page, create new one */ >> @@ -388,27 +420,31 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, >> spin_lock(&pool->lock); >> pool->pages_nr++; >> zhdr = init_zbud_page(page); >> - bud = FIRST; >> >> found: >> - if (bud == FIRST) >> - zhdr->first_chunks = chunks; >> - else >> - zhdr->last_chunks = chunks; >> + ppriv = (struct zbud_page_priv *)page->private; >> + if (bud != HEADLESS) { > > another personal opinion...it might look simpler if you just do: > > if (bud == FULL) { > set_bit(ZBUD_FULL_PAGE, &page->private); > goto full; > } > >> + if (bud == FIRST) >> + zhdr->first_chunks = chunks; >> + else >> + zhdr->last_chunks = chunks; >> + >> + if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { >> + /* Add to unbuddied list */ >> + freechunks = num_free_chunks(zhdr); >> + list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); >> + } else { >> + /* Add to buddied list */ >> + list_add(&zhdr->buddy, &pool->buddied); >> + } >> >> - if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) { >> - /* Add to unbuddied list */ >> - freechunks = num_free_chunks(zhdr); >> - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); >> - } else { >> - /* Add to buddied list */ >> - list_add(&zhdr->buddy, &pool->buddied); >> - } >> + /* Add/move zbud page to beginning of LRU */ >> + if (!list_empty(&page->lru)) >> + list_del(&page->lru); >> + } else >> + ppriv->headless = true; >> > > insert label here... > > full: > >> - /* Add/move zbud page to beginning of LRU */ >> - if (!list_empty(&zhdr->lru)) >> - list_del(&zhdr->lru); > > no matter the case, this is safe; leave it here for simplicity. a > full page will have a empty list head. > >> - list_add(&zhdr->lru, &pool->lru); >> + list_add(&page->lru, &pool->lru); >> >> *handle = encode_handle(zhdr, bud); >> spin_unlock(&pool->lock); >> @@ -430,28 +466,41 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) >> { >> struct zbud_header *zhdr; >> int freechunks; >> + struct page *page; >> + enum buddy bud; >> + struct zbud_page_priv *ppriv; >> >> spin_lock(&pool->lock); >> zhdr = handle_to_zbud_header(handle); >> + page = virt_to_page(zhdr); >> + ppriv = (struct zbud_page_priv *)page->private; >> >> - /* If first buddy, handle will be page aligned */ >> - if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) >> + if (!(handle & ~PAGE_MASK)) /* HEADLESS page stored */ >> + bud = HEADLESS; >> + else if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) { >> + bud = LAST; >> zhdr->last_chunks = 0; >> - else >> + } else { >> + /* If first buddy, handle will be page aligned */ >> + bud = FIRST; >> zhdr->first_chunks = 0; >> + } >> >> - if (zhdr->under_reclaim) { >> + if (ppriv->under_reclaim) { >> /* zbud page is under reclaim, reclaim will free */ >> spin_unlock(&pool->lock); >> return; >> } >> >> - /* Remove from existing buddy list */ >> - list_del(&zhdr->buddy); >> + if (bud != HEADLESS) { >> + /* Remove from existing buddy list */ >> + list_del(&zhdr->buddy); >> + } >> >> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { >> + if (bud == HEADLESS || >> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) { >> /* zbud page is empty, free */ >> - list_del(&zhdr->lru); >> + list_del(&page->lru); >> free_zbud_page(zhdr); >> pool->pages_nr--; >> } else { >> @@ -505,6 +554,8 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> { >> int i, ret, freechunks; >> struct zbud_header *zhdr; >> + struct page *page; >> + struct zbud_page_priv *ppriv; >> unsigned long first_handle = 0, last_handle = 0; >> >> spin_lock(&pool->lock); >> @@ -514,21 +565,31 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> return -EINVAL; >> } >> for (i = 0; i < retries; i++) { >> - zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); >> - list_del(&zhdr->lru); >> - list_del(&zhdr->buddy); >> + page = list_tail_entry(&pool->lru, struct page, lru); >> + ppriv = (struct zbud_page_priv *)page->private; >> + list_del(&page->lru); >> + >> /* Protect zbud page against free */ >> - zhdr->under_reclaim = true; >> - /* >> - * We need encode the handles before unlocking, since we can >> - * race with free that will set (first|last)_chunks to 0 >> - */ >> - first_handle = 0; >> - last_handle = 0; >> - if (zhdr->first_chunks) >> - first_handle = encode_handle(zhdr, FIRST); >> - if (zhdr->last_chunks) >> - last_handle = encode_handle(zhdr, LAST); >> + ppriv->under_reclaim = true; >> + zhdr = page_address(page); >> + if (!ppriv->headless) { >> + list_del(&zhdr->buddy); >> + /* >> + * We need encode the handles before unlocking, since >> + * we can race with free that will set >> + * (first|last)_chunks to 0 >> + */ >> + first_handle = 0; >> + last_handle = 0; >> + if (zhdr->first_chunks) >> + first_handle = encode_handle(zhdr, FIRST); >> + if (zhdr->last_chunks) >> + last_handle = encode_handle(zhdr, LAST); >> + } else { >> + first_handle = encode_handle(zhdr, HEADLESS); >> + last_handle = 0; >> + } >> + >> spin_unlock(&pool->lock); >> >> /* Issue the eviction callback(s) */ >> @@ -544,8 +605,9 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) >> } >> next: >> spin_lock(&pool->lock); >> - zhdr->under_reclaim = false; >> - if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { >> + ppriv->under_reclaim = false; >> + if (ppriv->headless || >> + (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) { > > oops, this isn't right. you're assuming ->headless means the eviction > succeeded. However if pool->ops->evict() failed, that isn't true. > >> /* >> * Both buddies are now free, free the zbud page and >> * return success. >> @@ -565,7 +627,7 @@ next: >> } >> >> /* add to beginning of LRU */ >> - list_add(&zhdr->lru, &pool->lru); >> + list_add(&page->lru, &pool->lru); >> } >> spin_unlock(&pool->lock); >> return -EAGAIN; >> -- >> 2.4.2 >> -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>