> -----Original Message----- > From: owner-linux-mm@xxxxxxxxx [mailto:owner-linux-mm@xxxxxxxxx] On > Behalf Of Dennis Zhou > Sent: 2019年2月28日 10:19 > To: Dennis Zhou <dennis@xxxxxxxxxx>; Tejun Heo <tj@xxxxxxxxxx>; Christoph > Lameter <cl@xxxxxxxxx> > Cc: Vlad Buslov <vladbu@xxxxxxxxxxxx>; kernel-team@xxxxxx; > linux-mm@xxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx > Subject: [PATCH 06/12] percpu: set PCPU_BITMAP_BLOCK_SIZE to PAGE_SIZE > > Previously, block size was flexible based on the constraint that the > GCD(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) > 1. However, this carried the > overhead that keeping a floating number of populated free pages required > scanning over the free regions of a chunk. > > Setting the block size to be fixed at PAGE_SIZE lets us know when an empty > page becomes used as we will break a full contig_hint of a block. > This means we no longer have to scan the whole chunk upon breaking a > contig_hint which empty page management piggybacks off. A later patch > takes advantage of this to optimize the allocation path by only scanning > forward using the scan_hint introduced later too. > > Signed-off-by: Dennis Zhou <dennis@xxxxxxxxxx> > --- > include/linux/percpu.h | 12 ++--- > mm/percpu-km.c | 2 +- > mm/percpu.c | 111 +++++++++++++++++------------------------ > 3 files changed, 49 insertions(+), 76 deletions(-) > > diff --git a/include/linux/percpu.h b/include/linux/percpu.h index > 70b7123f38c7..9909dc0e273a 100644 > --- a/include/linux/percpu.h > +++ b/include/linux/percpu.h > @@ -26,16 +26,10 @@ > #define PCPU_MIN_ALLOC_SHIFT 2 > #define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT) > > -/* number of bits per page, used to trigger a scan if blocks are > PAGE_SIZE > */ > -#define PCPU_BITS_PER_PAGE (PAGE_SIZE >> > PCPU_MIN_ALLOC_SHIFT) > - > /* > - * This determines the size of each metadata block. There are several > subtle > - * constraints around this constant. The reserved region must be a multiple > of > - * PCPU_BITMAP_BLOCK_SIZE. Additionally, PCPU_BITMAP_BLOCK_SIZE > must be a > - * multiple of PAGE_SIZE or PAGE_SIZE must be a multiple of > - * PCPU_BITMAP_BLOCK_SIZE to align with the populated page map. The > unit_size > - * also has to be a multiple of PCPU_BITMAP_BLOCK_SIZE to ensure full > blocks. > + * The PCPU_BITMAP_BLOCK_SIZE must be the same size as PAGE_SIZE as > the > + * updating of hints is used to manage the nr_empty_pop_pages in both > + * the chunk and globally. 
>   */
>  #define PCPU_BITMAP_BLOCK_SIZE		PAGE_SIZE
>  #define PCPU_BITMAP_BLOCK_BITS		(PCPU_BITMAP_BLOCK_SIZE >>	\
> 					 PCPU_MIN_ALLOC_SHIFT)
> diff --git a/mm/percpu-km.c b/mm/percpu-km.c
> index 0f643dc2dc65..c10bf7466596 100644
> --- a/mm/percpu-km.c
> +++ b/mm/percpu-km.c
> @@ -70,7 +70,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
>  	chunk->base_addr = page_address(pages) - pcpu_group_offsets[0];
>
>  	spin_lock_irqsave(&pcpu_lock, flags);
> -	pcpu_chunk_populated(chunk, 0, nr_pages, false);
> +	pcpu_chunk_populated(chunk, 0, nr_pages);
>  	spin_unlock_irqrestore(&pcpu_lock, flags);
>
>  	pcpu_stats_chunk_alloc();
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 3d7deece9556..967c9cc3a928 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -527,37 +527,21 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
>  	__pcpu_chunk_move(chunk, nslot, oslot < nslot);
>  }
>
> -/**
> - * pcpu_cnt_pop_pages- counts populated backing pages in range
> +/*
> + * pcpu_update_empty_pages - update empty page counters
>   * @chunk: chunk of interest
> - * @bit_off: start offset
> - * @bits: size of area to check
> + * @nr: nr of empty pages
>   *
> - * Calculates the number of populated pages in the region
> - * [page_start, page_end).  This keeps track of how many empty populated
> - * pages are available and decide if async work should be scheduled.
> - *
> - * RETURNS:
> - * The nr of populated pages.
> + * This is used to keep track of the empty pages now based on the premise
> + * a pcpu_block_md covers a page.  The hint update functions recognize if
> + * a block is made full or broken to calculate deltas for keeping track of
> + * free pages.
>   */
> -static inline int pcpu_cnt_pop_pages(struct pcpu_chunk *chunk, int bit_off,
> -				     int bits)
> +static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
>  {
> -	int page_start = PFN_UP(bit_off * PCPU_MIN_ALLOC_SIZE);
> -	int page_end = PFN_DOWN((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
> -
> -	if (page_start >= page_end)
> -		return 0;
> -
> -	/*
> -	 * bitmap_weight counts the number of bits set in a bitmap up to
> -	 * the specified number of bits.  This is counting the populated
> -	 * pages up to page_end and then subtracting the populated pages
> -	 * up to page_start to count the populated pages in
> -	 * [page_start, page_end).
> -	 */
> -	return bitmap_weight(chunk->populated, page_end) -
> -	       bitmap_weight(chunk->populated, page_start);
> +	chunk->nr_empty_pop_pages += nr;
> +	if (chunk != pcpu_reserved_chunk)
> +		pcpu_nr_empty_pop_pages += nr;
>  }
>
>  /*
> @@ -611,36 +595,19 @@ static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
>   * Updates:
>   *      chunk->contig_bits
>   *      chunk->contig_bits_start
> - *      nr_empty_pop_pages (chunk and global)
>   */
>  static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
>  {
> -	int bit_off, bits, nr_empty_pop_pages;
> +	int bit_off, bits;
>
>  	/* clear metadata */
>  	chunk->contig_bits = 0;
>
>  	bit_off = chunk->first_bit;
> -	bits = nr_empty_pop_pages = 0;
> +	bits = 0;
>  	pcpu_for_each_md_free_region(chunk, bit_off, bits) {
>  		pcpu_chunk_update(chunk, bit_off, bits);
> -
> -		nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
>  	}
> -
> -	/*
> -	 * Keep track of nr_empty_pop_pages.
> -	 *
> -	 * The chunk maintains the previous number of free pages it held,
> -	 * so the delta is used to update the global counter.  The reserved
> -	 * chunk is not part of the free page count as they are populated
> -	 * at init and are special to serving reserved allocations.
> -	 */
> -	if (chunk != pcpu_reserved_chunk)
> -		pcpu_nr_empty_pop_pages +=
> -			(nr_empty_pop_pages - chunk->nr_empty_pop_pages);
> -
> -	chunk->nr_empty_pop_pages = nr_empty_pop_pages;
>  }
>
>  /**
> @@ -712,6 +679,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
>  static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  					 int bits)
>  {
> +	int nr_empty_pages = 0;
>  	struct pcpu_block_md *s_block, *e_block, *block;
>  	int s_index, e_index;	/* block indexes of the freed allocation */
>  	int s_off, e_off;	/* block offsets of the freed allocation */
> @@ -736,6 +704,9 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  	 * If the allocation breaks the contig_hint, a scan is required to
>  	 * restore this hint.
>  	 */
> +	if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
> +		nr_empty_pages++;
> +
>  	if (s_off == s_block->first_free)
>  		s_block->first_free = find_next_zero_bit(
>  					pcpu_index_alloc_map(chunk, s_index),
> @@ -763,6 +734,9 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  	 * Update e_block.
>  	 */
>  	if (s_index != e_index) {
> +		if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
> +			nr_empty_pages++;
> +
>  		/*
>  		 * When the allocation is across blocks, the end is along
>  		 * the left part of the e_block.
> @@ -787,6 +761,7 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  		}
>
>  		/* update in-between md_blocks */
> +		nr_empty_pages += (e_index - s_index - 1);
>  		for (block = s_block + 1; block < e_block; block++) {
>  			block->contig_hint = 0;
>  			block->left_free = 0;
> @@ -794,6 +769,9 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  		}
>  	}
>
> +	if (nr_empty_pages)
> +		pcpu_update_empty_pages(chunk, -1 * nr_empty_pages);
> +
>  	/*
>  	 * The only time a full chunk scan is required is if the chunk
>  	 * contig hint is broken.  Otherwise, it means a smaller space
> @@ -826,6 +804,7 @@ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
>  static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
>  					 int bits)
>  {
> +	int nr_empty_pages = 0;
>  	struct pcpu_block_md *s_block, *e_block, *block;
>  	int s_index, e_index;	/* block indexes of the freed allocation */
>  	int s_off, e_off;	/* block offsets of the freed allocation */
> @@ -879,14 +858,19 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
>
>  	/* update s_block */
>  	e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
> +	if (!start && e_off == PCPU_BITMAP_BLOCK_BITS)
> +		nr_empty_pages++;
>  	pcpu_block_update(s_block, start, e_off);
>
>  	/* freeing in the same block */
>  	if (s_index != e_index) {
>  		/* update e_block */
> +		if (end == PCPU_BITMAP_BLOCK_BITS)
> +			nr_empty_pages++;
>  		pcpu_block_update(e_block, 0, end);
>
>  		/* reset md_blocks in the middle */
> +		nr_empty_pages += (e_index - s_index - 1);
>  		for (block = s_block + 1; block < e_block; block++) {
>  			block->first_free = 0;
>  			block->contig_hint_start = 0;
> @@ -896,15 +880,16 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
>  		}
>  	}
>
> +	if (nr_empty_pages)
> +		pcpu_update_empty_pages(chunk, nr_empty_pages);
> +
>  	/*
> -	 * Refresh chunk metadata when the free makes a page free, a block
> -	 * free, or spans across blocks.  The contig hint may be off by up to
> -	 * a page, but if the hint is contained in a block, it will be accurate
> -	 * with the else condition below.
> +	 * Refresh chunk metadata when the free makes a block free or spans
> +	 * across blocks.  The contig_hint may be off by up to a page, but if
> +	 * the contig_hint is contained in a block, it will be accurate with
> +	 * the else condition below.
>  	 */
> -	if ((ALIGN_DOWN(end, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS)) >
> -	     ALIGN(start, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS))) ||
> -	    s_index != e_index)
> +	if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index)
>  		pcpu_chunk_refresh_hint(chunk);
>  	else
>  		pcpu_chunk_update(chunk, pcpu_block_off_to_off(s_index, start),
> @@ -1164,9 +1149,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
>  	chunk->immutable = true;
>  	bitmap_fill(chunk->populated, chunk->nr_pages);
>  	chunk->nr_populated = chunk->nr_pages;
> -	chunk->nr_empty_pop_pages =
> -		pcpu_cnt_pop_pages(chunk, start_offset / PCPU_MIN_ALLOC_SIZE,
> -				   map_size / PCPU_MIN_ALLOC_SIZE);
> +	chunk->nr_empty_pop_pages = chunk->nr_pages;
>
>  	chunk->contig_bits = map_size / PCPU_MIN_ALLOC_SIZE;
>  	chunk->free_bytes = map_size;
> @@ -1261,7 +1244,6 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
>   * @chunk: pcpu_chunk which got populated
>   * @page_start: the start page
>   * @page_end: the end page
> - * @for_alloc: if this is to populate for allocation
>   *
>   * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
>   * the bookkeeping information accordingly.  Must be called after each
> @@ -1271,7 +1253,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
>   * is to serve an allocation in that area.
>   */
>  static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
> -				 int page_end, bool for_alloc)
> +				 int page_end)
>  {
>  	int nr = page_end - page_start;
>
> @@ -1281,10 +1263,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
>  	chunk->nr_populated += nr;
>  	pcpu_nr_populated += nr;
>
> -	if (!for_alloc) {
> -		chunk->nr_empty_pop_pages += nr;
> -		pcpu_nr_empty_pop_pages += nr;
> -	}
> +	pcpu_update_empty_pages(chunk, nr);
>  }
>
>  /**
> @@ -1306,9 +1285,9 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
>
>  	bitmap_clear(chunk->populated, page_start, nr);
>  	chunk->nr_populated -= nr;
> -	chunk->nr_empty_pop_pages -= nr;
> -	pcpu_nr_empty_pop_pages -= nr;
>  	pcpu_nr_populated -= nr;
> +
> +	pcpu_update_empty_pages(chunk, -1 * nr);
>  }
>
>  /*
> @@ -1523,7 +1502,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
>  			err = "failed to populate";
>  			goto fail_unlock;
>  		}
> -		pcpu_chunk_populated(chunk, rs, re, true);
> +		pcpu_chunk_populated(chunk, rs, re);
>  		spin_unlock_irqrestore(&pcpu_lock, flags);
>  	}
>
> @@ -1722,7 +1701,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
>  			if (!ret) {
>  				nr_to_pop -= nr;
>  				spin_lock_irq(&pcpu_lock);
> -				pcpu_chunk_populated(chunk, rs, rs + nr, false);
> +				pcpu_chunk_populated(chunk, rs, rs + nr);
>  				spin_unlock_irq(&pcpu_lock);
>  			} else {
>  				nr_to_pop = 0;
> --
> 2.17.1

Reviewed-by: Peng Fan <peng.fan@xxxxxxx>
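
For anyone skimming the thread, the key accounting idea above is that once
there is one pcpu_block_md per page, "the block's contig_hint spans the whole
block" is the same statement as "this populated page is empty", so the hint
update paths can adjust the counters by signed deltas instead of rescanning
the chunk. Below is a minimal user-space sketch of that delta scheme; the
toy_* names, the four-block chunk, and the single counter are invented for
illustration and are not kernel code:

#include <stdio.h>

#define TOY_BLOCK_BITS	1024	/* stand-in for PCPU_BITMAP_BLOCK_BITS */

struct toy_block {
	int contig_hint;	/* longest free run, in allocation units */
};

struct toy_chunk {
	struct toy_block blocks[4];	/* one md block per page */
	int nr_empty_pop_pages;
};

/* mirrors pcpu_update_empty_pages(): apply a signed delta */
static void toy_update_empty_pages(struct toy_chunk *c, int nr)
{
	c->nr_empty_pop_pages += nr;
}

/*
 * Allocation path: a block whose contig_hint covered the whole block
 * was an empty page; carving an allocation out of it breaks that full
 * hint, so the page is no longer empty and the counter drops by one.
 */
static void toy_alloc_in_block(struct toy_chunk *c, int index, int bits)
{
	int nr_empty_pages = 0;

	if (c->blocks[index].contig_hint == TOY_BLOCK_BITS)
		nr_empty_pages++;
	c->blocks[index].contig_hint -= bits;	/* crude hint update */

	if (nr_empty_pages)
		toy_update_empty_pages(c, -1 * nr_empty_pages);
}

int main(void)
{
	struct toy_chunk c = {
		.blocks = {
			{ TOY_BLOCK_BITS }, { TOY_BLOCK_BITS },
			{ TOY_BLOCK_BITS }, { TOY_BLOCK_BITS },
		},
		.nr_empty_pop_pages = 4,
	};

	toy_alloc_in_block(&c, 1, 8);
	printf("empty pages: %d\n", c.nr_empty_pop_pages);	/* prints 3 */
	return 0;
}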
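
For contrast, this is roughly the cost the patch removes: the deleted
pcpu_cnt_pop_pages() took two bitmap_weight() passes over the populated
bitmap, and pcpu_chunk_refresh_hint() called it once per free region, so each
refresh effectively scanned the whole chunk. A self-contained approximation,
with toy_bitmap_weight() as a naive stand-in for the kernel's
bitmap_weight():

#include <limits.h>

#define BITS_PER_LONG	((int)(CHAR_BIT * sizeof(unsigned long)))

/* naive stand-in for the kernel's bitmap_weight(): set bits in [0, nbits) */
static int toy_bitmap_weight(const unsigned long *map, int nbits)
{
	int i, w = 0;

	for (i = 0; i < nbits; i++)
		if (map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
			w++;
	return w;
}

/*
 * Shape of the removed pcpu_cnt_pop_pages(): populated pages in
 * [page_start, page_end) computed as weight(end) - weight(start).
 * Calling this per free region makes every hint refresh walk the
 * whole populated bitmap, which the fixed PAGE_SIZE block avoids.
 */
static int toy_cnt_pop_pages(const unsigned long *populated,
			     int page_start, int page_end)
{
	if (page_start >= page_end)
		return 0;

	return toy_bitmap_weight(populated, page_end) -
	       toy_bitmap_weight(populated, page_start);
}

int main(void)
{
	unsigned long populated[1] = { 0xf0UL };	/* pages 4-7 populated */

	/* pages 4 and 5 fall inside [2, 6), so expect 2 */
	return toy_cnt_pop_pages(populated, 2, 6) == 2 ? 0 : 1;
}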
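
Finally, a quick sanity check on the simplified condition in
pcpu_block_update_hint_free(): once the block size equals the page size,
min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS) collapses to the block size,
and within a single block the old ALIGN-based test fires exactly when the
freed region covers the whole block, i.e. (end - start) >=
PCPU_BITMAP_BLOCK_BITS. The exhaustive user-space check below (local ALIGN
macros, BLOCK_BITS chosen arbitrarily) confirms the two forms agree for the
single-block case; the cross-block case is covered by the unchanged s_index
!= e_index test:

#include <assert.h>

#define BLOCK_BITS		1024	/* arbitrary; any positive size works */
#define ALIGN(x, a)		(((x) + (a) - 1) / (a) * (a))
#define ALIGN_DOWN(x, a)	((x) / (a) * (a))

int main(void)
{
	int start, end;

	/* single-block case: s_index == e_index, 0 <= start <= end <= BLOCK_BITS */
	for (start = 0; start <= BLOCK_BITS; start++) {
		for (end = start; end <= BLOCK_BITS; end++) {
			int old_cond = ALIGN_DOWN(end, BLOCK_BITS) >
				       ALIGN(start, BLOCK_BITS);
			int new_cond = (end - start) >= BLOCK_BITS;

			assert(old_cond == new_cond);
		}
	}
	return 0;
}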