The last patch implements reclaim by adding 2 additional lists where a chunk's lifecycle is: active_slot -> to_depopulate_slot -> sidelined_slot. This worked great because we're able to nicely converge paths into isolation. However, it's a bit aggressive to run for every free page. Let's accumulate a few free pages before we do this. To do this, the new lifecycle is: active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot. The transition from sidelined_slot -> to_depopulate_slot now occurs when a threshold of accumulated empty populated pages is crossed, instead of chunks moving directly to the to_depopulate_slot as before. pcpu_nr_isolated_empty_pop_pages[] is introduced to aid with this. Suggested-by: Roman Gushchin <guro@xxxxxx> Signed-off-by: Dennis Zhou <dennis@xxxxxxxxxx> --- mm/percpu-internal.h | 1 + mm/percpu-stats.c | 8 ++++++-- mm/percpu.c | 44 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 10604dce806f..b3e43b016276 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -92,6 +92,7 @@ extern int pcpu_nr_slots; extern int pcpu_sidelined_slot; extern int pcpu_to_depopulate_slot; extern int pcpu_nr_empty_pop_pages[]; +extern int pcpu_nr_isolated_empty_pop_pages[]; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 2125981acfb9..facc804eb86c 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -145,7 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) int slot, max_nr_alloc; int *buffer; enum pcpu_chunk_type type; - int nr_empty_pop_pages; + int nr_empty_pop_pages, nr_isolated_empty_pop_pages; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -167,8 +167,11 @@ static int percpu_stats_show(struct seq_file *m, void *v) } nr_empty_pop_pages = 0; - for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + nr_isolated_empty_pop_pages = 0; + for (type = 0; type < PCPU_NR_CHUNK_TYPES; 
type++) { nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type]; + nr_isolated_empty_pop_pages += pcpu_nr_isolated_empty_pop_pages[type]; + } #define PL(X) \ seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) @@ -202,6 +205,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) PU(min_alloc_size); PU(max_alloc_size); P("empty_pop_pages", nr_empty_pop_pages); + P("iso_empty_pop_pages", nr_isolated_empty_pop_pages); seq_putc(m, '\n'); #undef PU diff --git a/mm/percpu.c b/mm/percpu.c index 79eebc80860d..ba13e683d022 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -110,6 +110,9 @@ #define PCPU_EMPTY_POP_PAGES_LOW 2 #define PCPU_EMPTY_POP_PAGES_HIGH 4 +/* only schedule reclaim if there are at least N empty pop pages sidelined */ +#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD 4 + #ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr @@ -183,6 +186,7 @@ static LIST_HEAD(pcpu_map_extend_chunks); * The reserved chunk doesn't contribute to the count. 
*/ int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; +int pcpu_nr_isolated_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; /* * The number of populated pages in use by the allocator, protected by @@ -582,8 +586,10 @@ static void pcpu_isolate_chunk(struct pcpu_chunk *chunk) if (!chunk->isolated) { chunk->isolated = true; pcpu_nr_empty_pop_pages[type] -= chunk->nr_empty_pop_pages; + pcpu_nr_isolated_empty_pop_pages[type] += + chunk->nr_empty_pop_pages; + list_move(&chunk->list, &pcpu_slot[pcpu_sidelined_slot]); } - list_move(&chunk->list, &pcpu_slot[pcpu_to_depopulate_slot]); } static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk) @@ -595,6 +601,8 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk) if (chunk->isolated) { chunk->isolated = false; pcpu_nr_empty_pop_pages[type] += chunk->nr_empty_pop_pages; + pcpu_nr_isolated_empty_pop_pages[type] -= + chunk->nr_empty_pop_pages; pcpu_chunk_relocate(chunk, -1); } } @@ -610,9 +618,15 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk) */ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) { + enum pcpu_chunk_type type = pcpu_chunk_type(chunk); + chunk->nr_empty_pop_pages += nr; - if (chunk != pcpu_reserved_chunk && !chunk->isolated) - pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr; + if (chunk != pcpu_reserved_chunk) { + if (chunk->isolated) + pcpu_nr_isolated_empty_pop_pages[type] += nr; + else + pcpu_nr_empty_pop_pages[type] += nr; + } } /* @@ -2138,10 +2152,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type) struct list_head *pcpu_slot = pcpu_chunk_list(type); struct pcpu_chunk *chunk; struct pcpu_block_md *block; + LIST_HEAD(to_depopulate); int i, end; spin_lock_irq(&pcpu_lock); + list_splice_init(&pcpu_slot[pcpu_to_depopulate_slot], &to_depopulate); + restart: /* * Once a chunk is isolated to the to_depopulate list, the chunk is no @@ -2149,9 +2166,9 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type) * other accessor is the free path 
which only returns area back to the * allocator not touching the populated bitmap. */ - while (!list_empty(&pcpu_slot[pcpu_to_depopulate_slot])) { - chunk = list_first_entry(&pcpu_slot[pcpu_to_depopulate_slot], - struct pcpu_chunk, list); + while (!list_empty(&to_depopulate)) { + chunk = list_first_entry(&to_depopulate, struct pcpu_chunk, + list); WARN_ON(chunk->immutable); /* @@ -2208,6 +2225,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type) &pcpu_slot[pcpu_sidelined_slot]); } + if (pcpu_nr_isolated_empty_pop_pages[type] >= + PCPU_EMPTY_POP_RECLAIM_THRESHOLD) { + list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot], + &pcpu_slot[pcpu_to_depopulate_slot]); + pcpu_schedule_balance_work(); + } + spin_unlock_irq(&pcpu_lock); } @@ -2291,7 +2315,13 @@ void free_percpu(void __percpu *ptr) } } else if (pcpu_should_reclaim_chunk(chunk)) { pcpu_isolate_chunk(chunk); - need_balance = true; + if (chunk->free_bytes == pcpu_unit_size || + pcpu_nr_isolated_empty_pop_pages[pcpu_chunk_type(chunk)] >= + PCPU_EMPTY_POP_RECLAIM_THRESHOLD) { + list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot], + &pcpu_slot[pcpu_to_depopulate_slot]); + need_balance = true; + } } trace_percpu_free_percpu(chunk->base_addr, off, ptr); -- 2.31.1.368.gbe11c130af-goog