Pages containing buffer_heads that are in one of the per-CPU buffer_head LRU caches will be pinned and thus cannot be migrated. This can prevent CMA allocations from succeeding, which are often used on platforms with co-processors (such as a DSP) that can only use physically contiguous memory. It can also prevent memory hot-unplugging from succeeding, which involves migrating at least MIN_MEMORY_BLOCK_SIZE bytes of memory, which ranges from 8 MiB to 1 GiB based on the architecture in use. Correspondingly, invalidate the BH LRU caches before a migration starts and stop any buffer_head from being cached in the LRU caches, until migration has finished. Signed-off-by: Chris Goldsworthy <cgoldswo@xxxxxxxxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/buffer.c | 54 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/buffer_head.h | 8 +++++++ include/linux/migrate.h | 2 ++ mm/migrate.c | 19 ++++++++++++++++ mm/page_alloc.c | 3 +++ mm/swap.c | 7 +++++- 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 96c7604..634e474 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1274,6 +1274,10 @@ struct bh_lru { static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }}; +/* These are used to control the BH LRU invalidation during page migration */ +static struct cpumask lru_needs_invalidation; +static bool bh_lru_disabled = false; + #ifdef CONFIG_SMP #define bh_lru_lock() local_irq_disable() #define bh_lru_unlock() local_irq_enable() @@ -1292,7 +1296,9 @@ static inline void check_irqs_on(void) /* * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is * inserted at the front, and the buffer_head at the back if any is evicted. - * Or, if already in the LRU it is moved to the front. + * Or, if already in the LRU it is moved to the front. 
Note that if LRU is + * disabled because of an ongoing page migration, we won't insert bh into the + * LRU. */ static void bh_lru_install(struct buffer_head *bh) { @@ -1303,6 +1309,9 @@ static void bh_lru_install(struct buffer_head *bh) check_irqs_on(); bh_lru_lock(); + if (bh_lru_disabled) + goto out; + b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { swap(evictee, b->bhs[i]); @@ -1313,6 +1322,7 @@ static void bh_lru_install(struct buffer_head *bh) } get_bh(bh); +out: bh_lru_unlock(); brelse(evictee); } @@ -1328,6 +1338,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) check_irqs_on(); bh_lru_lock(); + + if (bh_lru_disabled) + goto out; + for (i = 0; i < BH_LRU_SIZE; i++) { struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); @@ -1346,6 +1360,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) break; } } +out: bh_lru_unlock(); return ret; } @@ -1446,7 +1461,7 @@ EXPORT_SYMBOL(__bread_gfp); * This doesn't race because it runs in each cpu either in irq * or with preempt disabled. */ -static void invalidate_bh_lru(void *arg) +void invalidate_bh_lru(void *arg) { struct bh_lru *b = &get_cpu_var(bh_lrus); int i; @@ -1477,6 +1492,41 @@ void invalidate_bh_lrus(void) } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); +bool need_bh_lru_invalidation(int cpu) +{ + return cpumask_test_cpu(cpu, &lru_needs_invalidation); +} + +void bh_lru_disable(void) +{ + int cpu; + + bh_lru_disabled = true; + + /* + * This barrier ensures that invocations of bh_lru_install() + * after this barrier see that bh_lru_disabled == true (until + * bh_lru_enable() is eventually called). + */ + smp_mb(); + + /* + * It's alright if someone comes along and hot-plugs a new CPU, + * since bh_lru_disabled == true at this point. The hot-remove + * case is handled in buffer_exit_cpu_dead(). 
+ */ + for_each_online_cpu(cpu) { + if (has_bh_in_lru(cpu, NULL)) + cpumask_set_cpu(cpu, &lru_needs_invalidation); + } +} + +void bh_lru_enable(void) +{ + bh_lru_disabled = false; + cpumask_clear(&lru_needs_invalidation); +} + void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6b47f94..78eb5ee 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -193,7 +193,11 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); +void invalidate_bh_lru(void *arg); void invalidate_bh_lrus(void); +bool need_bh_lru_invalidation(int cpu); +void bh_lru_disable(void); +void bh_lru_enable(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -401,6 +405,10 @@ extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ static inline void buffer_init(void) {} +static inline void invalidate_bh_lru(void) {} +static inline bool need_bh_lru_invalidation(int cpu) { return false; } +static inline void bh_lru_disable(void) {} +static inline void bh_lru_enable(void) {} static inline int try_to_free_buffers(struct page *page) { return 1; } static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3a38963..9e4a2dc 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); extern void migrate_prep(void); +extern void migrate_finish(void); extern void migrate_prep_local(void); extern void 
migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); @@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } static inline int migrate_prep(void) { return -ENOSYS; } +static inline int migrate_finish(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } static inline void migrate_page_states(struct page *newpage, struct page *page) diff --git a/mm/migrate.c b/mm/migrate.c index a69da8a..a8928ee7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -65,6 +65,16 @@ void migrate_prep(void) { /* + * If a page has buffer_heads contained in one of the per-cpu + * BH LRU caches, that page can't be migrated. Accordingly, we + * call bh_lru_disable() to prevent further buffer_heads from + * being cached, before we invalidate the LRUs in + * lru_add_drain_all(). The LRUs are re-enabled in + * migrate_finish(). + */ + bh_lru_disable(); + + /* * Clear the LRU lists so pages can be isolated. * Note that pages may be moved off the LRU after we have * drained them. Those pages will fail to migrate like other @@ -73,6 +83,15 @@ void migrate_prep(void) lru_add_drain_all(); } +void migrate_finish(void) +{ + /* + * Re-enable the per-cpu BH LRU caches, after having disabled them + * in migrate_prep(). 
+ */ + bh_lru_enable(); +} + /* Do the necessary work of migrate_prep but not if it involves other CPUs */ void migrate_prep_local(void) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6446778..e4cb959 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8493,6 +8493,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, ret = migrate_pages(&cc->migratepages, alloc_migration_target, NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); } + + migrate_finish(); + if (ret < 0) { putback_movable_pages(&cc->migratepages); return ret; diff --git a/mm/swap.c b/mm/swap.c index 31b844d..c733c95 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -36,6 +36,7 @@ #include <linux/hugetlb.h> #include <linux/page_idle.h> #include <linux/local_lock.h> +#include <linux/buffer_head.h> #include "internal.h" @@ -628,6 +629,9 @@ void lru_add_drain_cpu(int cpu) if (pagevec_count(pvec)) pagevec_lru_move_fn(pvec, lru_lazyfree_fn); + if (need_bh_lru_invalidation(cpu)) + invalidate_bh_lru(NULL); + activate_page_drain(cpu); } @@ -815,7 +819,8 @@ void lru_add_drain_all(void) pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || - need_activate_page_drain(cpu)) { + need_activate_page_drain(cpu) || + need_bh_lru_invalidation(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); __cpumask_set_cpu(cpu, &has_work); -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project