From: Jérôme Glisse <jglisse@xxxxxxxxxx> Subject: mm/ZONE_DEVICE: special case put_page() for device private pages A ZONE_DEVICE page that reaches a refcount of 1 is free, i.e. it no longer has any user. For device private pages this transition is important to catch, and thus we need to special-case put_page() for them. Link: http://lkml.kernel.org/r/20170817000548.32038-9-jglisse@xxxxxxxxxx Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> Cc: Aneesh Kumar <aneesh.kumar@xxxxxxxxxxxxxxxxxx> Cc: Balbir Singh <bsingharora@xxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: David Nellans <dnellans@xxxxxxxxxx> Cc: Evgeny Baskakov <ebaskakov@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: John Hubbard <jhubbard@xxxxxxxxxx> Cc: Mark Hairgrove <mhairgrove@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx> Cc: Sherry Cheung <SCheung@xxxxxxxxxx> Cc: Subhash Gutti <sgutti@xxxxxxxxxx> Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> Cc: Bob Liu <liubo95@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/memremap.h | 13 +++++++++++++ include/linux/mm.h | 31 ++++++++++++++++++++++--------- kernel/memremap.c | 25 ++++++++++++++++++++++++- mm/hmm.c | 8 ++++++++ 4 files changed, 67 insertions(+), 10 deletions(-) diff -puN include/linux/memremap.h~mm-zone_device-special-case-put_page-for-device-private-pages-v4 include/linux/memremap.h --- a/include/linux/memremap.h~mm-zone_device-special-case-put_page-for-device-private-pages-v4 +++ a/include/linux/memremap.h @@ -126,6 +126,14 @@ struct dev_pagemap { void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); + +static inline bool is_zone_device_page(const struct page *page); + 
+static inline bool is_device_private_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -144,6 +152,11 @@ static inline struct dev_pagemap *find_d { return NULL; } + +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} #endif /** diff -puN include/linux/mm.h~mm-zone_device-special-case-put_page-for-device-private-pages-v4 include/linux/mm.h --- a/include/linux/mm.h~mm-zone_device-special-case-put_page-for-device-private-pages-v4 +++ a/include/linux/mm.h @@ -23,6 +23,7 @@ #include <linux/page_ext.h> #include <linux/err.h> #include <linux/page_ref.h> +#include <linux/memremap.h> struct mempolicy; struct anon_vma; @@ -792,25 +793,25 @@ static inline bool is_zone_device_page(c { return page_zonenum(page) == ZONE_DEVICE; } - -static inline bool is_device_private_page(const struct page *page) -{ - /* See MEMORY_DEVICE_PRIVATE in include/linux/memory_hotplug.h */ - return ((page_zonenum(page) == ZONE_DEVICE) && - (page->pgmap->type == MEMORY_DEVICE_PRIVATE)); -} #else static inline bool is_zone_device_page(const struct page *page) { return false; } +#endif -static inline bool is_device_private_page(const struct page *page) +#ifdef CONFIG_DEVICE_PRIVATE +void put_zone_device_private_page(struct page *page); +#else +static inline void put_zone_device_private_page(struct page *page) { - return false; } #endif +static inline bool is_device_private_page(const struct page *page); + +DECLARE_STATIC_KEY_FALSE(device_private_key); + static inline void get_page(struct page *page) { page = compound_head(page); @@ -826,6 +827,18 @@ static inline void put_page(struct page { page = compound_head(page); + /* + * For private device pages we need to catch refcount transition from + * 2 to 1, when refcount reach one it means the private device page is + * free and we need 
to inform the device driver through callback. See + * include/linux/memremap.h and HMM for details. + */ + if (static_branch_unlikely(&device_private_key) && + unlikely(is_device_private_page(page))) { + put_zone_device_private_page(page); + return; + } + if (put_page_testzero(page)) __put_page(page); } diff -puN kernel/memremap.c~mm-zone_device-special-case-put_page-for-device-private-pages-v4 kernel/memremap.c --- a/kernel/memremap.c~mm-zone_device-special-case-put_page-for-device-private-pages-v4 +++ a/kernel/memremap.c @@ -11,7 +11,6 @@ * General Public License for more details. */ #include <linux/radix-tree.h> -#include <linux/memremap.h> #include <linux/device.h> #include <linux/types.h> #include <linux/pfn_t.h> @@ -500,3 +499,27 @@ struct vmem_altmap *to_vmem_altmap(unsig return pgmap ? pgmap->altmap : NULL; } #endif /* CONFIG_ZONE_DEVICE */ + + +#ifdef CONFIG_DEVICE_PRIVATE +void put_zone_device_private_page(struct page *page) +{ + int count = page_ref_dec_return(page); + + /* + * If refcount is 1 then page is freed and refcount is stable as nobody + * holds a reference on the page. 
+ */ + if (count == 1) { + /* Clear Active bit in case of parallel mark_page_accessed */ + __ClearPageActive(page); + __ClearPageWaiters(page); + + page->mapping = NULL; + + page->pgmap->page_free(page, page->pgmap->data); + } else if (!count) + __put_page(page); +} +EXPORT_SYMBOL(put_zone_device_private_page); +#endif /* CONFIG_DEVICE_PRIVATE */ diff -puN mm/hmm.c~mm-zone_device-special-case-put_page-for-device-private-pages-v4 mm/hmm.c --- a/mm/hmm.c~mm-zone_device-special-case-put_page-for-device-private-pages-v4 +++ a/mm/hmm.c @@ -25,9 +25,17 @@ #include <linux/sched.h> #include <linux/swapops.h> #include <linux/hugetlb.h> +#include <linux/jump_label.h> #include <linux/mmu_notifier.h> +/* + * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h + */ +DEFINE_STATIC_KEY_FALSE(device_private_key); +EXPORT_SYMBOL(device_private_key); + + #ifdef CONFIG_HMM static const struct mmu_notifier_ops hmm_mmu_notifier_ops; _ -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html