When a ZONE_DEVICE page's refcount reaches 1, it means the page is free and nobody is holding a reference on it (only the device to which the memory belongs does). Add a callback and call it when that happens so device drivers can implement their own free page management. Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> --- drivers/dax/pmem.c | 3 ++- drivers/nvdimm/pmem.c | 5 +++-- include/linux/memremap.h | 17 ++++++++++++++--- kernel/memremap.c | 14 +++++++++++++- tools/testing/nvdimm/test/iomap.c | 2 +- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 1f01e98..52ff674 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -107,7 +107,8 @@ static int dax_pmem_probe(struct device *dev) if (rc) return rc; - addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, + altmap, NULL, NULL); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 571a6c7..c261d12 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -260,7 +260,7 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags = PFN_DEV; if (is_nd_pfn(dev)) { addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, - altmap); + altmap, NULL, NULL); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); @@ -269,7 +269,8 @@ static int pmem_attach_disk(struct device *dev, res->start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { addr = devm_memremap_pages(dev, &nsio->res, - &q->q_usage_counter, NULL); + &q->q_usage_counter, + NULL, NULL, NULL); pmem->pfn_flags |= PFN_MAP; } else addr = devm_memremap(dev, pmem->phys_addr, diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 32314d2..7845f2e 100644 --- a/include/linux/memremap.h 
+++ b/include/linux/memremap.h @@ -35,23 +35,31 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) } #endif +typedef void (*dev_page_free_t)(struct page *page, void *data); + /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @page_free: free page callback when page refcount reach 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @dev: host device of the mapping for debug + * @data: privata data pointer for page_free */ struct dev_pagemap { + dev_page_free_t page_free; struct vmem_altmap *altmap; const struct resource *res; struct percpu_ref *ref; struct device *dev; + void *data; }; #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap); + struct percpu_ref *ref, struct vmem_altmap *altmap, + dev_page_free_t page_free, + void *data); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); int devm_memremap_pages_remove(struct device *dev, struct dev_pagemap *pgmap); @@ -62,8 +70,11 @@ static inline bool dev_page_allow_migrate(const struct page *page) } #else static inline void *devm_memremap_pages(struct device *dev, - struct resource *res, struct percpu_ref *ref, - struct vmem_altmap *altmap) + struct resource *res, + struct percpu_ref *ref, + struct vmem_altmap *altmap, + dev_page_free_t page_free, + void *data) { /* * Fail attempts to call devm_memremap_pages() without diff --git a/kernel/memremap.c b/kernel/memremap.c index 250ef25..bc1e400 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -190,6 +190,12 @@ EXPORT_SYMBOL(get_zone_device_page); void put_zone_device_page(struct page *page) { + /* + * If refcount is 1 then page is freed and refcount is stable as nobody + * holds a reference on the page. 
+ */ + if (page->pgmap->page_free && page_count(page) == 1) + page->pgmap->page_free(page, page->pgmap->data); put_dev_pagemap(page->pgmap); } EXPORT_SYMBOL(put_zone_device_page); @@ -270,6 +276,8 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) * @res: "host memory" address range * @ref: a live per-cpu reference count * @altmap: optional descriptor for allocating the memmap from @res + * @page_free: callback call when page refcount reach 1 ie it is free + * @data: privata data pointer for page_free * * Notes: * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time @@ -280,7 +288,9 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) * this is not enforced. */ void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) + struct percpu_ref *ref, struct vmem_altmap *altmap, + dev_page_free_t page_free, + void *data) { resource_size_t key, align_start, align_size, align_end; pgprot_t pgprot = PAGE_KERNEL; @@ -322,6 +332,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, } pgmap->ref = ref; pgmap->res = &page_map->res; + pgmap->page_free = page_free; + pgmap->data = data; mutex_lock(&pgmap_lock); error = 0; diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c29f8dc..6505a87 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -108,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, if (nfit_res) return nfit_res->buf + offset - nfit_res->res->start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, res, ref, altmap, NULL, NULL); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); -- 2.4.3 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>