There are several scenarios where we need to retrieve and update metadata associated with a given devm_memremap_pages() mapping, and the only lookup key available is a pfn in the range: 1/ We want to augment vmemmap_populate() (called via arch_add_memory()) to allocate memmap storage from pre-allocated pages reserved by the device driver. At vmemmap_alloc_block_buf() time it grabs device pages rather than page allocator pages. This is in support of devm_memremap_pages() mappings where the memmap is too large to fit in main memory (i.e. large persistent memory devices). 2/ Taking a reference against the mapping when inserting device pages into the address_space radix of a given inode. This facilitates unmap_mapping_range() and truncate_inode_pages() operations when the driver is tearing down the mapping. 3/ get_user_pages() operations on ZONE_DEVICE memory require taking a reference against the mapping so that the driver teardown path can revoke and drain usage of device pages. Cc: Christoph Hellwig <hch@xxxxxx> Cc: Dave Chinner <david@xxxxxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- include/linux/mm.h | 18 ++++++++++++++++++ kernel/memremap.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80001de019ba..30c3c8764649 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,6 +717,24 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +/** + * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @dev: host device of the mapping for debug + */ +struct dev_pagemap { + /* TODO: vmem_altmap and percpu_ref count */ + struct device *dev; +}; + +#ifdef CONFIG_ZONE_DEVICE +struct dev_pagemap *__get_dev_pagemap(resource_size_t phys); +#else +static inline struct dev_pagemap 
*get_dev_pagemap(resource_size_t phys) +{ + return NULL; +} +#endif + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif diff --git a/kernel/memremap.c b/kernel/memremap.c index 3218e8b1fc28..64bfd9fa93aa 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -10,6 +10,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. */ +#include <linux/rculist.h> #include <linux/device.h> #include <linux/types.h> #include <linux/io.h> @@ -138,18 +139,52 @@ void devm_memunmap(struct device *dev, void *addr) EXPORT_SYMBOL(devm_memunmap); #ifdef CONFIG_ZONE_DEVICE +static LIST_HEAD(ranges); +static DEFINE_SPINLOCK(range_lock); + struct page_map { struct resource res; + struct dev_pagemap pgmap; + struct list_head list; }; +static void add_page_map(struct page_map *page_map) +{ + spin_lock(&range_lock); + list_add_rcu(&page_map->list, &ranges); + spin_unlock(&range_lock); +} + +static void del_page_map(struct page_map *page_map) +{ + spin_lock(&range_lock); + list_del_rcu(&page_map->list); + spin_unlock(&range_lock); +} + static void devm_memremap_pages_release(struct device *dev, void *res) { struct page_map *page_map = res; + del_page_map(page_map); + /* pages are dead and unused, undo the arch mapping */ arch_remove_memory(page_map->res.start, resource_size(&page_map->res)); } +/* assumes rcu_read_lock() held at entry */ +struct dev_pagemap *__get_dev_pagemap(resource_size_t phys) +{ + struct page_map *page_map; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + list_for_each_entry_rcu(page_map, &ranges, list) + if (phys >= page_map->res.start && phys <= page_map->res.end) + return &page_map->pgmap; + return NULL; +} + void *devm_memremap_pages(struct device *dev, struct resource *res) { int is_ram = region_intersects(res->start, resource_size(res), @@ -173,12 +208,17 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) memcpy(&page_map->res, res, 
sizeof(*res)); + page_map->pgmap.dev = dev; + INIT_LIST_HEAD(&page_map->list); + add_page_map(page_map); + nid = dev_to_node(dev); if (nid < 0) nid = numa_mem_id(); error = arch_add_memory(nid, res->start, resource_size(res), true); if (error) { + del_page_map(page_map); devres_free(page_map); return ERR_PTR(error); } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>