[WHY]
Provide a cleaner way to expose all page zone helpers in one header
file, rather than splitting them between mm.h and memremap.h.

Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx>
---
 drivers/infiniband/core/rw.c      |   1 -
 drivers/nvme/target/io-cmd-bdev.c |   1 -
 include/linux/memremap.h          | 113 +----------------
 include/linux/mm.h                |  79 +-----------
 include/linux/page_zone.h         | 194 ++++++++++++++++++++++++++++++
 mm/memcontrol.c                   |   1 -
 6 files changed, 196 insertions(+), 193 deletions(-)
 create mode 100644 include/linux/page_zone.h
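For illustration, a minimal sketch of a consumer of the new header
(hypothetical code, not part of the patch; page_kind() is an invented
name, and only the helpers it calls come from page_zone.h):

	#include <linux/page_zone.h>

	/* Classify a page using only what page_zone.h exposes. */
	static const char *page_kind(const struct page *page)
	{
		if (is_device_private_page(page))
			return "ZONE_DEVICE, device-private (no CPU access)";
		if (is_pci_p2pdma_page(page))
			return "ZONE_DEVICE, PCI P2PDMA BAR";
		if (is_zone_device_page(page))
			return "ZONE_DEVICE, fsdax/generic";
		if (is_zone_movable_page(page))
			return "ZONE_MOVABLE";
		return "another zone";
	}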
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 4d98f931a13d..5a3bd41b331c 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (c) 2016 HGST, a Western Digital Company.
  */
-#include <linux/memremap.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
 #include <linux/pci-p2pdma.h>
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 27a72504d31c..16a8b7665fe4 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -6,7 +6,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/blkdev.h>
 #include <linux/blk-integrity.h>
-#include <linux/memremap.h>
 #include <linux/module.h>
 
 #include "nvmet.h"
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8af304f6b504..0f22f6f42e7d 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -2,70 +2,14 @@
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
 
-#include <linux/mm.h>
 #include <linux/range.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
+#include <linux/page_zone.h>
 
 struct resource;
 struct device;
 
-/**
- * struct vmem_altmap - pre-allocated storage for vmemmap_populate
- * @base_pfn: base of the entire dev_pagemap mapping
- * @reserve: pages mapped, but reserved for driver use (relative to @base)
- * @free: free pages set aside in the mapping for memmap storage
- * @align: pages reserved to meet allocation alignments
- * @alloc: track pages consumed, private to vmemmap_populate()
- */
-struct vmem_altmap {
-	unsigned long base_pfn;
-	const unsigned long end_pfn;
-	const unsigned long reserve;
-	unsigned long free;
-	unsigned long align;
-	unsigned long alloc;
-};
-
-/*
- * Specialize ZONE_DEVICE memory into multiple types each has a different
- * usage.
- *
- * MEMORY_DEVICE_PRIVATE:
- * Device memory that is not directly addressable by the CPU: CPU can neither
- * read nor write private memory. In this case, we do still have struct pages
- * backing the device memory. Doing so simplifies the implementation, but it is
- * important to remember that there are certain points at which the struct page
- * must be treated as an opaque object, rather than a "normal" struct page.
- *
- * A more complete discussion of unaddressable memory may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.rst.
- *
- * MEMORY_DEVICE_FS_DAX:
- * Host memory that has similar access semantics as System RAM i.e. DMA
- * coherent and supports page pinning. In support of coordinating page
- * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
- * wakeup event whenever a page is unpinned and becomes idle. This
- * wakeup is used to coordinate physical address space management (ex:
- * fs truncate/hole punch) vs pinned pages (ex: device dma).
- *
- * MEMORY_DEVICE_GENERIC:
- * Host memory that has similar access semantics as System RAM i.e. DMA
- * coherent and supports page pinning. This is for example used by DAX devices
- * that expose memory using a character device.
- *
- * MEMORY_DEVICE_PCI_P2PDMA:
- * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
- * transactions.
- */
-enum memory_type {
-	/* 0 is reserved to catch uninitialized type fields */
-	MEMORY_DEVICE_PRIVATE = 1,
-	MEMORY_DEVICE_FS_DAX,
-	MEMORY_DEVICE_GENERIC,
-	MEMORY_DEVICE_PCI_P2PDMA,
-};
-
 struct dev_pagemap_ops {
 	/*
 	 * Called once the page refcount reaches 0. The reference count will be
@@ -83,42 +27,6 @@ struct dev_pagemap_ops {
 
 #define PGMAP_ALTMAP_VALID	(1 << 0)
 
-/**
- * struct dev_pagemap - metadata for ZONE_DEVICE mappings
- * @altmap: pre-allocated/reserved memory for vmemmap allocations
- * @ref: reference count that pins the devm_memremap_pages() mapping
- * @done: completion for @ref
- * @type: memory type: see MEMORY_* in memory_hotplug.h
- * @flags: PGMAP_* flags to specify defailed behavior
- * @vmemmap_shift: structural definition of how the vmemmap page metadata
- *	is populated, specifically the metadata page order.
- *	A zero value (default) uses base pages as the vmemmap metadata
- *	representation. A bigger value will set up compound struct pages
- *	of the requested order value.
- * @ops: method table
- * @owner: an opaque pointer identifying the entity that manages this
- *	instance. Used by various helpers to make sure that no
- *	foreign ZONE_DEVICE memory is accessed.
- * @nr_range: number of ranges to be mapped
- * @range: range to be mapped when nr_range == 1
- * @ranges: array of ranges to be mapped when nr_range > 1
- */
-struct dev_pagemap {
-	struct vmem_altmap altmap;
-	struct percpu_ref ref;
-	struct completion done;
-	enum memory_type type;
-	unsigned int flags;
-	unsigned long vmemmap_shift;
-	const struct dev_pagemap_ops *ops;
-	void *owner;
-	int nr_range;
-	union {
-		struct range range;
-		struct range ranges[0];
-	};
-};
-
 static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 {
 	if (pgmap->flags & PGMAP_ALTMAP_VALID)
@@ -131,25 +39,6 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
 	return 1 << pgmap->vmemmap_shift;
 }
 
-static inline bool is_device_private_page(const struct page *page)
-{
-	return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
-		is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool folio_is_device_private(const struct folio *folio)
-{
-	return is_device_private_page(&folio->page);
-}
-
-static inline bool is_pci_p2pdma_page(const struct page *page)
-{
-	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
-		is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
-}
-
 #ifdef CONFIG_ZONE_DEVICE
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b31b33bd5be..e551616cd208 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/pgtable.h>
 #include <linux/kasan.h>
+#include <linux/page_zone.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1049,84 +1050,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
  * back into memory.
  */
 
-/*
- * The zone field is never updated after free_area_init_core()
- * sets it, so none of the operations on it need to be atomic.
- */
-
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
-#define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
-#define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
-#define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
-#define KASAN_TAG_PGOFF		(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
-
-/*
- * Define the bit shifts to access each section.  For non-existent
- * sections we define the shift as 0; that plus a 0 mask ensures
- * the compiler will optimise away reference to them.
- */
-#define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
-#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
-#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
-#define KASAN_TAG_PGSHIFT	(KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
-
-/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-#define ZONEID_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF		((SECTIONS_PGOFF < ZONES_PGOFF)? \
-						SECTIONS_PGOFF : ZONES_PGOFF)
-#else
-#define ZONEID_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF		((NODES_PGOFF < ZONES_PGOFF)? \
-						NODES_PGOFF : ZONES_PGOFF)
-#endif
-
-#define ZONEID_PGSHIFT		(ZONEID_PGOFF * (ZONEID_SHIFT != 0))
-
-#define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
-#define NODES_MASK		((1UL << NODES_WIDTH) - 1)
-#define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_SHIFT) - 1)
-#define KASAN_TAG_MASK		((1UL << KASAN_TAG_WIDTH) - 1)
-#define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
-
-static inline enum zone_type page_zonenum(const struct page *page)
-{
-	ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
-	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
-}
-
-static inline enum zone_type folio_zonenum(const struct folio *folio)
-{
-	return page_zonenum(&folio->page);
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_zone_device_page(const struct page *page)
-{
-	return page_zonenum(page) == ZONE_DEVICE;
-}
-extern void memmap_init_zone_device(struct zone *, unsigned long,
-				    unsigned long, struct dev_pagemap *);
-#else
-static inline bool is_zone_device_page(const struct page *page)
-{
-	return false;
-}
-#endif
-
-static inline bool folio_is_zone_device(const struct folio *folio)
-{
-	return is_zone_device_page(&folio->page);
-}
-
-static inline bool is_zone_movable_page(const struct page *page)
-{
-	return page_zonenum(page) == ZONE_MOVABLE;
-}
-
 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 
diff --git a/include/linux/page_zone.h b/include/linux/page_zone.h
new file mode 100644
index 000000000000..2a7a347173ee
--- /dev/null
+++ b/include/linux/page_zone.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PAGE_ZONE_H_
+#define _PAGE_ZONE_H_
+
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ */
+
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
+#define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
+#define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
+#define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
+#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
+#define KASAN_TAG_PGOFF		(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+
+/*
+ * Define the bit shifts to access each section.  For non-existent
+ * sections we define the shift as 0; that plus a 0 mask ensures
+ * the compiler will optimise away reference to them.
+ */
+#define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
+#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
+#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
+#define KASAN_TAG_PGSHIFT	(KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
+
+/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+#define ZONEID_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF		((SECTIONS_PGOFF < ZONES_PGOFF) ? \
+						SECTIONS_PGOFF : ZONES_PGOFF)
+#else
+#define ZONEID_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF		((NODES_PGOFF < ZONES_PGOFF) ? \
+						NODES_PGOFF : ZONES_PGOFF)
+#endif
+
+#define ZONEID_PGSHIFT		(ZONEID_PGOFF * (ZONEID_SHIFT != 0))
+
+#define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
+#define NODES_MASK		((1UL << NODES_WIDTH) - 1)
+#define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
+#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_SHIFT) - 1)
+#define KASAN_TAG_MASK		((1UL << KASAN_TAG_WIDTH) - 1)
+#define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
+
+/*
+ * Specialize ZONE_DEVICE memory into multiple types, each of which has a
+ * different usage.
+ *
+ * MEMORY_DEVICE_PRIVATE:
+ * Device memory that is not directly addressable by the CPU: CPU can neither
+ * read nor write private memory. In this case, we do still have struct pages
+ * backing the device memory. Doing so simplifies the implementation, but it is
+ * important to remember that there are certain points at which the struct page
+ * must be treated as an opaque object, rather than a "normal" struct page.
+ *
+ * A more complete discussion of unaddressable memory may be found in
+ * include/linux/hmm.h and Documentation/vm/hmm.rst.
+ *
+ * MEMORY_DEVICE_FS_DAX:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. In support of coordinating page
+ * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
+ * wakeup event whenever a page is unpinned and becomes idle. This
+ * wakeup is used to coordinate physical address space management (ex:
+ * fs truncate/hole punch) vs pinned pages (ex: device dma).
+ *
+ * MEMORY_DEVICE_GENERIC:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. This is for example used by DAX devices
+ * that expose memory using a character device.
+ *
+ * MEMORY_DEVICE_PCI_P2PDMA:
+ * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
+ * transactions.
+ */
+enum memory_type {
+	/* 0 is reserved to catch uninitialized type fields */
+	MEMORY_DEVICE_PRIVATE = 1,
+	MEMORY_DEVICE_FS_DAX,
+	MEMORY_DEVICE_GENERIC,
+	MEMORY_DEVICE_PCI_P2PDMA,
+};
+
+/**
+ * struct vmem_altmap - pre-allocated storage for vmemmap_populate
+ * @base_pfn: base of the entire dev_pagemap mapping
+ * @reserve: pages mapped, but reserved for driver use (relative to @base)
+ * @free: free pages set aside in the mapping for memmap storage
+ * @align: pages reserved to meet allocation alignments
+ * @alloc: track pages consumed, private to vmemmap_populate()
+ */
+struct vmem_altmap {
+	unsigned long base_pfn;
+	const unsigned long end_pfn;
+	const unsigned long reserve;
+	unsigned long free;
+	unsigned long align;
+	unsigned long alloc;
+};
+
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @altmap: pre-allocated/reserved memory for vmemmap allocations
+ * @ref: reference count that pins the devm_memremap_pages() mapping
+ * @done: completion for @ref
+ * @type: memory type: see MEMORY_* above
+ * @flags: PGMAP_* flags to specify detailed behavior
+ * @vmemmap_shift: structural definition of how the vmemmap page metadata
+ *	is populated, specifically the metadata page order.
+ *	A zero value (default) uses base pages as the vmemmap metadata
+ *	representation. A bigger value will set up compound struct pages
+ *	of the requested order value.
+ * @ops: method table
+ * @owner: an opaque pointer identifying the entity that manages this
+ *	instance. Used by various helpers to make sure that no
+ *	foreign ZONE_DEVICE memory is accessed.
+ * @nr_range: number of ranges to be mapped
+ * @range: range to be mapped when nr_range == 1
+ * @ranges: array of ranges to be mapped when nr_range > 1
+ */
+struct dev_pagemap {
+	struct vmem_altmap altmap;
+	struct percpu_ref ref;
+	struct completion done;
+	enum memory_type type;
+	unsigned int flags;
+	unsigned long vmemmap_shift;
+	const struct dev_pagemap_ops *ops;
+	void *owner;
+	int nr_range;
+	union {
+		struct range range;
+		struct range ranges[0];
+	};
+};
+
+static inline enum zone_type page_zonenum(const struct page *page)
+{
+	ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
+	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
+}
+
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+	return page_zonenum(&folio->page);
+}
+
+static inline bool is_zone_movable_page(const struct page *page)
+{
+	return page_zonenum(page) == ZONE_MOVABLE;
+}
+
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_zone_device_page(const struct page *page)
+{
+	return page_zonenum(page) == ZONE_DEVICE;
+}
+extern void memmap_init_zone_device(struct zone *, unsigned long,
+				    unsigned long, struct dev_pagemap *);
+#else
+static inline bool is_zone_device_page(const struct page *page)
+{
+	return false;
+}
+#endif
+
+static inline bool folio_is_zone_device(const struct folio *folio)
+{
+	return is_zone_device_page(&folio->page);
+}
+
+static inline bool is_device_private_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+		is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool folio_is_device_private(const struct folio *folio)
+{
+	return is_device_private_page(&folio->page);
+}
+
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+		is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+}
+
+#endif /* _PAGE_ZONE_H_ */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 618c366a2f07..a2df2f193f06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -53,7 +53,6 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmpressure.h>
-#include <linux/memremap.h>
 #include <linux/mm_inline.h>
 #include <linux/swap_cgroup.h>
 #include <linux/cpu.h>
-- 
2.32.0
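As a standalone illustration of the bit packing that the moved
"Page flags:" comment describes, a plain C sketch with made-up example
widths (not kernel code): the zone index is stored once into the upper
bits of page->flags at init time, and page_zonenum() recovers it with
the matching shift and mask.

	#include <stdio.h>

	#define BITS_PER_LONG	(sizeof(unsigned long) * 8)
	#define SECTIONS_WIDTH	0	/* example: section not in page->flags */
	#define NODES_WIDTH	6	/* example width */
	#define ZONES_WIDTH	3	/* example width */

	#define SECTIONS_PGOFF	(BITS_PER_LONG - SECTIONS_WIDTH)
	#define NODES_PGOFF	(SECTIONS_PGOFF - NODES_WIDTH)
	#define ZONES_PGOFF	(NODES_PGOFF - ZONES_WIDTH)
	#define ZONES_PGSHIFT	(ZONES_PGOFF * (ZONES_WIDTH != 0))
	#define ZONES_MASK	((1UL << ZONES_WIDTH) - 1)

	int main(void)
	{
		unsigned long flags = 0;
		unsigned long zone = 5;	/* some zone index */

		/* free_area_init_core() side: store the zone once. */
		flags |= zone << ZONES_PGSHIFT;
		/* page_zonenum() side: shift down and mask. */
		printf("zone = %lu\n", (flags >> ZONES_PGSHIFT) & ZONES_MASK);
		return 0;
	}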
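In the same spirit, the @vmemmap_shift kerneldoc moved above is just a
page order: pgmap_vmemmap_nr() expands it to the number of base pages
per compound page. A standalone sketch of that arithmetic (plain C, not
kernel code), assuming 4KB base pages for the worked numbers:

	#include <stdio.h>

	/* Same computation as pgmap_vmemmap_nr(): 1 << vmemmap_shift. */
	static unsigned long vmemmap_nr(unsigned long vmemmap_shift)
	{
		return 1UL << vmemmap_shift;
	}

	int main(void)
	{
		/* shift 0: base pages (the default); shift 9: 512 base
		 * pages per compound page, i.e. a 2MB unit with 4KB pages. */
		printf("%lu %lu\n", vmemmap_nr(0), vmemmap_nr(9));
		return 0;
	}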