Currently, we use just *one* zero page regardless of the user process's node. When a user process reads the zero page, the CPU must first load it into its cache. If the CPU's node is not the same as the zero page's node, this load takes a long time. If we create a zero page for each node and use them appropriately, we can reduce this overhead. This patch implements the basic infrastructure for numa_zero_pfn. It is disabled by default, because it doesn't provide page coloring and some architectures use page coloring for the zero page. Signed-off-by: Joonsoo Kim <js1304@xxxxxxxxx> diff --git a/mm/Kconfig b/mm/Kconfig index a3f8ddd..de0ab65 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -412,3 +412,8 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config NUMA_ZERO_PFN + bool "Enable NUMA-aware zero page handling" + depends on NUMA + default n diff --git a/mm/memory.c b/mm/memory.c index 221fc9f..e7d3969 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -112,12 +112,43 @@ __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; unsigned long highest_memmap_pfn __read_mostly; +#ifdef CONFIG_NUMA_ZERO_PFN +unsigned long node_to_zero_pfn[MAX_NUMNODES] __read_mostly; + +/* Should be called after zero_pfn initialization */ +static void __init init_numa_zero_pfn(void) +{ + unsigned int node; + + if (nr_node_ids == 1) + return; + + for_each_node_state(node, N_POSSIBLE) { + node_to_zero_pfn[node] = zero_pfn; + } + + for_each_node_state(node, N_HIGH_MEMORY) { + struct page *page; + page = alloc_pages_exact_node(node, + GFP_HIGHUSER | __GFP_ZERO, 0); + if (!page) + continue; + + node_to_zero_pfn[node] = page_to_pfn(page); + } +} +#else +static inline void __init init_numa_zero_pfn(void) {} +#endif + /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ static int __init init_zero_pfn(void) { zero_pfn = page_to_pfn(ZERO_PAGE(0)); + init_numa_zero_pfn(); + return 0; } core_initcall(init_zero_pfn); @@ -717,6 +748,24 
@@ static inline bool is_cow_mapping(vm_flags_t flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +#ifdef CONFIG_NUMA_ZERO_PFN +static inline int is_numa_zero_pfn(unsigned long pfn) +{ + return zero_pfn == pfn || node_to_zero_pfn[pfn_to_nid(pfn)] == pfn; +} + +static inline unsigned long my_numa_zero_pfn(unsigned long addr) +{ + if (nr_node_ids == 1) + return zero_pfn; + + return node_to_zero_pfn[numa_node_id()]; +} + +#define is_zero_pfn is_numa_zero_pfn +#define my_zero_pfn my_numa_zero_pfn +#endif /* CONFIG_NUMA_ZERO_PFN */ + #ifndef is_zero_pfn static inline int is_zero_pfn(unsigned long pfn) { -- 1.7.9.5 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>