On 04/18/2013 08:43 PM, Srinivas Pandruvada wrote: > On 04/18/2013 02:54 AM, Srivatsa S. Bhat wrote: >> On 04/17/2013 10:23 PM, Srinivas Pandruvada wrote: >>> On 04/09/2013 02:45 PM, Srivatsa S. Bhat wrote: >>>> [I know, this cover letter is a little too long, but I wanted to >>>> clearly >>>> explain the overall goals and the high-level design of this patchset in >>>> detail. I hope this helps more than it annoys, and makes it easier for >>>> reviewers to relate to the background and the goals of this patchset.] >>>> >>>> >>>> Overview of Memory Power Management and its implications to the >>>> Linux MM >>>> ======================================================================== >>>> >>>> >> [...] >>> One thing you need to prevent is boot time allocation. You have to make >>> sure that frequently accessed per node data stored at the end of memory >>> will keep all ranks of memory active. >>> > When I was experimenting I did something like this. Thanks a lot for sharing this, Srinivas! Regards, Srivatsa S. Bhat > ///////////////////////////////// > > > +/* > + * Experimental MPST implementation > + * Copyright (c) 2012, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with > + * this program; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
> + * > + */ > +#include <linux/kernel.h> > +#include <linux/types.h> > +#include <linux/init.h> > +#include <linux/kthread.h> > +#include <linux/acpi.h> > +#include <linux/export.h> > +#include <linux/bootmem.h> > +#include <linux/delay.h> > +#include <linux/pfn.h> > +#include <linux/suspend.h> > +#include <linux/acpi.h> > +#include <linux/memblock.h> > +#include <linux/mm.h> > +#include <linux/mmzone.h> > +#include <linux/migrate.h> > +#include <linux/mm_inline.h> > +#include <linux/page-isolation.h> > +#include <linux/vmalloc.h> > +#include <linux/compaction.h> > +#include "internal.h" > + > +#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) > +#define pfn_to_phys(p) ((p) << PAGE_SHIFT) > +#define MAX_MPST_ZONES 16 > +/* At least 4G of non MPST memory. */ > +#define MINIMAL_NON_MPST_MEMORY_PFN (0x100000000 >> PAGE_SHIFT) > + > +struct mpst_mem_zone { > + phys_addr_t start_addr; > + phys_addr_t end_addr; > +}; > + > +static struct mpst_mem_zone mpst_zones[MAX_MPST_ZONES]; > +static int mpst_zone_cnt; > +static unsigned long mpst_start_pfn; > +static unsigned long mpst_end_pfn; > +static bool mpst_enabled; > + > +/* Minimal parsing for just getting node ranges */ > +static int __init acpi_parse_mpst_table(struct acpi_table_header *table) > +{ > + struct acpi_table_mpst *mpst; > + struct acpi_mpst_power_node *node; > + u16 node_count; > + int i; > + > + mpst = (struct acpi_table_mpst *)table; > + if (!mpst) { > + pr_warn("Unable to map MPST\n"); > + return -ENODEV; > + } > + node_count = mpst->power_node_count; > + node = (struct acpi_mpst_power_node *)((u8 *)mpst + sizeof(*mpst)); > + > + for (i = mpst_zone_cnt; (i < node_count) && (i < MAX_MPST_ZONES); > + ++i) { > + if ((node->flags & ACPI_MPST_ENABLED) && > + (node->flags & ACPI_MPST_POWER_MANAGED)) { > + mpst_zones[mpst_zone_cnt].start_addr = > + node->range_address; > + mpst_zones[mpst_zone_cnt].end_addr = > + node->range_address + node->range_length; > + ++mpst_zone_cnt; > + } > + ++node; > + } > + > + return 0; > 
+} > + > +static unsigned long local_ahex_to_long(const char *name) > +{ > + unsigned long val = 0; > + > + for (;; name++) { > + switch (*name) { > + case '0' ... '9': > + val = 16*val+(*name-'0'); > + break; > + case 'A' ... 'F': > + val = 16*val+(*name-'A'+10); > + break; > + case 'a' ... 'f': > + val = 16*val+(*name-'a'+10); > + break; > + default: > + return val; > + } > + } > + > + return val; > +} > + > +/* Specify MPST range by command line for test till ACPI - MPST is > available */ > +static int __init parse_mpst_opt(char *str) > +{ > + char *ptr; > + phys_addr_t start_at = 0, end_at = 0; > + u64 mem_size = 0; > + > + if (!str) > + return -EINVAL; > + ptr = str; > + while (1) { > + if (*str == '-') { > + *str = '\0'; > + start_at = local_ahex_to_long(ptr); > + ++str; > + ptr = str; > + } > + if (start_at && (*str == '\0' || *str == ',' || *str == > ' ')) { > + *str = '\0'; > + end_at = local_ahex_to_long(ptr); > + mem_size = end_at-start_at; > + ++str; > + ptr = str; > + pr_info("-mpst[%#018Lx-%#018Lx size: %#018Lx]\n", > + start_at, end_at, > mem_size); > + if (IS_ALIGNED(phys_to_pfn(start_at), > + pageblock_nr_pages) && > + IS_ALIGNED(phys_to_pfn(end_at), > + pageblock_nr_pages)) { > + mpst_zones[mpst_zone_cnt].start_addr = > + start_at; > + mpst_zones[mpst_zone_cnt].end_addr = > + end_at; > + } else { > + pr_err("mpst invalid range\n"); > + return -EINVAL; > + } > + mpst_zone_cnt++; > + start_at = mem_size = end_at = 0; > + } > + if (*str == '\0') > + break; > + else > + ++str; > + } > + > + return 0; > +} > +early_param("mpst_range", parse_mpst_opt); > + > +/* Specify MPST range by command line for test till ACPI - MPST is > available */ > +static int __init parse_mpst_enable_opt(char *str) > +{ > + long value; > + if (kstrtol(str, 10, &value)) > + return -EINVAL; > + mpst_enabled = value ? 
true : false; > + > + return 0; > +} > +early_param("mpst_enable", parse_mpst_enable_opt); > + > +/* Set the minimum and maximum PFN */ > +static void mpst_set_min_max_pfn(void) > +{ > + int i; > + > + if (!mpst_zone_cnt) > + return; > + > + mpst_start_pfn = phys_to_pfn(mpst_zones[0].start_addr); > + mpst_end_pfn = phys_to_pfn(mpst_zones[0].end_addr); > + > + for (i = 1; i < mpst_zone_cnt; ++i) { > + if (mpst_start_pfn > phys_to_pfn(mpst_zones[i].start_addr)) > + mpst_start_pfn = > phys_to_pfn(mpst_zones[i].start_addr); > + if (mpst_end_pfn < phys_to_pfn(mpst_zones[i].end_addr)) > + mpst_end_pfn = phys_to_pfn(mpst_zones[i].end_addr); > + } > +} > + > +/* Change migrate type for the MPST ranges */ > +int mpst_set_migrate_type(void) > +{ > + int i; > + struct page *page; > + unsigned long start_pfn, end_pfn; > + > + if (!mpst_start_pfn || !mpst_end_pfn) > + return -EINVAL; > + if (!IS_ALIGNED(mpst_start_pfn, pageblock_nr_pages)) > + return -EINVAL; > + if (!IS_ALIGNED(mpst_end_pfn, pageblock_nr_pages)) > + return -EINVAL; > + memblock_free(pfn_to_phys(mpst_start_pfn), > + pfn_to_phys(mpst_end_pfn) - pfn_to_phys(mpst_start_pfn)); > + for (i = 0; i < mpst_zone_cnt; ++i) { > + start_pfn = phys_to_pfn(mpst_zones[i].start_addr); > + end_pfn = phys_to_pfn(mpst_zones[i].end_addr); > + for (; start_pfn < end_pfn; ++start_pfn) { > + page = pfn_to_page(start_pfn); > + if (page) > + set_pageblock_migratetype(page, > + MIGRATE_LP_MEMORY); > + } > + } > + > + return 0; > +} > + > +/* Parse ACPI table and find start and end of MPST zone. 
> +Assuming zones are contiguous */ > +int mpst_init(void) > +{ > + if (!mpst_enabled) { > + pr_info("mpst not enabled in command line\n"); > + return 0; > + } > + > + acpi_table_parse(ACPI_SIG_MPST, acpi_parse_mpst_table); > + mpst_set_min_max_pfn(); > + if (mpst_zone_cnt) { > + > + if (mpst_start_pfn < MINIMAL_NON_MPST_MEMORY_PFN) { > + pr_err("Not enough memory: Ignore MPST\n"); > + mpst_start_pfn = mpst_end_pfn = 0; > + return -EINVAL; > + } > + memblock_reserve(pfn_to_phys(mpst_start_pfn), > + pfn_to_phys(mpst_end_pfn) - > + pfn_to_phys(mpst_start_pfn)); > + pr_info("mpst_init memblock limit set to pfn %lu > 0x%#018lx\n", > + mpst_start_pfn, pfn_to_phys(mpst_start_pfn)); > + } > + > + return 0; > +} > > > > > > ///////////////////////////// -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>