On Mon, May 06, 2019 at 04:40:14PM -0700, Dan Williams wrote:
> 
> +void subsection_mask_set(unsigned long *map, unsigned long pfn,
> +		unsigned long nr_pages)
> +{
> +	int idx = subsection_map_index(pfn);
> +	int end = subsection_map_index(pfn + nr_pages - 1);
> +
> +	bitmap_set(map, idx, end - idx + 1);
> +}
> +
>  void subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>  {
>  	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
> @@ -219,20 +235,17 @@ void subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>  		return;
>  
>  	for (i = start_sec; i <= end_sec; i++) {
> -		int idx, end;
> -		unsigned long pfns;
>  		struct mem_section *ms;
> +		unsigned long pfns;
>  
> -		idx = subsection_map_index(pfn);
>  		pfns = min(nr_pages, PAGES_PER_SECTION
>  				- (pfn & ~PAGE_SECTION_MASK));
> -		end = subsection_map_index(pfn + pfns - 1);
> -
>  		ms = __nr_to_section(i);
> -		bitmap_set(ms->usage->subsection_map, idx, end - idx + 1);
> +		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
>  
>  		pr_debug("%s: sec: %d pfns: %ld set(%d, %d)\n", __func__, i,
> -				pfns, idx, end - idx + 1);
> +				pfns, subsection_map_index(pfn),
> +				subsection_map_index(pfn + pfns - 1));

I would definitely add subsection_mask_set() and the above change to
Patch#3; I think it fits there better than here.
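For anyone skimming the thread, a quick userspace sketch of the index
arithmetic subsection_mask_set() builds on; the constants below are my own
assumptions for x86_64 (128MiB sections, 2MiB sub-sections, so
SUBSECTIONS_PER_SECTION == 64) and are not taken from this patch:

/*
 * Standalone illustration only; PAGE_SHIFT / SECTION_SIZE_BITS /
 * SUBSECTION_SHIFT are assumed x86_64 defaults, not copied from the patch.
 */
#include <stdio.h>

#define PAGE_SHIFT		12
#define SECTION_SIZE_BITS	27	/* 128 MiB sections */
#define SUBSECTION_SHIFT	21	/* 2 MiB sub-sections */
#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
#define PAGES_PER_SUBSECTION	(1UL << (SUBSECTION_SHIFT - PAGE_SHIFT))
#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION - 1))

static int subsection_map_index(unsigned long pfn)
{
	return (pfn & ~PAGE_SECTION_MASK) / PAGES_PER_SUBSECTION;
}

int main(void)
{
	/* a 16 MiB (4096 page) hot-add starting 2 MiB into the 6th section */
	unsigned long pfn = 5 * PAGES_PER_SECTION + PAGES_PER_SUBSECTION;
	unsigned long nr_pages = 4096;
	int idx = subsection_map_index(pfn);
	int end = subsection_map_index(pfn + nr_pages - 1);

	/* bitmap_set(map, idx, end - idx + 1) would set bits 1..8 here */
	printf("set bits %d..%d (%d sub-sections)\n", idx, end, end - idx + 1);
	return 0;
}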
> 
>  		pfn += pfns;
>  		nr_pages -= pfns;
> @@ -319,6 +332,15 @@ static void __meminit sparse_init_one_section(struct mem_section *ms,
>  		unsigned long pnum, struct page *mem_map,
>  		struct mem_section_usage *usage)
>  {
> +	/*
> +	 * Given that SPARSEMEM_VMEMMAP=y supports sub-section hotplug,
> +	 * ->section_mem_map can not be guaranteed to point to a full
> +	 * section's worth of memory. The field is only valid / used
> +	 * in the SPARSEMEM_VMEMMAP=n case.
> +	 */
> +	if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
> +		mem_map = NULL;
> +
>  	ms->section_mem_map &= ~SECTION_MAP_MASK;
>  	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
>  							SECTION_HAS_MEM_MAP;
> @@ -724,10 +746,142 @@ static void free_map_bootmem(struct page *memmap)
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>  
> +#ifndef CONFIG_MEMORY_HOTREMOVE
> +static void free_map_bootmem(struct page *memmap)
> +{
> +}
> +#endif
> +
> +static bool is_early_section(struct mem_section *ms)
> +{
> +	struct page *usage_page;
> +
> +	usage_page = virt_to_page(ms->usage);
> +	if (PageSlab(usage_page) || PageCompound(usage_page))
> +		return false;
> +	else
> +		return true;
> +}
> +
> +static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> +		int nid, struct vmem_altmap *altmap)
> +{
> +	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> +	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> +	struct mem_section *ms = __pfn_to_section(pfn);
> +	bool early_section = is_early_section(ms);
> +	struct page *memmap = NULL;
> +	unsigned long *subsection_map = ms->usage
> +		? &ms->usage->subsection_map[0] : NULL;
> +
> +	subsection_mask_set(map, pfn, nr_pages);
> +	if (subsection_map)
> +		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> +
> +	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> +			"section already deactivated (%#lx + %ld)\n",
> +			pfn, nr_pages))
> +		return;
> +
> +	if (WARN(!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)
> +			&& nr_pages < PAGES_PER_SECTION,
> +			"partial memory section removal not supported\n"))
> +		return;
> +
> +	/*
> +	 * There are 3 cases to handle across two configurations
> +	 * (SPARSEMEM_VMEMMAP={y,n}):
> +	 *
> +	 * 1/ deactivation of a partial hot-added section (only possible
> +	 *    in the SPARSEMEM_VMEMMAP=y case).
> +	 *      a/ section was present at memory init
> +	 *      b/ section was hot-added post memory init
> +	 * 2/ deactivation of a complete hot-added section
> +	 * 3/ deactivation of a complete section from memory init
> +	 *
> +	 * For 1/, when subsection_map does not empty we will not be
> +	 * freeing the usage map, but still need to free the vmemmap
> +	 * range.
> +	 *
> +	 * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> +	 */
> +	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> +	if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
> +		unsigned long section_nr = pfn_to_section_nr(pfn);
> +
> +		if (!early_section) {
> +			kfree(ms->usage);
> +			ms->usage = NULL;
> +		}
> +		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> +		ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
> +	}
> +
> +	if (early_section && memmap)
> +		free_map_bootmem(memmap);
> +	else
> +		depopulate_section_memmap(pfn, nr_pages, altmap);
> +}
> +
> +static struct page * __meminit section_activate(int nid, unsigned long pfn,
> +		unsigned long nr_pages, struct vmem_altmap *altmap)
> +{
> +	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> +	struct mem_section *ms = __pfn_to_section(pfn);
> +	struct mem_section_usage *usage = NULL;
> +	unsigned long *subsection_map;
> +	struct page *memmap;
> +	int rc = 0;
> +
> +	subsection_mask_set(map, pfn, nr_pages);
> +
> +	if (!ms->usage) {
> +		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
> +		if (!usage)
> +			return ERR_PTR(-ENOMEM);
> +		ms->usage = usage;
> +	}
> +	subsection_map = &ms->usage->subsection_map[0];
> +
> +	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> +		rc = -EINVAL;
> +	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> +		rc = -EEXIST;
> +	else
> +		bitmap_or(subsection_map, map, subsection_map,
> +				SUBSECTIONS_PER_SECTION);
> +
> +	if (rc) {
> +		if (usage)
> +			ms->usage = NULL;
> +		kfree(usage);
> +		return ERR_PTR(rc);
> +	}
> +
> +	/*
> +	 * The early init code does not consider partially populated
> +	 * initial sections, it simply assumes that memory will never be
> +	 * referenced. If we hot-add memory into such a section then we
> +	 * do not need to populate the memmap and can simply reuse what
> +	 * is already there.
> +	 */
> +	if (nr_pages < PAGES_PER_SECTION && is_early_section(ms))
> +		return pfn_to_page(pfn);
> +
> +	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
> +	if (!memmap) {
> +		section_deactivate(pfn, nr_pages, nid, altmap);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	return memmap;
> +}

I do not really like this. The sub-section scheme is only available with
CONFIG_SPARSEMEM_VMEMMAP, so I would rather have two internal
__section_{activate,deactivate} functions, one for sparse-vmemmap and one
for sparse-non-vmemmap. That way, we can hide all the implementation
details and the sub-section dance behind __section_{activate,deactivate}.
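Roughly the shape I have in mind (a completely untested sketch, just to
show the split; it reuses the helpers this patch introduces, namely
subsection_mask_set(), is_early_section() and populate_section_memmap()):

/*
 * Untested sketch; the usage allocation and error unwinding stay in the
 * common section_activate(), which simply calls __section_activate().
 */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* sub-section aware: track what is populated in ms->usage->subsection_map */
static struct page * __meminit __section_activate(int nid, unsigned long pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap)
{
	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
	struct mem_section *ms = __pfn_to_section(pfn);
	unsigned long *subsection_map = &ms->usage->subsection_map[0];
	struct page *memmap;

	subsection_mask_set(map, pfn, nr_pages);
	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
		return ERR_PTR(-EINVAL);
	if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
		return ERR_PTR(-EEXIST);
	bitmap_or(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);

	/* partial hot-add into an early section reuses the boot memmap */
	if (nr_pages < PAGES_PER_SECTION && is_early_section(ms))
		return pfn_to_page(pfn);

	/* caller undoes the subsection_map update if this fails */
	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
	return memmap ? memmap : ERR_PTR(-ENOMEM);
}
#else
/* classic path: whole sections only, no subsection_map to maintain */
static struct page * __meminit __section_activate(int nid, unsigned long pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap)
{
	struct page *memmap;

	if (WARN_ON_ONCE(nr_pages != PAGES_PER_SECTION))
		return ERR_PTR(-EINVAL);

	memmap = populate_section_memmap(pfn, PAGES_PER_SECTION, nid, altmap);
	return memmap ? memmap : ERR_PTR(-ENOMEM);
}
#endif

section_activate() would then only allocate ms->usage when needed, call
__section_activate() and unwind on error; the deactivate side would be
split the same way.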
> +
> @@ -741,49 +895,31 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
>  		unsigned long nr_pages, struct vmem_altmap *altmap)
>  {
>  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
> -	struct mem_section_usage *usage;
>  	struct mem_section *ms;
>  	struct page *memmap;
>  	int ret;
>  
> -	/*
> -	 * no locking for this, because it does its own
> -	 * plus, it does a kmalloc
> -	 */
>  	ret = sparse_index_init(section_nr, nid);
>  	if (ret < 0 && ret != -EEXIST)
>  		return ret;
> -	ret = 0;
> -	memmap = populate_section_memmap(start_pfn, PAGES_PER_SECTION, nid,
> -			altmap);
> -	if (!memmap)
> -		return -ENOMEM;
> -	usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
> -	if (!usage) {
> -		depopulate_section_memmap(start_pfn, PAGES_PER_SECTION, altmap);
> -		return -ENOMEM;
> -	}
>  
> -	ms = __pfn_to_section(start_pfn);
> -	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
> -		ret = -EEXIST;
> -		goto out;
> -	}
> +	memmap = section_activate(nid, start_pfn, nr_pages, altmap);
> +	if (IS_ERR(memmap))
> +		return PTR_ERR(memmap);
> +	ret = 0;
>  
>  	/*
>  	 * Poison uninitialized struct pages in order to catch invalid flags
>  	 * combinations.
>  	 */
> -	page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION);
> +	page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
>  
> +	ms = __pfn_to_section(start_pfn);
>  	section_mark_present(ms);
> -	sparse_init_one_section(ms, section_nr, memmap, usage);
> +	sparse_init_one_section(ms, section_nr, memmap, ms->usage);
>  
> -out:
> -	if (ret < 0) {
> -		kfree(usage);
> -		depopulate_section_memmap(start_pfn, PAGES_PER_SECTION, altmap);
> -	}
> +	if (ret < 0)
> +		section_deactivate(start_pfn, nr_pages, nid, altmap);
>  	return ret;
>  }

Also, with this rework sparse_add_section() seems to carry some leftovers,
so something like the following could go on top:

diff --git a/mm/sparse.c b/mm/sparse.c
index 34f322d14e62..daeb2d7d8dd0 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -900,13 +900,12 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 	int ret;
 
 	ret = sparse_index_init(section_nr, nid);
-	if (ret < 0 && ret != -EEXIST)
+	if (ret < 0)
 		return ret;
 
 	memmap = section_activate(nid, start_pfn, nr_pages, altmap);
 	if (IS_ERR(memmap))
 		return PTR_ERR(memmap);
-	ret = 0;
 
 	/*
 	 * Poison uninitialized struct pages in order to catch invalid flags
@@ -918,8 +917,6 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 	section_mark_present(ms);
 	sparse_init_one_section(ms, section_nr, memmap, ms->usage);
 
-	if (ret < 0)
-		section_deactivate(start_pfn, nr_pages, nid, altmap);
 	return ret;
 }

-- 
Oscar Salvador
SUSE L3