From: Yasuaki Ishimatsu <isimatu.yasuaki@xxxxxxxxxxxxxx>
Not all pages of the virtual memory map (vmemmap) of removed memory can
be freed, since some pages used as PGD/PUD cover not only the removed
memory but also other memory. So this patch checks whether each page
can be freed or not.
How is it checked whether a page can be freed or not?
1. When removing memory, the page structs of the removed memory are
filled with 0xFD.
2. If all page structs on a PT/PMD are filled with 0xFD, the PT/PMD can
be cleared. In this case, the page used as PT/PMD can be freed.
With this patch applied, the two versions of __remove_section() are
integrated into one, so the CONFIG_SPARSEMEM_VMEMMAP-specific
__remove_section() is deleted.
Note: vmemmap_kfree() and vmemmap_free_bootmem() are not implemented
for ia64, ppc, s390, and sparc; only empty stubs are added for them.
CC: David Rientjes <rientjes@xxxxxxxxxx>
CC: Jiang Liu <liuj97@xxxxxxxxx>
CC: Len Brown <len.brown@xxxxxxxxx>
CC: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
CC: Paul Mackerras <paulus@xxxxxxxxx>
CC: Christoph Lameter <cl@xxxxxxxxx>
Cc: Minchan Kim <minchan.kim@xxxxxxxxx>
CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
CC: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
CC: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@xxxxxxxxxxxxxx>
---
arch/ia64/mm/discontig.c | 8 +++
arch/powerpc/mm/init_64.c | 8 +++
arch/s390/mm/vmem.c | 8 +++
arch/sparc/mm/init_64.c | 8 +++
arch/x86/mm/init_64.c | 119
+++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 2 +
mm/memory_hotplug.c | 17 +------
mm/sparse.c | 5 +-
8 files changed, 158 insertions(+), 17 deletions(-)
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 33943db..0d23b69 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -823,6 +823,14 @@ int __meminit vmemmap_populate(struct page
*start_page,
return vmemmap_populate_basepages(start_page, size, node);
}
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_kfree() is not implemented for ia64. */
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_free_bootmem() is not implemented for ia64. */
+}
+
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
{
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3690c44..835a2b3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -299,6 +299,14 @@ int __meminit vmemmap_populate(struct page
*start_page,
return 0;
}
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_kfree() is not implemented for powerpc. */
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_free_bootmem() is not implemented for powerpc. */
+}
+
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
{
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index eda55cd..4b42b0b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -227,6 +227,14 @@ out:
return ret;
}
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_kfree() is not implemented for s390. */
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_free_bootmem() is not implemented for s390. */
+}
+
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
{
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index add1cc7..1384826 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2078,6 +2078,14 @@ void __meminit vmemmap_populate_print_last(void)
}
}
+void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_kfree() is not implemented for sparc. */
+}
+
+void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{ /* Empty stub: vmemmap_free_bootmem() is not implemented for sparc. */
+}
+
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0075592..4e8f8a4 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1138,6 +1138,125 @@ vmemmap_populate(struct page *start_page,
unsigned long size, int node)
return 0;
}
+#define PAGE_INUSE 0xFD /* byte pattern written over removed page structs */
+
+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned
long end,
+ struct page **pp, int *page_size)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ void *page_addr;
+ unsigned long next;
+
+ *pp = NULL;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return pgd_addr_end(addr, end);
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return pud_addr_end(addr, end);
+
+ if (!cpu_has_pse) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return next;
+
+ pte = pte_offset_kernel(pmd, addr);
+ if (pte_none(*pte))
+ return next;
+
+ *page_size = PAGE_SIZE;
+ *pp = pte_page(*pte);
+ } else {
+ next = pmd_addr_end(addr, end);
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return next;
+
+ *page_size = PMD_SIZE;
+ *pp = pmd_page(*pmd);
+ }
+
+ /*
+ * Removed page structs are filled with 0xFD.
+ */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ page_addr = page_address(*pp);
+
+ /*
+ * Check the page is filled with 0xFD or not.
+ * memchr_inv() returns the address. In this case, we cannot
+ * clear PTE/PUD entry, since the page is used by other.
+ * So we cannot also free the page.
+ *
+ * memchr_inv() returns NULL. In this case, we can clear
+ * PTE/PUD entry, since the page is not used by other.
+ * So we can also free the page.
+ */
+ if (memchr_inv(page_addr, PAGE_INUSE, *page_size)) {
+ *pp = NULL;
+ return next;
+ }
+