[PATCH] Dirty page tracking for physical system migration

This patch implements a mechanism to track re-dirtied pages and modified
PTEs.  It is used by Stratus Technologies for both our ftLinux product and
our new GPL Live Kernel Self Migration project (lksm.sourceforge.net).
In both cases, we bring a backup server online by copying the primary
server's state while it is running.  We start by copying all of memory
from top to bottom.  We then go back and re-copy any pages that were
changed during that first pass.  After several such passes the set of
re-dirtied pages is small enough that we can momentarily suspend
processing, copy the last few pages over, and bring up the secondary
system.  This patch keeps track of which pages need to be copied during
these passes.
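
To make the flow concrete, here is a minimal userspace-style sketch of that
pre-copy loop.  Everything in it is illustrative only: total_pages(),
harvest_dirty_pages(), copy_page_to_peer(), suspend_processing() and
start_secondary_system() are placeholder helpers, not interfaces added by
this patch.

#include <stddef.h>

#define BITS_PER_LONG           (8 * sizeof(unsigned long))
#define MAX_PASSES              8    /* stop iterating if we never converge */
#define FINAL_COPY_THRESHOLD    64   /* few enough pages to copy while suspended */

extern size_t total_pages(void);
extern size_t harvest_dirty_pages(unsigned long *bitmap); /* snapshot + reset tracking */
extern void copy_page_to_peer(size_t pfn);
extern void suspend_processing(void);
extern void start_secondary_system(void);

static int page_is_dirty(const unsigned long *bitmap, size_t pfn)
{
        return (bitmap[pfn / BITS_PER_LONG] >> (pfn % BITS_PER_LONG)) & 1;
}

static void migrate_memory(unsigned long *bitmap)
{
        size_t pfn, dirty;
        int pass;

        /* First pass: copy all of memory while the primary keeps running. */
        for (pfn = 0; pfn < total_pages(); pfn++)
                copy_page_to_peer(pfn);

        /* Later passes: re-copy only what was dirtied since the previous pass. */
        for (pass = 0; pass < MAX_PASSES; pass++) {
                dirty = harvest_dirty_pages(bitmap);
                for (pfn = 0; pfn < total_pages(); pfn++)
                        if (page_is_dirty(bitmap, pfn))
                                copy_page_to_peer(pfn);
                if (dirty <= FINAL_COPY_THRESHOLD)
                        break;
        }

        /* Final pass: briefly suspend, copy the stragglers, start the secondary. */
        suspend_processing();
        harvest_dirty_pages(bitmap);
        for (pfn = 0; pfn < total_pages(); pfn++)
                if (page_is_dirty(bitmap, pfn))
                        copy_page_to_peer(pfn);
        start_secondary_system();
}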

 

 arch/x86/Kconfig                      |   11 +++++++++++

 arch/x86/include/asm/hugetlb.h        |    3 +++

 arch/x86/include/asm/pgtable-2level.h |    4 ++++

 arch/x86/include/asm/pgtable-3level.h |   11 +++++++++++

 arch/x86/include/asm/pgtable.h        |    4 ++--

 arch/x86/include/asm/pgtable_32.h     |    1 +

 arch/x86/include/asm/pgtable_64.h     |    7 +++++++

 arch/x86/include/asm/pgtable_types.h  |    5 ++++-

 arch/x86/mm/Makefile                  |    2 ++

 mm/huge_memory.c                      |    4 ++--

 11 files changed, 48 insertions(+), 6 deletions(-)

 

Signed-off-by: "James Paradis" <james.paradis@xxxxxxxxxxx>

 

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index cc6c53a..cc778a4 100644

--- a/arch/x86/Kconfig

+++ b/arch/x86/Kconfig

@@ -1146,6 +1146,17 @@ config DIRECT_GBPAGES

                  support it. This can improve the kernel's performance a tiny bit by

                  reducing TLB pressure. If in doubt, say "Y".

 

+config TRACK_DIRTY_PAGES

+              bool "Enable dirty page tracking"

+              default n

+              depends on !KMEMCHECK

+              ---help---

+                Turning this on enables tracking of re-dirtied and

+                changed pages.  This is needed by the Live Kernel

+                Self Migration project (lksm.sourceforge.net) to perform

+                live copying of memory and system state to another system.

+                Most users will say n here.

+

 # Common NUMA Features

 config NUMA

                bool "Numa Memory Allocation and Scheduler Support"

diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h

index 439a9ac..8266873 100644

--- a/arch/x86/include/asm/hugetlb.h

+++ b/arch/x86/include/asm/hugetlb.h

@@ -2,6 +2,7 @@

 #define _ASM_X86_HUGETLB_H

 

 #include <asm/page.h>

+#include <asm/mm_track.h>

 

 

 static inline int is_hugepage_only_range(struct mm_struct *mm,

@@ -39,12 +40,14 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,

 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,

                                                                   pte_t *ptep, pte_t pte)

 {

+              mm_track_pmd((pmd_t *)ptep);

                set_pte_at(mm, addr, ptep, pte);

 }

 

 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,

                                                                                    unsigned long addr, pte_t *ptep)

 {

+              mm_track_pmd((pmd_t *)ptep);

                return ptep_get_and_clear(mm, addr, ptep);

 }

 

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h

index 98391db..a59deb5 100644

--- a/arch/x86/include/asm/pgtable-2level.h

+++ b/arch/x86/include/asm/pgtable-2level.h

@@ -13,11 +13,13 @@

  */

 static inline void native_set_pte(pte_t *ptep , pte_t pte)

 {

+              mm_track_pte(ptep);

                *ptep = pte;

 }

 

 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)

 {

+              mm_track_pmd(pmdp);

                *pmdp = pmd;

 }

 

@@ -34,12 +36,14 @@ static inline void native_pmd_clear(pmd_t *pmdp)

 static inline void native_pte_clear(struct mm_struct *mm,

                                                                    unsigned long addr, pte_t *xp)

 {

+              mm_track_pte(xp);

                *xp = native_make_pte(0);

 }

 

 #ifdef CONFIG_SMP

 static inline pte_t native_ptep_get_and_clear(pte_t *xp)

 {

+              mm_track_pte(xp);

                return __pte(xchg(&xp->pte_low, 0));

 }

 #else

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h

index effff47..b75d753 100644

--- a/arch/x86/include/asm/pgtable-3level.h

+++ b/arch/x86/include/asm/pgtable-3level.h

@@ -26,6 +26,7 @@

  */

 static inline void native_set_pte(pte_t *ptep, pte_t pte)

 {

+              mm_track_pte(ptep);

                ptep->pte_high = pte.pte_high;

                smp_wmb();

                ptep->pte_low = pte.pte_low;

@@ -33,16 +34,19 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)

 

 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)

 {

+              mm_track_pte(ptep);

                set_64bit((unsigned long long *)(ptep), native_pte_val(pte));

 }

 

 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)

 {

+              mm_track_pmd(pmdp);

                set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));

 }

 

 static inline void native_set_pud(pud_t *pudp, pud_t pud)

 {

+              mm_track_pud(pudp);

                set_64bit((unsigned long long *)(pudp), native_pud_val(pud));

 }

 

@@ -54,6 +58,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)

 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,

                                                                    pte_t *ptep)

 {

+              mm_track_pte(ptep);

                ptep->pte_low = 0;

                smp_wmb();

                ptep->pte_high = 0;

@@ -62,6 +67,9 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,

 static inline void native_pmd_clear(pmd_t *pmd)

 {

                u32 *tmp = (u32 *)pmd;

+

+              mm_track_pmd(pmd);

+

                *tmp = 0;

                smp_wmb();

                *(tmp + 1) = 0;

@@ -69,6 +77,7 @@ static inline void native_pmd_clear(pmd_t *pmd)

 

 static inline void pud_clear(pud_t *pudp)

 {

+              mm_track_pud(pudp);

                set_pud(pudp, __pud(0));

 

                /*

@@ -88,6 +97,8 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)

 {

                pte_t res;

 

+              mm_track_pte(ptep);

+

                /* xchg acts as a barrier before the setting of the high bits */

                res.pte_low = xchg(&ptep->pte_low, 0);

                res.pte_high = ptep->pte_high;

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h

index 18601c8..30bb916 100644

--- a/arch/x86/include/asm/pgtable.h

+++ b/arch/x86/include/asm/pgtable.h

@@ -89,7 +89,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);

  */

 static inline int pte_dirty(pte_t pte)

 {

-              return pte_flags(pte) & _PAGE_DIRTY;

+              return pte_flags(pte) & (_PAGE_DIRTY | _PAGE_SOFTDIRTY);

 }

 

 static inline int pte_young(pte_t pte)

@@ -183,7 +183,7 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)

 

 static inline pte_t pte_mkclean(pte_t pte)

 {

-              return pte_clear_flags(pte, _PAGE_DIRTY);

+              return pte_clear_flags(pte, (_PAGE_DIRTY | _PAGE_SOFTDIRTY));

 }

 

 static inline pte_t pte_mkold(pte_t pte)

diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h

index 0c92113..78415fb 100644

--- a/arch/x86/include/asm/pgtable_32.h

+++ b/arch/x86/include/asm/pgtable_32.h

@@ -21,6 +21,7 @@

 #include <linux/bitops.h>

 #include <linux/list.h>

 #include <linux/spinlock.h>

+#include <asm/mm_track.h>

 

 struct mm_struct;

 struct vm_area_struct;

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h

index 975f709..0848e9e 100644

--- a/arch/x86/include/asm/pgtable_64.h

+++ b/arch/x86/include/asm/pgtable_64.h

@@ -13,6 +13,7 @@

 #include <asm/processor.h>

 #include <linux/bitops.h>

 #include <linux/threads.h>

+#include <asm/mm_track.h>

 

 extern pud_t level3_kernel_pgt[512];

 extern pud_t level3_ident_pgt[512];

@@ -46,11 +47,13 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);

 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,

                                                                    pte_t *ptep)

 {

+              mm_track_pte(ptep);

                *ptep = native_make_pte(0);

 }

 

 static inline void native_set_pte(pte_t *ptep, pte_t pte)

 {

+              mm_track_pte(ptep);

                *ptep = pte;

 }

 

@@ -61,6 +64,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)

 

 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)

 {

+              mm_track_pmd(pmdp);

                *pmdp = pmd;

 }

 

@@ -71,6 +75,7 @@ static inline void native_pmd_clear(pmd_t *pmd)

 

 static inline pte_t native_ptep_get_and_clear(pte_t *xp)

 {

+              mm_track_pte(xp);

 #ifdef CONFIG_SMP

                return native_make_pte(xchg(&xp->pte, 0));

 #else

@@ -97,6 +102,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)

 

 static inline void native_set_pud(pud_t *pudp, pud_t pud)

 {

+              mm_track_pud(pudp);

                *pudp = pud;

 }

 

@@ -107,6 +113,7 @@ static inline void native_pud_clear(pud_t *pud)

 

 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)

 {

+              mm_track_pgd(pgdp);

                *pgdp = pgd;

 }

 

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h

index d56187c..7f366d0 100644

--- a/arch/x86/include/asm/pgtable_types.h

+++ b/arch/x86/include/asm/pgtable_types.h

@@ -23,6 +23,7 @@

 #define _PAGE_BIT_SPECIAL     _PAGE_BIT_UNUSED1

 #define _PAGE_BIT_CPA_TEST  _PAGE_BIT_UNUSED1

 #define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */

+#define _PAGE_BIT_SOFTDIRTY              _PAGE_BIT_HIDDEN

 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */

 

 /* If _PAGE_BIT_PRESENT is clear, we use these: */

@@ -47,6 +48,7 @@

 #define _PAGE_SPECIAL              (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)

 #define _PAGE_CPA_TEST          (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)

 #define _PAGE_SPLITTING          (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING)

+#define _PAGE_SOFTDIRTY       (_AT(pteval_t, 1) << _PAGE_BIT_SOFTDIRTY)

 #define __HAVE_ARCH_PTE_SPECIAL

 

 #ifdef CONFIG_KMEMCHECK

@@ -71,7 +73,8 @@

 

 /* Set of bits not changed in pte_modify */

 #define _PAGE_CHG_MASK       (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |                 \

-                                              _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)

+                                              _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |   \

+                                              _PAGE_SOFTDIRTY)

 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

 

 #define _PAGE_CACHE_MASK  (_PAGE_PCD | _PAGE_PWT)

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile

index 3e608ed..a416317 100644

--- a/arch/x86/mm/Makefile

+++ b/arch/x86/mm/Makefile

@@ -30,3 +30,5 @@ obj-$(CONFIG_NUMA_EMU)                             += numa_emulation.o

 obj-$(CONFIG_HAVE_MEMBLOCK)                         += memblock.o

 

 obj-$(CONFIG_MEMTEST)                           += memtest.o

+

+obj-$(CONFIG_TRACK_DIRTY_PAGES)  += track.o

diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 83326ad..b94aad6 100644

--- a/mm/huge_memory.c

+++ b/mm/huge_memory.c

@@ -795,7 +795,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,

                                                                                unsigned long haddr)

 {

                pgtable_t pgtable;

-              pmd_t _pmd;

+              pmd_t _pmd = {0};

                int ret = 0, i;

                struct page **pages;

 

@@ -1265,7 +1265,7 @@ static int __split_huge_page_map(struct page *page,

                                                                 unsigned long address)

 {

                struct mm_struct *mm = vma->vm_mm;

-              pmd_t *pmd, _pmd;

+              pmd_t *pmd, _pmd = {0};

                int ret = 0, i;

                pgtable_t pgtable;

                unsigned long haddr;
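
A note on the hooks themselves: the mm_track_{pte,pmd,pud,pgd}() helpers and
the track.o object referenced above come from asm/mm_track.h and
arch/x86/mm/track.c, which are not shown above.  The idea is that each hook
runs just before a page-table entry is overwritten or cleared, so the
hardware dirty bit can be harvested into a bitmap before it is lost.  Purely
as an illustration (the bitmap name and bound below are placeholders, not the
actual interface), a PTE-level hook might look roughly like this:

static inline void mm_track_pte(pte_t *ptep)
{
        unsigned long pfn;

        if (!pte_present(*ptep))                /* nothing was mapped here */
                return;
        if (!(pte_flags(*ptep) & _PAGE_DIRTY))  /* page was never written to */
                return;

        pfn = pte_pfn(*ptep);
        if (pfn < tracking_bitmap_pages)        /* placeholder bitmap bound */
                set_bit(pfn, tracking_bitmap);  /* re-copy this page next pass */
}

The _PAGE_SOFTDIRTY bit added in pgtable_types.h appears to act as a
software-maintained companion to the hardware dirty bit: pte_dirty() now
reports it and _PAGE_CHG_MASK preserves it across pte_modify(), presumably so
that the core VM does not lose sight of pages whose hardware dirty bit the
tracker has already harvested.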

