Restoring the memory address space begins with nuking the existing one of the current process, and then reading the VMA state and contents. Call do_mmap_pgoff() for each VMA and then read in the data. Currently to restore private mapped memory we use the pathname saved to open a new file and pass it to do_mmap_pgoff(). Later we change that to reference a file object.

Changelog[v14]:
 - Introduce per vma-type restore() function
 - Merge restart code into same file as checkpoint (memory.c)
 - Compare saved 'vdso' field of mm_context with current value
 - Check whether calls to ckpt_hbuf_get() fail
 - Discard field 'h->parent'
 - Revert change to pr_debug(), back to ckpt_debug()
Changelog[v13]:
 - Avoid access to hh->vma_type after the header is freed
 - Test for no vma's in exit_mmap() before calling unmap_vma() (or it may crash if restart fails after having removed all vma's)
Changelog[v12]:
 - Replace obsolete ckpt_debug() with pr_debug()
Changelog[v9]:
 - Introduce ckpt_ctx_checkpoint() for checkpoint-specific ctx setup
Changelog[v7]:
 - Fix argument given to kunmap_atomic() in memory dump/restore
Changelog[v6]:
 - Balance all calls to ckpt_hbuf_get() with matching ckpt_hbuf_put() (even though it's not really needed)
Changelog[v5]:
 - Improve memory restore code (following Dave Hansen's comments)
 - Change dump format (and code) to allow chunks of <vaddrs, pages> instead of one long list of each
 - Memory restore now maps user pages explicitly to copy data into them, instead of reading directly to user space; got rid of mprotect_fixup()
Changelog[v4]:
 - Use standard list_...
for ckpt_pgarr Signed-off-by: Oren Laadan <orenl@xxxxxxxxxxxxxxx> --- arch/x86/include/asm/checkpoint_hdr.h | 5 + arch/x86/mm/checkpoint.c | 59 +++++ checkpoint/checkpoint_arch.h | 1 + checkpoint/files.c | 33 +++ checkpoint/memory.c | 407 +++++++++++++++++++++++++++++++++ checkpoint/process.c | 4 + checkpoint/restart.c | 9 + include/linux/checkpoint.h | 5 + include/linux/checkpoint_hdr.h | 6 +- include/linux/mm.h | 9 + mm/filemap.c | 18 ++ mm/mmap.c | 30 ++- 12 files changed, 580 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/checkpoint_hdr.h b/arch/x86/include/asm/checkpoint_hdr.h index bad7b29..d61653c 100644 --- a/arch/x86/include/asm/checkpoint_hdr.h +++ b/arch/x86/include/asm/checkpoint_hdr.h @@ -104,4 +104,9 @@ struct ckpt_hdr_mm_context { __u32 nldt; } __attribute__((aligned(8))); +#ifdef __KERNEL__ +/* misc prototypes from kernel (not defined elsewhere) */ +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount); +#endif + #endif /* __ASM_X86_CKPT_HDR__H */ diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c index ede7045..a475a30 100644 --- a/arch/x86/mm/checkpoint.c +++ b/arch/x86/mm/checkpoint.c @@ -13,6 +13,7 @@ #include <asm/desc.h> #include <asm/i387.h> +#include <asm/elf.h> #include <linux/checkpoint_types.h> #include <asm/checkpoint_hdr.h> @@ -475,3 +476,61 @@ int restore_read_header_arch(struct ckpt_ctx *ctx) ckpt_hdr_put(ctx, h); return ret; } + +int restore_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm) +{ + struct ckpt_hdr_mm_context *h; + unsigned int n; + int ret; + + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_MM_CONTEXT); + if (IS_ERR(h)) + return PTR_ERR(h); + + ckpt_debug("nldt %d vdso %#lx (%p)\n", + h->nldt, (unsigned long) h->vdso, mm->context.vdso); + + ret = -EINVAL; + if (h->vdso != (unsigned long) mm->context.vdso) + goto out; + if (h->ldt_entry_size != LDT_ENTRY_SIZE) + goto out; + + /* + * to utilize the syscall modify_ldt() we first convert the data + * in the 
checkpoint image from 'struct desc_struct' to 'struct + * user_desc' with reverse logic of include/asm/desc.h:fill_ldt() + */ + ret = 0; + for (n = 0; n < h->nldt; n++) { + struct user_desc info; + struct desc_struct desc; + mm_segment_t old_fs; + + ret = ckpt_kread(ctx, &desc, LDT_ENTRY_SIZE); + if (ret < 0) + break; + + info.entry_number = n; + info.base_addr = desc.base0 | (desc.base1 << 16); + info.limit = desc.limit0; + info.seg_32bit = desc.d; + info.contents = desc.type >> 2; + info.read_exec_only = (desc.type >> 1) ^ 1; + info.limit_in_pages = desc.g; + info.seg_not_present = desc.p ^ 1; + info.useable = desc.avl; + + old_fs = get_fs(); + set_fs(get_ds()); + ret = sys_modify_ldt(1, (struct user_desc __user *) &info, + sizeof(info)); + set_fs(old_fs); + + if (ret < 0) + break; + } + out: + ckpt_hdr_put(ctx, h); + return ret; +} diff --git a/checkpoint/checkpoint_arch.h b/checkpoint/checkpoint_arch.h index d168b9c..4b9b6bf 100644 --- a/checkpoint/checkpoint_arch.h +++ b/checkpoint/checkpoint_arch.h @@ -8,3 +8,4 @@ extern int checkpoint_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm); extern int restore_read_header_arch(struct ckpt_ctx *ctx); extern int restore_thread(struct ckpt_ctx *ctx); extern int restore_cpu(struct ckpt_ctx *ctx); +extern int restore_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm); diff --git a/checkpoint/files.c b/checkpoint/files.c index 1718526..a7cf6c3 100644 --- a/checkpoint/files.c +++ b/checkpoint/files.c @@ -86,3 +86,36 @@ int checkpoint_file(struct ckpt_ctx *ctx, struct file *file) { return dump_fname(ctx, &file->f_path, &ctx->fs_mnt); } + +/************************************************************************** + * Restart + */ + +/** + * read_open_fname - read a file name and open a file + * @ctx: checkpoint context + * @flags: file flags + * @mode: file mode + */ +static struct file *read_open_fname(struct ckpt_ctx *ctx, int flags, int mode) +{ + struct ckpt_hdr *h; + struct file *file; + char *fname; + + h = 
ckpt_read_buf_type(ctx, PATH_MAX, CKPT_HDR_FNAME); + if (IS_ERR(h)) + return (struct file *) h; + fname = (char *) (h + 1); + ckpt_debug("fname '%s' flags %#x mode %#x\n", fname, flags, mode); + + file = filp_open(fname, flags, mode); + ckpt_hdr_put(ctx, h); + return file; +} + +struct file *restore_file(struct ckpt_ctx *ctx) +{ + /* currently only called for mapped files; O_RDONLY works */ + return read_open_fname(ctx, O_RDONLY, 0); +} diff --git a/checkpoint/memory.c b/checkpoint/memory.c index 668d883..c725519 100644 --- a/checkpoint/memory.c +++ b/checkpoint/memory.c @@ -15,6 +15,9 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/err.h> +#include <linux/mm.h> +#include <linux/mman.h> #include <linux/pagemap.h> #include <linux/mm_types.h> #include <linux/checkpoint.h> @@ -598,3 +601,407 @@ int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t) mmput(mm); return ret; } + +/* + * Restart + * + * Unlike checkpoint, restart is executed in the context of each restarting + * process: vma regions are restored via a call to mmap(), and the data is + * read into the address space of the current process. 
+ */
+
+/**
+ * read_pages_vaddrs - read addresses of pages to page-array chain
+ * @ctx - restart context
+ * @nr_pages - number of addresses to read
+ */
+static int read_pages_vaddrs(struct ckpt_ctx *ctx, unsigned long nr_pages)
+{
+	struct ckpt_pgarr *pgarr;
+	unsigned long *vaddrp;
+	int nr, ret;
+
+	while (nr_pages) {
+		pgarr = pgarr_current(ctx);
+		if (!pgarr)
+			return -ENOMEM;
+		nr = pgarr_nr_free(pgarr);
+		if (nr > nr_pages)
+			nr = nr_pages;
+		vaddrp = &pgarr->vaddrs[pgarr->nr_used];
+		ret = ckpt_kread(ctx, vaddrp, nr * sizeof(unsigned long));
+		if (ret < 0)
+			return ret;
+		pgarr->nr_used += nr;
+		nr_pages -= nr;
+	}
+	return 0;
+}
+
+static int restore_read_page(struct ckpt_ctx *ctx, struct page *page, void *p)
+{
+	void *ptr;
+	int ret;
+
+	ret = ckpt_kread(ctx, p, PAGE_SIZE);
+	if (ret < 0)
+		return ret;
+
+	ptr = kmap_atomic(page, KM_USER1);
+	memcpy(ptr, p, PAGE_SIZE);
+	kunmap_atomic(ptr, KM_USER1);
+
+	return 0;
+}
+
+/**
+ * read_pages_contents - read in data of pages in page-array chain
+ * @ctx - restart context
+ */
+static int read_pages_contents(struct ckpt_ctx *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	struct ckpt_pgarr *pgarr;
+	unsigned long *vaddrs;
+	char *buf;
+	int i, ret = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	down_read(&mm->mmap_sem);
+	list_for_each_entry_reverse(pgarr, &ctx->pgarr_list, list) {
+		vaddrs = pgarr->vaddrs;
+		for (i = 0; i < pgarr->nr_used; i++) {
+			struct page *page;
+
+			_ckpt_debug(CKPT_DPAGE, "got page %#lx\n", vaddrs[i]);
+			ret = get_user_pages(current, mm, vaddrs[i],
+					     1, 1, 1, &page, NULL);
+			if (ret < 0)
+				goto out;
+
+			ret = restore_read_page(ctx, page, buf);
+			page_cache_release(page);
+
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+ out:
+	up_read(&mm->mmap_sem);
+	kfree(buf);
+	return ret;
+}
+
+/**
+ * restore_private_contents - restore contents of a VMA with private memory
+ * @ctx - restart context
+ *
+ * Reads a header that specifies how many pages will follow, then
reads
+ * a list of virtual addresses into ctx->pgarr_list page-array chain,
+ * followed by the actual contents of the corresponding pages. Iterates
+ * these steps until reaching a header specifying "0" pages, which marks
+ * the end of the contents.
+ */
+static int restore_private_contents(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_pgarr *h;
+	unsigned long nr_pages;
+	int ret = 0;
+
+	while (1) {
+		h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_PGARR);
+		if (IS_ERR(h))
+			return PTR_ERR(h);
+
+		ckpt_debug("total pages %ld\n", (unsigned long) h->nr_pages);
+
+		nr_pages = h->nr_pages;
+		ckpt_hdr_put(ctx, h);
+
+		if (!nr_pages)
+			break;
+
+		ret = read_pages_vaddrs(ctx, nr_pages);
+		if (ret < 0)
+			break;
+		ret = read_pages_contents(ctx);
+		if (ret < 0)
+			break;
+		pgarr_reset_all(ctx);
+	}
+
+	return ret;
+}
+
+/**
+ * calc_map_prot_bits - convert vm_flags to mmap protection
+ * orig_vm_flags: source vm_flags
+ */
+static unsigned long calc_map_prot_bits(unsigned long orig_vm_flags)
+{
+	unsigned long vm_prot = 0;
+
+	if (orig_vm_flags & VM_READ)
+		vm_prot |= PROT_READ;
+	if (orig_vm_flags & VM_WRITE)
+		vm_prot |= PROT_WRITE;
+	if (orig_vm_flags & VM_EXEC)
+		vm_prot |= PROT_EXEC;
+	if (orig_vm_flags & PROT_SEM) /* only (?)
with IPC-SHM */ + vm_prot |= PROT_SEM; + + return vm_prot; +} + +/** + * calc_map_flags_bits - convert vm_flags to mmap flags + * orig_vm_flags: source vm_flags + */ +static unsigned long calc_map_flags_bits(unsigned long orig_vm_flags) +{ + unsigned long vm_flags = 0; + + vm_flags = MAP_FIXED; + if (orig_vm_flags & VM_GROWSDOWN) + vm_flags |= MAP_GROWSDOWN; + if (orig_vm_flags & VM_DENYWRITE) + vm_flags |= MAP_DENYWRITE; + if (orig_vm_flags & VM_EXECUTABLE) + vm_flags |= MAP_EXECUTABLE; + if (orig_vm_flags & VM_MAYSHARE) + vm_flags |= MAP_SHARED; + else + vm_flags |= MAP_PRIVATE; + + return vm_flags; +} + +/** + * generic_vma_restore - restore a vma + * @mm - address space + * @file - file to map (NULL for anonymous) + * @h - vma header data + */ +static unsigned long generic_vma_restore(struct mm_struct *mm, + struct file *file, + struct ckpt_hdr_vma *h) +{ + unsigned long vm_size, vm_start, vm_flags, vm_prot, vm_pgoff; + unsigned long addr; + + if (h->vm_end < h->vm_start) + return -EINVAL; + if (h->vm_flags & CKPT_VMA_NOT_SUPPORTED) + return -ENOSYS; + + vm_start = h->vm_start; + vm_pgoff = h->vm_pgoff; + vm_size = h->vm_end - h->vm_start; + vm_prot = calc_map_prot_bits(h->vm_flags); + vm_flags = calc_map_flags_bits(h->vm_flags); + + down_write(&mm->mmap_sem); + addr = do_mmap_pgoff(file, vm_start, vm_size, + vm_prot, vm_flags, vm_pgoff); + up_write(&mm->mmap_sem); + ckpt_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n", + vm_size, vm_prot, vm_flags, vm_pgoff, addr); + + return addr; +} + +/** + * private_vma_restore - read vma data, recreate it and read contents + * @ctx: checkpoint context + * @mm: memory address space + * @file: file to use for mapping + * @h - vma header data + */ +int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, + struct file *file, struct ckpt_hdr_vma *h) +{ + unsigned long addr; + + if (h->vm_flags & VM_SHARED) + return -EINVAL; + + addr = generic_vma_restore(mm, file, h); + if (IS_ERR((void *) addr)) + 
return PTR_ERR((void *) addr); + + return restore_private_contents(ctx); +} + +/** + * anon_private_restore - read vma data, recreate it and read contents + * @ctx: checkpoint context + * @mm: memory address space + * @h - vma header data + */ +static int anon_private_restore(struct ckpt_ctx *ctx, + struct mm_struct *mm, + struct ckpt_hdr_vma *h) +{ + /* + * vm_pgoff for anonymous mapping is the "global" page + * offset (namely from addr 0x0), so we force a zero + */ + h->vm_pgoff = 0; + + return private_vma_restore(ctx, mm, NULL, h); +} + +/* callbacks to restore vma per its type: */ +struct restore_vma_ops { + char *vma_name; + enum vma_type vma_type; + int (*restore) (struct ckpt_ctx *ctx, + struct mm_struct *mm, + struct ckpt_hdr_vma *ptr); +}; + +static struct restore_vma_ops restore_vma_ops[] = { + /* ignored vma */ + { + .vma_name = "IGNORE", + .vma_type = CKPT_VMA_IGNORE, + .restore = NULL, + }, + /* special mapping (vdso) */ + { + .vma_name = "VDSO", + .vma_type = CKPT_VMA_VDSO, + .restore = special_mapping_restore, + }, + /* anonymous private */ + { + .vma_name = "ANON PRIVATE", + .vma_type = CKPT_VMA_ANON, + .restore = anon_private_restore, + }, + /* file-mapped private */ + { + .vma_name = "FILE PRIVATE", + .vma_type = CKPT_VMA_FILE, + .restore = filemap_restore, + }, +}; + +/** + * restore_vma - read vma data, recreate it and read contents + * @ctx: checkpoint context + * @mm: memory address space + */ +static int restore_vma(struct ckpt_ctx *ctx, struct mm_struct *mm) +{ + struct ckpt_hdr_vma *h; + struct restore_vma_ops *ops; + int ret; + + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_VMA); + if (IS_ERR(h)) + return PTR_ERR(h); + + ckpt_debug("vma %#lx-%#lx type %d\n", (unsigned long) h->vm_start, + (unsigned long) h->vm_end, (int) h->vma_type); + + ret = -EINVAL; + if (h->vm_end < h->vm_start) + goto out; + if (h->vma_type >= CKPT_VMA_MAX) + goto out; + + ops = &restore_vma_ops[h->vma_type]; + + /* make sure we don't change this accidentally */ 
+ BUG_ON(ops->vma_type != h->vma_type); + + if (ops->restore) { + ckpt_debug("vma type %s\n", ops->vma_name); + ret = ops->restore(ctx, mm, h); + } else { + ckpt_debug("vma ignored\n"); + ret = 0; + } + out: + ckpt_hdr_put(ctx, h); + return ret; +} + +static int destroy_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vmnext = mm->mmap; + struct vm_area_struct *vma; + int ret; + + while (vmnext) { + vma = vmnext; + vmnext = vmnext->vm_next; + ret = do_munmap(mm, vma->vm_start, vma->vm_end-vma->vm_start); + if (ret < 0) { + pr_warning("c/r: failed do_munmap (%d)\n", ret); + return ret; + } + } + return 0; +} + +int restore_mm(struct ckpt_ctx *ctx) +{ + struct ckpt_hdr_mm *h; + struct mm_struct *mm; + unsigned int nr; + int ret; + + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_MM); + if (IS_ERR(h)) + return PTR_ERR(h); + + ckpt_debug("map_count %d\n", h->map_count); + + /* XXX need more sanity checks */ + + ret = -EINVAL; + if ((h->start_code > h->end_code) || + (h->start_data > h->end_data)) + goto out; + + mm = current->mm; + + /* point of no return -- destruct current mm */ + down_write(&mm->mmap_sem); + ret = destroy_mm(mm); + if (ret < 0) { + up_write(&mm->mmap_sem); + goto out; + } + mm->start_code = h->start_code; + mm->end_code = h->end_code; + mm->start_data = h->start_data; + mm->end_data = h->end_data; + mm->start_brk = h->start_brk; + mm->brk = h->brk; + mm->start_stack = h->start_stack; + mm->arg_start = h->arg_start; + mm->arg_end = h->arg_end; + mm->env_start = h->env_start; + mm->env_end = h->env_end; + up_write(&mm->mmap_sem); + + /* FIX: need also mm->flags */ + + for (nr = h->map_count; nr; nr--) { + ret = restore_vma(ctx, mm); + if (ret < 0) + goto out; + } + + ret = restore_mm_context(ctx, mm); + out: + ckpt_hdr_put(ctx, h); + return ret; +} diff --git a/checkpoint/process.c b/checkpoint/process.c index 64deb76..7adb842 100644 --- a/checkpoint/process.c +++ b/checkpoint/process.c @@ -108,6 +108,10 @@ int restore_task(struct ckpt_ctx *ctx) 
ckpt_debug("ret %d\n", ret); if (ret < 0) goto out; + ret = restore_mm(ctx); + ckpt_debug("memory: ret %d\n", ret); + if (ret < 0) + goto out; ret = restore_thread(ctx); ckpt_debug("thread: ret %d\n", ret); if (ret < 0) diff --git a/checkpoint/restart.c b/checkpoint/restart.c index 9adcc90..a1ab0a1 100644 --- a/checkpoint/restart.c +++ b/checkpoint/restart.c @@ -287,10 +287,19 @@ static int restore_read_tail(struct ckpt_ctx *ctx) return ret; } +/* setup restart-specific parts of ctx */ +static int ckpt_ctx_restart(struct ckpt_ctx *ctx) +{ + return 0; +} + int do_restart(struct ckpt_ctx *ctx, pid_t pid) { int ret; + ret = ckpt_ctx_restart(ctx); + if (ret < 0) + return ret; ret = restore_read_header(ctx); if (ret < 0) return ret; diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h index 108e6a1..73b34af 100644 --- a/include/linux/checkpoint.h +++ b/include/linux/checkpoint.h @@ -51,7 +51,11 @@ extern int private_vma_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma, enum vma_type type); +extern int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, + struct file *file, struct ckpt_hdr_vma *h); + extern int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t); +extern int restore_mm(struct ckpt_ctx *ctx); #define CKPT_VMA_NOT_SUPPORTED \ (VM_SHARED | VM_MAYSHARE | VM_IO | VM_HUGETLB | \ @@ -61,6 +65,7 @@ extern int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t); /* files */ extern int checkpoint_file(struct ckpt_ctx *ctx, struct file *file); +extern struct file *restore_file(struct ckpt_ctx *ctx); /* debugging flags */ diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h index dab6b7f..5266e4b 100644 --- a/include/linux/checkpoint_hdr.h +++ b/include/linux/checkpoint_hdr.h @@ -114,11 +114,13 @@ struct ckpt_hdr_mm { __u64 arg_start, arg_end, env_start, env_end; } __attribute__((aligned(8))); -/* vma subtypes */ +/* vma subtypes - index into restore_vma_dispatch[] */ enum vma_type { - 
CKPT_VMA_VDSO = 1, /* special vdso vma */ + CKPT_VMA_IGNORE = 0, + CKPT_VMA_VDSO, /* special vdso vma */ CKPT_VMA_ANON, /* private anonymous */ CKPT_VMA_FILE, /* private mapped file */ + CKPT_VMA_MAX, }; /* vma decsriptor */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 05f0ed9..585d398 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1182,6 +1182,15 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); + +/* checkpoint/restart */ +#ifdef CONFIG_CHECKPOINT +extern int filemap_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, + struct ckpt_hdr_vma *hh); +extern int special_mapping_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, + struct ckpt_hdr_vma *hh); +#endif + /* readahead.c */ #define VM_MAX_READAHEAD 128 /* kbytes */ #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ diff --git a/mm/filemap.c b/mm/filemap.c index 2b58027..ef5680b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1650,6 +1650,24 @@ static int filemap_checkpoint(struct ckpt_ctx *ctx, out: return ret; } + +int filemap_restore(struct ckpt_ctx *ctx, + struct mm_struct *mm, + struct ckpt_hdr_vma *h) +{ + struct file *file; + int ret; + + /* for private mapping using 'read-only' is sufficient */ + file = restore_file(ctx); + if (IS_ERR(file)) + return PTR_ERR(file); + + ret = private_vma_restore(ctx, mm, file, h); + + fput(file); + return ret; +} #else #define filemap_checkpoint NULL #endif /* CONFIG_CHECKPOINT */ diff --git a/mm/mmap.c b/mm/mmap.c index 6b75359..3b6356c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2114,7 +2114,7 @@ void exit_mmap(struct mm_struct *mm) tlb = tlb_gather_mmu(mm, 1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ - end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); + end = vma ? 
unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL) : 0; vm_unacct_memory(nr_accounted); free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); @@ -2272,13 +2272,22 @@ static void special_mapping_close(struct vm_area_struct *vma) { } -#if CONFIG_CHEKCPOINT +#ifdef CONFIG_CHECKPOINT +/* + * FIX: + * - checkpoint vdso pages (once per distinct vdso is enough) + * - check for compatilibility between saved and current vdso + * - accommodate for dynamic kernel data in vdso page + * + * Current, we require COMPAT_VDSO which somewhat mitigates the issue + */ static int special_mapping_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma) { - char *name; + const char *name; /* + * FIX: * Currently, we only handle VDSO/vsyscall special handling. * Even that, is very basic - we just skip the contents and * hope for the best in terms of compatilibity upon restart. @@ -2288,11 +2297,24 @@ static int special_mapping_checkpoint(struct ckpt_ctx *ctx, return -ENOSYS; name = arch_vma_name(vma); - if (!name || strcmp(vma_name, "[vdso]")) + if (!name || strcmp(name, "[vdso]")) return -ENOSYS; return generic_vma_checkpoint(ctx, vma, CKPT_VMA_VDSO); } + +int special_mapping_restore(struct ckpt_ctx *ctx, + struct mm_struct *mm, + struct ckpt_hdr_vma *h) +{ + /* + * FIX: + * Currently, we only handle VDSO/vsyscall special handling. + * Even that, is very basic - call arch_setup_additional_pages + * requiring the same mapping (start address) as before. + */ + return arch_setup_additional_pages(NULL, h->vm_start, 0); +} #else #define special_mapping_checkpoint NULL #endif /* CONFIG_CHECKPOINT */ -- 1.5.4.3 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers