On Wed, Aug 20, 2008 at 11:05:39PM -0400, Oren Laadan wrote: > > Restoring the memory address space begins with nuking the existing one > of the current process, and then reading the VMA state and contents. > Call do_mmap_pgoffset() for each VMA and then read in the data. [...] > diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c > new file mode 100644 > index 0000000..df602a9 > --- /dev/null > +++ b/checkpoint/rstr_mem.c [...] > +static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm) > +{ > + struct cr_hdr_vma *hh = cr_hbuf_get(ctx, sizeof(*hh)); > + unsigned long vm_size, vm_flags, vm_prot, vm_pgoff; > + unsigned long addr; > + unsigned long flags; > + struct file *file = NULL; > + char *fname = NULL; > + int ret; > + > + ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_VMA); > + if (ret < 0) > + return ret; > + else if (ret != 0) > + return -EINVAL; > + > + cr_debug("vma %#lx-%#lx npages %d\n", (unsigned long) hh->vm_start, > + (unsigned long) hh->vm_end, (int) hh->npages); > + > + if (hh->vm_end < hh->vm_start || hh->npages < 0) > + return -EINVAL; > + > + vm_size = hh->vm_end - hh->vm_start; > + vm_prot = cr_calc_map_prot_bits(hh->vm_flags); > + vm_flags = cr_calc_map_flags_bits(hh->vm_flags); > + vm_pgoff = hh->vm_pgoff; > + > + if (hh->fname) { > + fname = ctx->tbuf; > + ret = cr_read_str(ctx, fname, PAGE_SIZE); > + if (ret < 0) > + return ret; > + } > + > + cr_debug("vma fname '%s' how %d\n", fname, hh->how); > + > + switch (hh->how) { > + > + case CR_VMA_ANON: /* anonymous private mapping */ > + if (hh->fname) > + return -EINVAL; > + /* vm_pgoff for anonymous mapping is the "global" page > + offset (namely from addr 0x0), so we force a zero */ > + vm_pgoff = 0; > + break; > + > + case CR_VMA_FILE: /* private mapping from a file */ > + if (!hh->fname) > + return -EINVAL; > + /* O_RDWR only needed if both (VM_WRITE|VM_SHARED) are set */ > + flags = hh->vm_flags & (VM_WRITE | VM_SHARED); > + flags = (flags == (VM_WRITE | VM_SHARED) ? 
O_RDWR : O_RDONLY); > + file = filp_open(fname, flags, 0); > + if (IS_ERR(file)) > + return PTR_ERR(file); > + break; > + > + default: > + return -EINVAL; > + > + } > + > + addr = do_mmap_pgoff(file, (unsigned long) hh->vm_start, > + vm_size, vm_prot, vm_flags, vm_pgoff); > + cr_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n", > + vm_size, vm_prot, vm_flags, vm_pgoff, addr); > + > + /* the file (if opened) is now referenced by the vma */ > + if (file) > + filp_close(file, NULL); > + > + if (IS_ERR((void*) addr)) > + return (PTR_ERR((void *) addr)); > + > + /* > + * CR_VMA_ANON: read in memory as is > + * CR_VMA_FILE: read in memory as is > + * (more to follow ...) > + */ > + > + switch (hh->how) { > + case CR_VMA_ANON: > + case CR_VMA_FILE: > + /* standard case: read the data into the memory */ > + ret = cr_vma_read_pages(ctx, hh); > + break; > + } > + > + if (ret < 0) > + return ret; > + > + if (vm_prot & PROT_EXEC) > + flush_icache_range(hh->vm_start, hh->vm_end); > + > + cr_hbuf_put(ctx, sizeof(*hh)); > + cr_debug("vma retval %d\n", ret); > + return 0; > +} > + > +static int cr_destroy_mm(struct mm_struct *mm) > +{ > + struct vm_area_struct *vmnext = mm->mmap; > + struct vm_area_struct *vma; > + int ret; > + > + while (vmnext) { > + vma = vmnext; > + vmnext = vmnext->vm_next; > + ret = do_munmap(mm, vma->vm_start, vma->vm_end-vma->vm_start); > + if (ret < 0) > + return ret; > + } > + return 0; > +} > + > +int cr_read_mm(struct cr_ctx *ctx) > +{ > + struct cr_hdr_mm *hh = cr_hbuf_get(ctx, sizeof(*hh)); > + struct mm_struct *mm; > + int nr, ret; > + > + ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM); > + if (ret < 0) > + return ret; > +#if 0 /* activate when containers are used */ > + if (ret != task_pid_vnr(current)) > + return -EINVAL; > +#endif > + cr_debug("map_count %d\n", hh->map_count); > + > + /* XXX need more sanity checks */ > + if (hh->start_code > hh->end_code || > + hh->start_data > hh->end_data || hh->map_count < 0) > + return 
-EINVAL; > + > + mm = current->mm; > + > + /* point of no return -- destruct current mm */ > + down_write(&mm->mmap_sem); > + ret = cr_destroy_mm(mm); > + up_write(&mm->mmap_sem); > + > + if (ret < 0) > + return ret; > + You should take down_write(&mm->mmap_sem) again here, and hold it until all the VMAs are restored. This means removing down_write() from cr_vma_writable(). Alternatively, make the locking finer-grained: release it before looping over the VMAs, and have cr_read_vma() take it again before calling do_mmap_pgoff(). > + mm->start_code = hh->start_code; > + mm->end_code = hh->end_code; > + mm->start_data = hh->start_data; > + mm->end_data = hh->end_data; > + mm->start_brk = hh->start_brk; > + mm->brk = hh->brk; > + mm->start_stack = hh->start_stack; > + mm->arg_start = hh->arg_start; > + mm->arg_end = hh->arg_end; > + mm->env_start = hh->env_start; > + mm->env_end = hh->env_end; > + > + /* FIX: need also mm->flags */ > + > + for (nr = hh->map_count; nr; nr--) { > + ret = cr_read_vma(ctx, mm); > + if (ret < 0) > + return ret; > + } > + > + ret = cr_read_mm_context(ctx, mm, hh->tag); > + > + cr_hbuf_put(ctx, sizeof(*hh)); > + return ret; > +} Thanks, Louis -- Dr Louis Rilling Kerlabs Skype: louis.rilling Batiment Germanium Phone: (+33|0) 6 80 89 08 23 80 avenue des Buttes de Coesmes http://www.kerlabs.com/ 35700 Rennes
Attachment:
signature.asc
Description: Digital signature
_______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers