On Fri, Jul 15, 2011 at 05:49:08PM +0400, Pavel Emelyanov wrote: > Additionally the binfmt_img.h from kernel is required for cr-restore. > #include <stdio.h> > #include <unistd.h> > #include <signal.h> > #include <dirent.h> > #include <string.h> > #include <fcntl.h> > #include <sys/stat.h> > #include <errno.h> > #include <linux/kdev_t.h> > #include <stdlib.h> > #include <sys/mman.h> > #include <sys/vfs.h> > > #include <linux/types.h> > #include "img_structs.h" > > static int fdinfo_img; > static int pages_img; > static int core_img; > static int shmem_img; > static int pipes_img; > > #define PIPEFS_MAGIC 0x50495045 Shouldn't there be only one MAGIC number for checkpoint contents? You can always add an additional "type" number following the magic number. Or make the type a string with the name of the /proc file it's from... etc. > > static int prep_img_files(int pid) > { > __u32 type; > char name[64]; > > sprintf(name, "fdinfo-%d.img", pid); > fdinfo_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (fdinfo_img < 0) { > perror("Can't open fdinfo"); > return 1; > } > > type = FDINFO_MAGIC; > write(fdinfo_img, &type, 4); > > sprintf(name, "pages-%d.img", pid); > pages_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (pages_img < 0) { > perror("Can't open shmem"); > return 1; > } > > type = PAGES_MAGIC; > write(pages_img, &type, 4); > > sprintf(name, "core-%d.img", pid); > core_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (core_img < 0) { > perror("Can't open core"); > return 1; > } > > sprintf(name, "shmem-%d.img", pid); > shmem_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (shmem_img < 0) { > perror("Can't open shmem"); > return 1; > } > > type = SHMEM_MAGIC; > write(shmem_img, &type, 4); > > sprintf(name, "pipes-%d.img", pid); > pipes_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (pipes_img < 0) { > perror("Can't open pipes"); > return 1; > } > > type = PIPES_MAGIC; > write(pipes_img, &type, 4); > > return 0; > } 
> > static void kill_imgfiles(int pid) > { > /* FIXME */ > } > > static int stop_task(int pid) > { > return kill(pid, SIGSTOP); > } > > static void continue_task(int pid) > { > if (kill(pid, SIGCONT)) > perror("Can't cont task"); > } Eventually, I think you should use the cgroup freezer here rather than signals. Shells and debuggers use these signals so a checkpoint could easily and quietly be corrupted. Even if you use the freezer, there needs to be a mechanism to assure that the frozen cgroup is not thawed before a consistent checkpoint is complete. Otherwise corruption is always a possibility. > > static char big_tmp_str[PATH_MAX]; > > static int read_fd_params(int pid, char *fd, unsigned long *pos, unsigned int *flags) > { > char fd_str[128]; > int ifd; > > sprintf(fd_str, "/proc/%d/fdinfo/%s", pid, fd); > > printf("\tGetting fdinfo for fd %s\n", fd); > ifd = open(fd_str, O_RDONLY); > if (ifd < 0) { > perror("Can't open fdinfo"); > return 1; > } > > read(ifd, big_tmp_str, sizeof(big_tmp_str)); > close(ifd); > > sscanf(big_tmp_str, "pos:\t%lli\nflags:\t%o\n", pos, flags); > return 0; > } > > static int dump_one_reg_file(int type, unsigned long fd_name, int lfd, > int lclose, unsigned long pos, unsigned int flags) > { > char fd_str[128]; > int len; > struct fdinfo_entry e; > > sprintf(fd_str, "/proc/self/fd/%d", lfd); > len = readlink(fd_str, big_tmp_str, sizeof(big_tmp_str) - 1); > if (len < 0) { > perror("Can't readlink fd"); > return 1; > } > > big_tmp_str[len] = '\0'; > printf("\tDumping path for %x fd via self %d [%s]\n", fd_name, lfd, big_tmp_str); > > if (lclose) > close(lfd); > > e.type = type; > e.addr = fd_name; > e.len = len; > e.pos = pos; > e.flags = flags; > > write(fdinfo_img, &e, sizeof(e)); > write(fdinfo_img, big_tmp_str, len); > > return 0; > } > > #define MAX_PIPE_BUF_SIZE 1024 /* FIXME - this is not so */ > #define SPLICE_F_NONBLOCK 0x2 > > static int dump_pipe_and_data(int lfd, struct pipes_entry *e) > { > int steal_pipe[2]; > int ret; > > 
printf("\tDumping data from pipe %x\n", e->pipeid); > if (pipe(steal_pipe) < 0) { > perror("Can't create pipe for stealing data"); > return 1; > } > > ret = tee(lfd, steal_pipe[1], MAX_PIPE_BUF_SIZE, SPLICE_F_NONBLOCK); Neat application of tee(). > if (ret < 0) { > if (errno != EAGAIN) { > perror("Can't pick pipe data"); > return 1; > } > > ret = 0; > } > > e->bytes = ret; > write(pipes_img, e, sizeof(*e)); > > if (ret) { > ret = splice(steal_pipe[0], NULL, pipes_img, NULL, ret, 0); > if (ret < 0) { > perror("Can't push pipe data"); > return 1; > } > } > > close(steal_pipe[0]); > close(steal_pipe[1]); > return 0; > } > > static int dump_one_pipe(int fd, int lfd, unsigned int id, unsigned int flags) > { > struct pipes_entry e; > > printf("\tDumping pipe %d/%x flags %x\n", fd, id, flags); > > e.fd = fd; > e.pipeid = id; > e.flags = flags; > > if (flags & O_WRONLY) { > e.bytes = 0; > write(pipes_img, &e, sizeof(e)); > return 0; > } > > return dump_pipe_and_data(lfd, &e); > } > > static int dump_one_fd(int dir, char *fd_name, unsigned long pos, unsigned int flags) > { > int fd; > struct stat st_buf; > struct statfs stfs_buf; > > printf("\tDumping fd %s\n", fd_name); > fd = openat(dir, fd_name, O_RDONLY); > if (fd == -1) { > printf("Tried to openat %d/%d %s\n", getpid(), dir, fd_name); > perror("Can't open fd"); > return 1; > } > > if (fstat(fd, &st_buf) < 0) { > perror("Can't stat one"); > return 1; > } > > if (S_ISREG(st_buf.st_mode)) > return dump_one_reg_file(FDINFO_FD, atoi(fd_name), fd, 1, pos, flags); > > if (S_ISFIFO(st_buf.st_mode)) { > if (fstatfs(fd, &stfs_buf) < 0) { > perror("Can't statfs one"); > return 1; > } > > if (stfs_buf.f_type == PIPEFS_MAGIC) > return dump_one_pipe(atoi(fd_name), fd, st_buf.st_ino, flags); > } This is starting to look like a linear search over the set of all possible types of things file descriptors can refer to. A kernel implementation doesn't have to do this. 
Furthermore, if lots of file descriptors are open this could be a lot of fstat() and fstatfs() calls -- will making so many syscalls force us to a completely in-kernel implementation, like the set already proposed, just to get usable performance? > > if (!strcmp(fd_name, "0")) { > printf("\tSkipping stdin\n"); > return 0; > } Assuming that fd 0 is "stdin" is very very gross. Yes, it's almost always true. But that does *not* mean that it's a pty. stdin could be a pipe we need to checkpoint. Really, this is also about the "type" of thing the fd is referring to -- not about which fd nr it is. What are your plans for removing this? > > if (!strcmp(fd_name, "1")) { > printf("\tSkipping stdout\n"); > return 0; > } Gross again, for the same reasons. > > if (!strcmp(fd_name, "2")) { > printf("\tSkipping stderr\n"); > return 0; > } Gross again, for the same reasons. > > fprintf(stderr, "Can't dump file %s of that type [%x]\n", fd_name, st_buf.st_mode); > return 1; > > } > > static int dump_task_files(int pid) > { > char pid_fd_dir[64]; > DIR *fd_dir; > struct dirent *de; > unsigned long pos; > unsigned int flags; > > printf("Dumping open files for %d\n", pid); > > sprintf(pid_fd_dir, "/proc/%d/fd", pid); > fd_dir = opendir(pid_fd_dir); > if (fd_dir == NULL) { > perror("Can't open fd dir"); > return -1; > } > > while ((de = readdir(fd_dir)) != NULL) { > if (de->d_name[0] == '.') > continue; > > if (read_fd_params(pid, de->d_name, &pos, &flags)) > return 1; > > if (dump_one_fd(dirfd(fd_dir), de->d_name, pos, flags)) > return 1; > } > > closedir(fd_dir); > return 0; > } > > #define PAGE_SIZE 4096 > #define PAGE_RSS 0x1 > > static unsigned long rawhex(char *str, char **end) > { > unsigned long ret = 0; > > while (1) { > if (str[0] >= '0' && str[0] <= '9') { > ret <<= 4; > ret += str[0] - '0'; > } else if (str[0] >= 'a' && str[0] <= 'f') { > ret <<= 4; > ret += str[0] - 'a' + 0xA; > } else if (str[0] >= 'A' && str[0] <= 'F') { > ret <<= 4; > ret += str[0] - 'A' + 0xA; > } else { 
> if (end) > *end = str; > return ret; > } > > str++; > } > } nit: I haven't looked closely enough to see where rawhex is being used, but is there no suitable library function for this? > > static void map_desc_parm(char *desc, unsigned long *pgoff, unsigned long *len) > { > char *s; > unsigned long start, end; > > start = rawhex(desc, &s); > if (*s != '-') { > goto bug; > } > > end = rawhex(s + 1, &s); > if (*s != ' ') { > goto bug; > } > > s = strchr(s + 1, ' '); > *pgoff = rawhex(s + 1, &s); > if (*s != ' ') { > goto bug; > } > > if (start > end) > goto bug; > > *len = end - start; > > if (*len % PAGE_SIZE) { > goto bug; > } > if (*pgoff % PAGE_SIZE) { > goto bug; > } > > return; > bug: > fprintf(stderr, "BUG\n"); > exit(1); > } > > static int dump_map_pages(int lfd, unsigned long start, unsigned long pgoff, unsigned long len) > { > unsigned int nrpages, pfn; > void *mem; > unsigned char *mc; > > printf("\t\tDumping pages start %x len %x off %x\n", start, len, pgoff); > mem = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, lfd, pgoff); > if (mem == MAP_FAILED) { > perror("Can't map"); > return 1; > } > > nrpages = len / PAGE_SIZE; > mc = malloc(nrpages); > if (mincore(mem, len, mc)) { > perror("Can't mincore mapping"); > return 1; > } > > for (pfn = 0; pfn < nrpages; pfn++) > if (mc[pfn] & PAGE_RSS) { > __u64 vaddr; > > vaddr = start + pfn * PAGE_SIZE; > write(pages_img, &vaddr, 8); > write(pages_img, mem + pfn * PAGE_SIZE, PAGE_SIZE); > } > > munmap(mem, len); > > return 0; > } > > static int dump_anon_private_map(char *start) > { > printf("\tSkipping anon private mapping at %s\n", start); > return 0; > } > > static int dump_anon_shared_map(char *_start, char *mdesc, int lfd, struct stat *st) > { > unsigned long pgoff, len; > struct shmem_entry e; > unsigned long start; > struct stat buf; > > map_desc_parm(mdesc, &pgoff, &len); > > start = rawhex(_start, NULL); > e.start = start; > e.end = start + len; > e.shmid = st->st_ino; > > write(shmem_img, &e, 
sizeof(e)); > > if (dump_map_pages(lfd, start, pgoff, len)) > return 1; > > close(lfd); > return 0; > } > > static int dump_file_shared_map(char *start, char *mdesc, int lfd) > { > printf("\tSkipping file shared mapping at %s\n", start); > close(lfd); > return 0; > } Shouldn't this be an error since it appears these shared mappings are currently unsupported? > > static int dump_file_private_map(char *_start, char *mdesc, int lfd) > { > unsigned long pgoff, len; > unsigned long start; > > map_desc_parm(mdesc, &pgoff, &len); > > start = rawhex(_start, NULL); > if (dump_one_reg_file(FDINFO_MAP, start, lfd, 0, 0, O_RDONLY)) > return 1; > > close(lfd); > return 0; > } > > static int dump_one_mapping(char *mdesc, DIR *mfd_dir) > { > char *flags, *tmp; > char map_start[32]; > int lfd; > struct stat st_buf; > > tmp = strchr(mdesc, '-'); > memset(map_start, 0, sizeof(map_start)); > strncpy(map_start, mdesc, tmp - mdesc); > flags = strchr(mdesc, ' '); > flags++; > > printf("\tDumping %s\n", map_start); > lfd = openat(dirfd(mfd_dir), map_start, O_RDONLY); > if (lfd == -1) { > if (errno != ENOENT) { > perror("Can't open mapping"); > return 1; > } > > if (flags[3] != 'p') { > fprintf(stderr, "Bogus mapping [%s]\n", mdesc); > return 1; > } > > return dump_anon_private_map(map_start); > } > > if (fstat(lfd, &st_buf) < 0) { > perror("Can't stat mapping!"); > return 1; > } > > if (!S_ISREG(st_buf.st_mode)) { > perror("Can't handle non-regular mapping"); > return 1; > } > > if (MAJOR(st_buf.st_dev) == 0) { > if (flags[3] != 's') { > fprintf(stderr, "Bogus mapping [%s]\n", mdesc); > return 1; > } > > /* FIXME - this can be tmpfs visible file mapping */ > return dump_anon_shared_map(map_start, mdesc, lfd, &st_buf); > } > > if (flags[3] == 'p') > return dump_file_private_map(map_start, mdesc, lfd); > else > return dump_file_shared_map(map_start, mdesc, lfd); > } > > static int dump_task_ext_mm(int pid) > { > char path[64]; > DIR *mfd_dir; > FILE *maps; > > printf("Dumping mappings for 
%d\n", pid); > > sprintf(path, "/proc/%d/mfd", pid); > mfd_dir = opendir(path); > if (mfd_dir == NULL) { > perror("Can't open mfd dir"); > return -1; > } > > sprintf(path, "/proc/%d/maps", pid); > maps = fopen(path, "r"); > if (maps == NULL) { > perror("Can't open maps file"); > return 1; > } > > while (fgets(big_tmp_str, sizeof(big_tmp_str), maps) != NULL) > if (dump_one_mapping(big_tmp_str, mfd_dir)) > return 1; > > fclose(maps); > closedir(mfd_dir); > return 0; > } > > static int dump_task_state(int pid) > { > char path[64]; > int dump_fd; > void *mem; > > printf("Dumping task image for %d\n", pid); > sprintf(path, "/proc/%d/dump", pid); > dump_fd = open(path, O_RDONLY); > if (dump_fd < 0) { > perror("Can't open dump file"); > return 1; > } > > mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); > if (mem == MAP_FAILED) { > perror("Can't get mem"); > return 1; > } > > while (1) { > int r, w; > > r = read(dump_fd, mem, 4096); > if (r == 0) > break; > if (r < 0) { > perror("Can't read dump file"); > return 1; > } > > w = 0; > while (w < r) { > int ret; > > ret = write(core_img, mem + w, r - w); > if (ret <= 0) { > perror("Can't write core"); > return 1; > } > > w += ret; > } > } > > munmap(mem, 4096); > close(dump_fd); > > return 0; > } > > static int dump_one_task(int pid, int stop) > { > printf("Dumping task %d\n", pid); > > if (prep_img_files(pid)) > return 1; > > if (stop && stop_task(pid)) > goto err_task; > > if (dump_task_files(pid)) > goto err; > > if (dump_task_ext_mm(pid)) > goto err; > > if (dump_task_state(pid)) > goto err; > > if (stop) > continue_task(pid); > > printf("Dump is complete\n"); > return 0; > > err: > if (stop) > continue_task(pid); > err_task: > kill_imgfiles(pid); > return 1; > } > > static int pstree_fd; > static char big_tmp_str[4096]; > static int *pids, nr_pids; > > static char *get_children_pids(int pid) > { > FILE *f; > int len; > char *ret, *tmp; > > sprintf(big_tmp_str, "/proc/%d/status", pid); > f = 
fopen(big_tmp_str, "r"); > if (f == NULL) > return NULL; > > while ((fgets(big_tmp_str, sizeof(big_tmp_str), f)) != NULL) { > if (strncmp(big_tmp_str, "Children:", 9)) > continue; > > tmp = big_tmp_str + 10; > len = strlen(tmp); > ret = malloc(len + 1); > strcpy(ret, tmp); > if (len) > ret[len - 1] = ' '; > > fclose(f); > return ret; > } > > fclose(f); > return NULL; > } > > static int dump_pid_and_children(int pid) > { > struct pstree_entry e; > char *chlist, *tmp, *tmp2; > > printf("\tReading %d children list\n", pid); > chlist = get_children_pids(pid); > if (chlist == NULL) > return 1; > > printf("\t%d has children %s\n", pid, chlist); > > e.pid = pid; > e.nr_children = 0; > > pids = realloc(pids, (nr_pids + 1) * sizeof(int)); > pids[nr_pids++] = e.pid; > > tmp = chlist; > while ((tmp = strchr(tmp, ' ')) != NULL) { > tmp++; > e.nr_children++; > } > > write(pstree_fd, &e, sizeof(e)); > tmp = chlist; > while (1) { > __u32 cpid; > > cpid = strtol(tmp, &tmp, 10); > if (cpid == 0) > break; > if (*tmp != ' ') { > fprintf(stderr, "Error in string with children!\n"); > return 1; > } > > write(pstree_fd, &cpid, sizeof(cpid)); > tmp++; > } > > tmp = chlist; > while ((tmp2 = strchr(tmp, ' ')) != NULL) { > *tmp2 = '\0'; > if (dump_pid_and_children(atoi(tmp))) > return 1; > tmp = tmp2 + 1; > } > > free(chlist); > return 0; > } > > static int __dump_all_tasks(void) > { > int i, pid; > > printf("Dumping tasks' images for"); > for (i = 0; i < nr_pids; i++) > printf(" %d", pids[i]); > printf("\n"); > > printf("Stopping tasks\n"); > for (i = 0; i < nr_pids; i++) > if (stop_task(pids[i])) > goto err; > > for (i = 0; i < nr_pids; i++) { > if (dump_one_task(pids[i], 0)) > goto err; > } > > printf("Resuming tasks\n"); > for (i = 0; i < nr_pids; i++) > continue_task(pids[i]); > > return 0; > > err: > for (i = 0; i < nr_pids; i++) > continue_task(pids[i]); > return 1; > > } > > static int dump_all_tasks(int pid) > { > char *chlist; > __u32 type; > > pids = NULL; > nr_pids = 0; > > 
printf("Dumping process tree, start from %d\n", pid); > > sprintf(big_tmp_str, "pstree-%d.img", pid); > pstree_fd = open(big_tmp_str, O_WRONLY | O_CREAT | O_EXCL, 0600); > if (pstree_fd < 0) { > perror("Can't create pstree"); > return 1; > } > > type = PSTREE_MAGIC; > write(pstree_fd, &type, sizeof(type)); > > if (dump_pid_and_children(pid)) > return 1; > > close(pstree_fd); > > return __dump_all_tasks(); > } > > int main(int argc, char **argv) > { > if (argc != 3) > goto usage; > if (argv[1][0] != '-') > goto usage; > if (argv[1][1] == 'p') > return dump_one_task(atoi(argv[2]), 1); > if (argv[1][1] == 't') > return dump_all_tasks(atoi(argv[2])); > > usage: > printf("Usage: %s (-p|-t) <pid>\n", argv[0]); > return 1; > } > #include <stdio.h> > #include <unistd.h> > #include <signal.h> > #include <dirent.h> > #include <string.h> > #include <fcntl.h> > #include <sys/stat.h> > #include <errno.h> > #include <linux/kdev_t.h> > #include <stdlib.h> > #include <sys/mman.h> > #include <sys/sendfile.h> > > #define PAGE_SIZE 4096 > > #include <linux/types.h> > #include "img_structs.h" > #include "binfmt_img.h" > > struct fmap_fd { > unsigned long start; > int fd; > struct fmap_fd *next; > }; > > static struct fmap_fd *fmap_fds; > > struct shmem_info { > unsigned long start; > unsigned long end; > unsigned long id; > int pid; > int real_pid; > }; > > static struct shmem_info *shmems; > static int nr_shmems; > > struct pipes_info { > unsigned int id; > int pid; > int real_pid; > int read_fd; > int write_fd; > int users; > }; > > static struct pipes_info *pipes; > static int nr_pipes; > > static void show_saved_shmems(void) > { > int i; > > printf("\tSaved shmems:\n"); > for (i = 0; i < nr_shmems; i++) > printf("\t\t%016lx %lx %d\n", shmems[i].start, shmems[i].id, shmems[i].pid); > } > > static void show_saved_pipes(void) > { > int i; > > printf("\tSaved pipes:\n"); > for (i = 0; i < nr_pipes; i++) > printf("\t\t%x -> %d\n", pipes[i].id, pipes[i].pid); > } > > static struct 
shmem_info *search_shmem(unsigned long addr, unsigned long id) > { > int i; > > for (i = 0; i < nr_shmems; i++) { > struct shmem_info *si; > > si = shmems + i; > if (si->start <= addr && si->end >= addr && si->id == id) > return si; > } > > return NULL; > } > > static struct pipes_info *search_pipes(unsigned int pipeid) > { > int i; > > for (i = 0; i < nr_pipes; i++) { > struct pipes_info *pi; > > pi = pipes + i; > if (pi->id == pipeid) > return pi; > } > > return NULL; > } > > static void shmem_update_real_pid(int vpid, int rpid) > { > int i; > > for (i = 0; i < nr_shmems; i++) > if (shmems[i].pid == vpid) > shmems[i].real_pid = rpid; > } > > static int shmem_wait_and_open(struct shmem_info *si) > { > /* FIXME - not good */ > char path[128]; > unsigned long time = 1000; > > sleep(1); > > while (si->real_pid == 0) > usleep(time); > > sprintf(path, "/proc/%d/mfd/0x%lx", si->real_pid, si->start); > while (1) { > int ret; > > ret = open(path, O_RDWR); > if (ret > 0) > return ret; > > if (ret < 0 && errno != ENOENT) { > perror(" Can't stat shmem"); > return -1; > } > > printf("Waiting for [%s] to appear\n", path); > if (time < 20000000) > time <<= 1; > usleep(time); > } > } > > static int try_to_add_shmem(int pid, struct shmem_entry *e) > { > int i; > > for (i = 0; i < nr_shmems; i++) { > if (shmems[i].start != e->start || shmems[i].id != e->shmid) > continue; > > if (shmems[i].end != e->end) { > printf("Bogus shmem\n"); > return 1; > } > > if (shmems[i].pid > pid) > shmems[i].pid = pid; > > return 0; > } > > if ((nr_shmems + 1) * sizeof(struct shmem_info) >= 4096) { > printf("OOM storing shmems\n"); > return 1; > } > > shmems[nr_shmems].start = e->start; > shmems[nr_shmems].end = e->end; > shmems[nr_shmems].id = e->shmid; > shmems[nr_shmems].pid = pid; > shmems[nr_shmems].real_pid = 0; > nr_shmems++; > > return 0; > } > > static int try_to_add_pipe(int pid, struct pipes_entry *e, int p_fd) > { > int i; > > for (i = 0; i < nr_pipes; i++) { > if (pipes[i].id != 
e->pipeid) > continue; > > if (pipes[i].pid > pid) > pipes[i].pid = pid; > pipes[i].users++; > > return 0; > } > > if ((nr_pipes + 1) * sizeof(struct pipes_info) >= 4096) { > printf("OOM storing pipes\n"); > return 1; > } > > pipes[nr_pipes].id = e->pipeid; > pipes[nr_pipes].pid = pid; > pipes[nr_pipes].real_pid = 0; > pipes[nr_pipes].read_fd = 0; > pipes[nr_pipes].write_fd = 0; > pipes[nr_pipes].users = 1; > nr_pipes++; > > return 0; > } > > static int prepare_shmem_pid(int pid) > { > char path[64]; > int sh_fd; > __u32 type = 0; > > sprintf(path, "shmem-%d.img", pid); > sh_fd = open(path, O_RDONLY); > if (sh_fd < 0) { > perror("Can't open shmem info"); > return 1; > } > > read(sh_fd, &type, sizeof(type)); > if (type != SHMEM_MAGIC) { > perror("Bad shmem magic"); > return 1; > } > > while (1) { > struct shmem_entry e; > int ret; > > ret = read(sh_fd, &e, sizeof(e)); > if (ret == 0) > break; > if (ret != sizeof(e)) { > perror("Can't read shmem entry"); > return 1; > } > > if (try_to_add_shmem(pid, &e)) > return 1; > } > > close(sh_fd); > return 0; > } > > static int prepare_pipes_pid(int pid) > { > char path[64]; > int p_fd; > __u32 type = 0; > > sprintf(path, "pipes-%d.img", pid); > p_fd = open(path, O_RDONLY); > if (p_fd < 0) { > perror("Can't open pipes image"); > return 1; > } > > read(p_fd, &type, sizeof(type)); > if (type != PIPES_MAGIC) { > perror("Bad pipes magin"); > return 1; > } > > while (1) { > struct pipes_entry e; > int ret; > > ret = read(p_fd, &e, sizeof(e)); > if (ret == 0) > break; > if (ret != sizeof(e)) { > fprintf(stderr, "Read pipes for %s failed %d of %d read\n", > path, ret, sizeof(e)); > perror("Can't read pipes entry"); > return 1; > } > > if (try_to_add_pipe(pid, &e, p_fd)) > return 1; > > lseek(p_fd, e.bytes, SEEK_CUR); > } > > close(p_fd); > return 0; > } > > static int prepare_shared(int ps_fd) > { > printf("Preparing info about shared resources\n"); > > nr_shmems = 0; > shmems = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | 
MAP_ANON, 0, 0); > if (shmems == MAP_FAILED) { > perror("Can't map shmems"); > return 1; > } > > pipes = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0); > if (pipes == MAP_FAILED) { > perror("Can't map pipes"); > return 1; > } > > while (1) { > struct pstree_entry e; > int ret; > > ret = read(ps_fd, &e, sizeof(e)); > if (ret == 0) > break; > > if (ret != sizeof(e)) { > perror("Can't read ps"); > return 1; > } > > if (prepare_shmem_pid(e.pid)) > return 1; > > if (prepare_pipes_pid(e.pid)) > return 1; > > lseek(ps_fd, e.nr_children * sizeof(__u32), SEEK_CUR); > } > > lseek(ps_fd, sizeof(__u32), SEEK_SET); > > show_saved_shmems(); > show_saved_pipes(); > > return 0; > } > > static struct fmap_fd *pop_fmap_fd(unsigned long start) > { > struct fmap_fd **p, *r; > > for (p = &fmap_fds; *p != NULL; p = &(*p)->next) { > if ((*p)->start != start) > continue; > > r = *p; > *p = r->next; > return r; > } > > return NULL; > } > > static int open_fe_fd(struct fdinfo_entry *fe, int fd) > { > char path[PATH_MAX]; > int tmp; > > if (read(fd, path, fe->len) != fe->len) { > fprintf(stderr, "Error reading path"); > return -1; > } > > path[fe->len] = '\0'; > > tmp = open(path, fe->flags); > if (tmp < 0) { > perror("Can't open file"); > return -1; > } > > lseek(tmp, fe->pos, SEEK_SET); > > return tmp; > } > > static int reopen_fd(int old_fd, int new_fd) > { > int tmp; > > if (old_fd != new_fd) { > tmp = dup2(old_fd, new_fd); > if (tmp < 0) > return tmp; > > close(old_fd); > } > > return new_fd; > } > > static int open_fd(int pid, struct fdinfo_entry *fe, int *cfd) > { > int fd, tmp; > > if (*cfd == (int)fe->addr) { > tmp = dup(*cfd); > if (tmp < 0) { > perror("Can't dup file"); > return 1; > } > > *cfd = tmp; > } > > tmp = open_fe_fd(fe, *cfd); > if (tmp < 0) > return 1; > > fd = reopen_fd(tmp, (int)fe->addr); > if (fd < 0) { > perror("Can't dup"); > return 1; > } > > return 0; > } > > static int open_fmap(int pid, struct fdinfo_entry *fe, int fd) > { > int tmp; > 
struct fmap_fd *new; > > tmp = open_fe_fd(fe, fd); > if (tmp < 0) > return 1; > > printf("%d:\t\tWill map %x to %d\n", pid, fe->addr, tmp); > new = malloc(sizeof(*new)); > new->start = fe->addr; > new->fd = tmp; > new->next = fmap_fds; > fmap_fds = new; > > return 0; > } > > static int prepare_fds(int pid) > { > __u32 mag; > char path[64]; > int fdinfo_fd; > > printf("%d: Opening files\n", pid); > > sprintf(path, "fdinfo-%d.img", pid); > fdinfo_fd = open(path, O_RDONLY); > if (fdinfo_fd < 0) { > perror("Can't open fdinfo"); > return 1; > } > > read(fdinfo_fd, &mag, 4); > if (mag != FDINFO_MAGIC) { > fprintf(stderr, "Bad file\n"); > return 1; > } > > while (1) { > int ret; > struct fdinfo_entry fe; > > ret = read(fdinfo_fd, &fe, sizeof(fe)); > if (ret == 0) { > close(fdinfo_fd); > return 0; > } > > if (ret < 0) { > perror("Can't read file"); > return 1; > } > if (ret != sizeof(fe)) { > fprintf(stderr, "Error reading\n"); > return 1; > } > > printf("\t%d: Got fd for %lx type %d namelen %d\n", pid, > (unsigned long)fe.addr, fe.type, fe.len); > switch (fe.type) { > case FDINFO_FD: > if (open_fd(pid, &fe, &fdinfo_fd)) > return 1; > > break; > case FDINFO_MAP: > if (open_fmap(pid, &fe, fdinfo_fd)) > return 1; > > break; > default: > fprintf(stderr, "Some bullshit in a file\n"); > return 1; > } > } > } > > struct shmem_to_id { > unsigned long addr; > unsigned long end; > unsigned long id; > struct shmem_to_id *next; > }; > > static struct shmem_to_id *my_shmem_ids; > > static unsigned long find_shmem_id(unsigned long addr) > { > struct shmem_to_id *si; > > for (si = my_shmem_ids; si != NULL; si = si->next) > if (si->addr <= addr && si->end >= addr) > return si->id; > > return 0; > } > > static void save_shmem_id(struct shmem_entry *e) > { > struct shmem_to_id *si; > > si = malloc(sizeof(*si)); > si->addr = e->start; > si->end = e->end; > si->id = e->shmid; > si->next = my_shmem_ids; > my_shmem_ids = si; > } > > static int prepare_shmem(int pid) > { > char path[64]; > int 
sh_fd; > __u32 type = 0; > > sprintf(path, "shmem-%d.img", pid); > sh_fd = open(path, O_RDONLY); > if (sh_fd < 0) { > perror("Can't open shmem info"); > return 1; > } > > read(sh_fd, &type, sizeof(type)); > if (type != SHMEM_MAGIC) { > perror("Bad shmem magic"); > return 1; > } > > while (1) { > struct shmem_entry e; > int ret; > > ret = read(sh_fd, &e, sizeof(e)); > if (ret == 0) > break; > if (ret != sizeof(e)) { > perror("Can't read shmem entry"); > return 1; > } > > save_shmem_id(&e); > } > > close(sh_fd); > return 0; > } > > static int try_fixup_file_map(int pid, struct binfmt_vma_image *vi, int fd) > { > struct fmap_fd *fmfd; > > fmfd = pop_fmap_fd(vi->start); > if (fmfd != NULL) { > printf("%d: Fixing %lx vma to %d fd\n", pid, vi->start, fmfd->fd); > lseek(fd, -sizeof(*vi), SEEK_CUR); > vi->fd = fmfd->fd; > if (write(fd, vi, sizeof(*vi)) != sizeof(*vi)) { > perror("Can't write img"); > return 1; > } > > free(fmfd); > } > > return 0; > } > > static int try_fixup_shared_map(int pid, struct binfmt_vma_image *vi, int fd) > { > struct shmem_info *si; > unsigned long id; > > id = find_shmem_id(vi->start); > if (id == 0) > return 0; > > si = search_shmem(vi->start, id); > printf("%d: Search for %016lx shmem %p/%d\n", pid, vi->start, si, si ? 
si->pid : -1); > > if (si == NULL) { > fprintf(stderr, "Can't find my shmem %016lx\n", vi->start); > return 1; > } > > if (si->pid != pid) { > int sh_fd; > > sh_fd = shmem_wait_and_open(si); > printf("%d: Fixing %lx vma to %x/%d shmem -> %d\n", pid, vi->start, si->id, si->pid, sh_fd); > if (fd < 0) { > perror("Can't open shmem"); > return 1; > } > > lseek(fd, -sizeof(*vi), SEEK_CUR); > vi->fd = sh_fd; > if (write(fd, vi, sizeof(*vi)) != sizeof(*vi)) { > perror("Can't write img"); > return 1; > } > } > > return 0; > } > > static int fixup_vma_fds(int pid, int fd) > { > lseek(fd, sizeof(struct binfmt_img_header) + > sizeof(struct binfmt_regs_image) + > sizeof(struct binfmt_mm_image), SEEK_SET); > > while (1) { > struct binfmt_vma_image vi; > > if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) { > perror("Can't read"); > return 1; > } > > if (vi.start == 0 && vi.end == 0) > return 0; > > printf("%d: Fixing %016lx-%016lx %016lx vma\n", pid, vi.start, vi.end, vi.pgoff); > if (try_fixup_file_map(pid, &vi, fd)) > return 1; > > if (try_fixup_shared_map(pid, &vi, fd)) > return 1; > } > } > > static inline int should_restore_page(int pid, unsigned long vaddr) > { > struct shmem_info *si; > unsigned long id; > > id = find_shmem_id(vaddr); > if (id == 0) > return 1; > > si = search_shmem(vaddr, id); > return si->pid == pid; > } > > static int fixup_pages_data(int pid, int fd) > { > char path[128]; > int shfd; > __u32 mag; > __u64 vaddr; > > sprintf(path, "pages-%d.img", pid); > shfd = open(path, O_RDONLY); > if (shfd < 0) { > perror("Can't open shmem image"); > return 1; > } > > read(shfd, &mag, sizeof(mag)); > if (mag != PAGES_MAGIC) { > fprintf(stderr, "Bad shmem image\n"); > return 1; > } > > lseek(fd, -sizeof(struct binfmt_page_image), SEEK_END); > read(fd, &vaddr, sizeof(vaddr)); > if (vaddr != 0) { > printf("SHIT %lx\n", (unsigned long)vaddr); > return 1; > } > lseek(fd, -sizeof(struct binfmt_page_image), SEEK_END); > > while (1) { > int ret; > > ret = read(shfd, &vaddr, 
sizeof(vaddr)); > if (ret == 0) > break; > > if (ret < 0 || ret != sizeof(vaddr)) { > perror("Can't read vaddr"); > return 1; > } > > if (vaddr == 0) > break; > > if (!should_restore_page(pid, vaddr)) { > lseek(shfd, PAGE_SIZE, SEEK_CUR); > continue; > } > > // printf("Copy page %lx to image\n", (unsigned long)vaddr); > write(fd, &vaddr, sizeof(vaddr)); > sendfile(fd, shfd, NULL, PAGE_SIZE); > } > > close(shfd); > vaddr = 0; > write(fd, &vaddr, sizeof(vaddr)); > return 0; > } > > static int prepare_image_maps(int fd, int pid) > { > printf("%d: Fixing maps before executing image\n", pid); > > if (fixup_vma_fds(pid, fd)) > return 1; > > if (fixup_pages_data(pid, fd)) > return 1; > > close(fd); > return 0; > } > > static int execute_image(int pid) > { > char path[128]; > int fd, fd_new; > struct stat buf; > > sprintf(path, "core-%d.img", pid); > fd = open(path, O_RDONLY); > if (fd < 0) { > perror("Can't open exec image"); > return 1; > } > > if (fstat(fd, &buf)) { > perror("Can't stat"); > return 1; > } > > sprintf(path, "core-%d.img.out", pid); > fd_new = open(path, O_RDWR | O_CREAT | O_EXCL, 0700); > if (fd_new < 0) { > perror("Can't open new image"); > return 1; > } > > printf("%d: Preparing execution image\n", pid); > sendfile(fd_new, fd, NULL, buf.st_size); > close(fd); > > if (fchmod(fd_new, 0700)) { > perror("Can't prepare exec image"); > return 1; > } > > if (prepare_image_maps(fd_new, pid)) > return 1; > > printf("%d/%d EXEC IMAGE\n", pid, getpid()); > return execl(path, path, NULL); How are you going to restore O_CLOEXEC flags? 
> } > > static int create_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi, int pipes_fd) > { > int pfd[2], tmp; > unsigned long time = 1000; > > printf("\t%d: Creating pipe %x\n", pid, e->pipeid); > > if (pipe(pfd) < 0) { > perror("Can't create pipe"); > return 1; > } > > if (e->bytes) { > printf("\t%d: Splicing data to %d\n", pid, pfd[1]); > > tmp = splice(pipes_fd, NULL, pfd[1], NULL, e->bytes, 0); > if (tmp != e->bytes) { > fprintf(stderr, "Wanted to restore %ld bytes, but got %ld\n", > e->bytes, tmp); > if (tmp < 0) > perror("Error splicing data"); > return 1; > } > } > > pi->read_fd = pfd[0]; > pi->write_fd = pfd[1]; > pi->real_pid = getpid(); > > printf("\t%d: Done, waiting for others on %d pid with r:%d w:%d\n", > pid, pi->real_pid, pfd[0], pfd[1]); > > while (1) { > if (pi->users == 1) /* only I left */ > break; > > printf("\t%d: Waiting for %x pipe to attach (%d users left)\n", > pid, e->pipeid, pi->users - 1); > if (time < 20000000) > time <<= 1; > usleep(time); > } > > printf("\t%d: All is ok - reopening pipe for %d\n", pid, e->fd); > if (e->flags & O_WRONLY) { > close(pfd[0]); > tmp = reopen_fd(pfd[1], e->fd); > } else { > close(pfd[1]); > tmp = reopen_fd(pfd[0], e->fd); > } > > if (tmp < 0) { > perror("Can't dup pipe fd"); > return 1; > } > > return 0; > } > > static int attach_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi) > { > char path[128]; > int tmp, fd; > > printf("\t%d: Wating for pipe %x to appear\n", pid, e->pipeid); > > while (pi->real_pid == 0) > usleep(1000); > > if (e->flags & O_WRONLY) > tmp = pi->write_fd; > else > tmp = pi->read_fd; > > sprintf(path, "/proc/%d/fd/%d", pi->real_pid, tmp); > printf("\t%d: Attaching pipe %s\n", pid, path); > > fd = open(path, e->flags); > if (fd < 0) { > perror("Can't attach pipe"); > return 1; > } > > printf("\t%d: Done, reopening for %d\n", pid, e->fd); > pi->users--; > tmp = reopen_fd(fd, e->fd); > if (tmp < 0) { > perror("Can't dup to attach pipe"); > return 1; > } > > return 
0; > > } > > static int open_pipe(int pid, struct pipes_entry *e, int *pipes_fd) > { > struct pipes_info *pi; > > printf("\t%d: Opening pipe %x on fd %d\n", pid, e->pipeid, e->fd); > if (e->fd == *pipes_fd) { > int tmp; > > tmp = dup(*pipes_fd); > if (tmp < 0) { > perror("Can't dup file"); > return 1; > } > > *pipes_fd = tmp; > } > > pi = search_pipes(e->pipeid); > if (pi == NULL) { > fprintf(stderr, "BUG: can't find my pipe %x\n", e->pipeid); > return 1; > } > > if (pi->pid == pid) > return create_pipe(pid, e, pi, *pipes_fd); > else > return attach_pipe(pid, e, pi); > } > > static int prepare_pipes(int pid) > { > char path[64]; > int pipes_fd; > __u32 type = 0; > > printf("%d: Opening pipes\n", pid); > > sprintf(path, "pipes-%d.img", pid); > pipes_fd = open(path, O_RDONLY); > if (pipes_fd < 0) { > perror("Can't open pipes img"); > return 1; > } > > read(pipes_fd, &type, sizeof(type)); > if (type != PIPES_MAGIC) { > perror("Bad pipes file"); > return 1; > } > > while (1) { > struct pipes_entry e; > int ret; > > ret = read(pipes_fd, &e, sizeof(e)); > if (ret == 0) { > close(pipes_fd); > return 0; > } > if (ret != sizeof(e)) { > perror("Bad pipes entry"); > return 1; > } > > if (open_pipe(pid, &e, &pipes_fd)) > return 1; > } > } > > static int restore_one_task(int pid) > { > printf("%d: Restoring resources\n", pid); > > if (prepare_pipes(pid)) > return 1; > > if (prepare_fds(pid)) > return 1; > > if (prepare_shmem(pid)) > return 1; > > return execute_image(pid); > } > > static int restore_task_with_children(int my_pid, char *pstree_path); > > #if 0 > static inline int fork_with_pid(int pid, char *pstree_path) > { > /* FIXME - no such ability now */ > int ret; > > ret = fork(); > if (ret == 0) { > ret = restore_task_with_children(pid, pstree_path); > exit(ret); > } > > return ret; > } > #else > #define CLONE_CHILD_USEPID 0x02000000 > > static int do_child(void *arg) > { > return restore_task_with_children(getpid(), arg); > } > > static inline int fork_with_pid(int 
pid, char *pstree_path) > { > void *stack; > > stack = mmap(0, 4 * 4096, PROT_READ | PROT_WRITE, > MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0); > if (stack == MAP_FAILED) > return -1; > > stack += 4 * 4096; > return clone(do_child, stack, SIGCHLD | CLONE_CHILD_USEPID, pstree_path, NULL, NULL, &pid); > > } > #endif > > static int restore_task_with_children(int my_pid, char *pstree_path) > { > int *pids; > int fd, ret, i; > struct pstree_entry e; > > printf("%d: Starting restore\n", my_pid); > > fd = open(pstree_path, O_RDONLY); > if (fd < 0) { > perror("Can't reopen pstree image"); > exit(1); > } > > lseek(fd, sizeof(__u32), SEEK_SET); > while (1) { > ret = read(fd, &e, sizeof(e)); > if (ret != sizeof(e)) { > fprintf(stderr, "%d: Read returned %d\n", my_pid, ret); > if (ret < 0) > perror("Can't read pstree"); > exit(1); > } > > if (e.pid != my_pid) { > lseek(fd, e.nr_children * sizeof(__u32), SEEK_CUR); > continue; > } > > break; > } > > if (e.nr_children > 0) { > i = e.nr_children * sizeof(int); > pids = malloc(i); > ret = read(fd, pids, i); > if (ret != i) { > perror("Can't read children pids"); > exit(1); > } > > close(fd); > > printf("%d: Restoring %d children:\n", my_pid, e.nr_children); > for (i = 0; i < e.nr_children; i++) { > printf("\tFork %d from %d\n", pids[i], my_pid); > ret = fork_with_pid(pids[i], pstree_path); > if (ret < 0) { > perror("Can't fork kid"); > exit(1); > } > } > } else > close(fd); > > shmem_update_real_pid(my_pid, getpid()); > > return restore_one_task(my_pid); > } > > static int restore_root_task(char *pstree_path, int fd) > { > struct pstree_entry e; > int ret; > > ret = read(fd, &e, sizeof(e)); > if (ret != sizeof(e)) { > perror("Can't read root pstree entry"); > return 1; > } > > close(fd); > > printf("Forking root with %d pid\n", e.pid); > ret = fork_with_pid(e.pid, pstree_path); > if (ret < 0) { > perror("Can't fork root"); > return 1; > } > > wait(NULL); > return 0; > } > > static int restore_all_tasks(char *pid) > { > char 
path[128]; > int pstree_fd; > __u32 type = 0; > > sprintf(path, "pstree-%s.img", pid); > pstree_fd = open(path, O_RDONLY); > if (pstree_fd < 0) { > perror("Can't open pstree image"); > return 1; > } > > read(pstree_fd, &type, sizeof(type)); > if (type != PSTREE_MAGIC) { > perror("Bad pstree magic"); > return 1; > } > > if (prepare_shared(pstree_fd)) > return 1; > > return restore_root_task(path, pstree_fd); > } > > int main(int argc, char **argv) > { > if (argc != 3) > goto usage; > if (argv[1][0] != '-') > goto usage; > if (argv[1][1] == 'p') > return restore_one_task(atoi(argv[2])); > if (argv[1][1] == 't') > return restore_all_tasks(argv[2]); > > usage: > printf("Usage: %s (-t|-p) <pid>\n", argv[0]); > return 1; > } > #include <stdio.h> > #include <unistd.h> > #include <fcntl.h> > #include <stdlib.h> > #include <linux/types.h> > #include <string.h> > #include "img_structs.h" > #include "binfmt_img.h" > > static int show_fdinfo(int fd) > { > char data[1024]; > struct fdinfo_entry e; > > while (1) { > int ret; > > ret = read(fd, &e, sizeof(e)); > if (ret == 0) > break; > if (ret != sizeof(e)) { > perror("Can't read"); > return 1; > } > > ret = read(fd, data, e.len); > if (ret != e.len) { > perror("Can't read"); > return 1; > } > > data[e.len] = '\0'; > switch (e.type) { > case FDINFO_FD: > printf("fd %d [%s] pos %lx flags %o\n", (int)e.addr, data, e.pos, e.flags); > break; > case FDINFO_MAP: > printf("map %lx [%s] flags %o\n", e.addr, data, e.flags); > break; > default: > fprintf(stderr, "Unknown fdinfo entry type %d\n", e.type); > return 1; > } > } > > return 0; > } > > #define PAGE_SIZE 4096 > > static int show_mem(int fd) > { > __u64 vaddr; > unsigned int data[2]; > > while (1) { > if (read(fd, &vaddr, 8) == 0) > break; > if (vaddr == 0) > break; > > read(fd, &data[0], sizeof(unsigned int)); > lseek(fd, PAGE_SIZE - 2 * sizeof(unsigned int), SEEK_CUR); > read(fd, &data[1], sizeof(unsigned int)); > > printf("\tpage 0x%lx [%x...%x]\n", (unsigned long)vaddr, 
data[0], data[1]); > } > > return 0; > } > > static int show_pages(int fd) > { > return show_mem(fd); > } > > static int show_shmem(int fd) > { > int r; > struct shmem_entry e; > > while (1) { > r = read(fd, &e, sizeof(e)); > if (r == 0) > return 0; > if (r != sizeof(e)) { > perror("Can't read shmem entry"); > return 1; > } > > printf("%016lx-%016lx %016x\n", e.start, e.end, e.shmid); > } > } > > static char *segval(__u16 seg) > { > switch (seg) { > case CKPT_X86_SEG_NULL: return "nul"; > case CKPT_X86_SEG_USER32_CS: return "cs32"; > case CKPT_X86_SEG_USER32_DS: return "ds32"; > case CKPT_X86_SEG_USER64_CS: return "cs64"; > case CKPT_X86_SEG_USER64_DS: return "ds64"; > } > > if (seg & CKPT_X86_SEG_TLS) > return "tls"; > if (seg & CKPT_X86_SEG_LDT) > return "ldt"; > > return "[unknown]"; > } > > static int show_regs(int fd) > { > struct binfmt_regs_image ri; > > if (read(fd, &ri, sizeof(ri)) != sizeof(ri)) { > perror("Can't read registers from image"); > return 1; > } > > printf("Registers:\n"); > > printf("\tr15: %016lx\n", ri.r15); > printf("\tr14: %016lx\n", ri.r14); > printf("\tr13: %016lx\n", ri.r13); > printf("\tr12: %016lx\n", ri.r12); > printf("\tr11: %016lx\n", ri.r11); > printf("\tr10: %016lx\n", ri.r10); > printf("\tr9: %016lx\n", ri.r9); > printf("\tr8: %016lx\n", ri.r8); > printf("\tax: %016lx\n", ri.ax); > printf("\torig_ax: %016lx\n", ri.orig_ax); > printf("\tbx: %016lx\n", ri.bx); > printf("\tcx: %016lx\n", ri.cx); > printf("\tdx: %016lx\n", ri.dx); > printf("\tsi: %016lx\n", ri.si); > printf("\tdi: %016lx\n", ri.di); > printf("\tip: %016lx\n", ri.ip); > printf("\tflags: %016lx\n", ri.flags); > printf("\tbp: %016lx\n", ri.bp); > printf("\tsp: %016lx\n", ri.sp); > printf("\tgs: %016lx\n", ri.gs); > printf("\tfs: %016lx\n", ri.fs); > printf("\tgsindex: %s\n", segval(ri.gsindex)); > printf("\tfsindex: %s\n", segval(ri.fsindex)); > printf("\tcs: %s\n", segval(ri.cs)); > printf("\tss: %s\n", segval(ri.ss)); > printf("\tds: %s\n", segval(ri.ds)); > 
printf("\tes: %s\n", segval(ri.es)); > > printf("\ttls0 %016lx\n", ri.tls[0]); > printf("\ttls1 %016lx\n", ri.tls[1]); > printf("\ttls2 %016lx\n", ri.tls[2]); > > return 0; > } > > static int show_mm(int fd, unsigned long *stack) > { > struct binfmt_mm_image mi; > > if (read(fd, &mi, sizeof(mi)) != sizeof(mi)) { > perror("Can't read mm from image"); > return 1; > } > > printf("MM:\n"); > printf("\tflags: %016lx\n", mi.flags); > printf("\tdef_flags: %016lx\n", mi.def_flags); > printf("\tstart_code: %016lx\n", mi.start_code); > printf("\tend_code: %016lx\n", mi.end_code); > printf("\tstart_data: %016lx\n", mi.start_data); > printf("\tend_data: %016lx\n", mi.end_data); > printf("\tstart_brk: %016lx\n", mi.start_brk); > printf("\tbrk: %016lx\n", mi.brk); > printf("\tstart_stack: %016lx\n", mi.start_stack); > printf("\targ_start: %016lx\n", mi.arg_start); > printf("\targ_end: %016lx\n", mi.arg_end); > printf("\tenv_start: %016lx\n", mi.env_start); > printf("\tenv_end: %016lx\n", mi.env_end); > > *stack = mi.start_stack; > > return 0; > } > > static int show_vmas(int fd, unsigned long stack) > { > struct binfmt_vma_image vi; > > printf("VMAs:\n"); > while (1) { > char *note = ""; > > if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) { > perror("Can't read vma from image"); > return 1; > } > > if (vi.start == 0 && vi.end == 0) > return 0; > > if (vi.start <= stack && vi.end >= stack) > note = "[stack]"; > > printf("\t%016lx-%016lx file %d %016lx prot %x flags %x %s\n", > vi.start, vi.end, vi.fd, vi.pgoff, > vi.prot, vi.flags, note); > } > } > > static int show_privmem(int fd) > { > printf("Pages:\n"); > return show_mem(fd); > } > > static int show_core(int fd) > { > __u32 version = 0; > unsigned long stack; > > read(fd, &version, 4); > if (version != BINFMT_IMG_VERS_0) { > printf("Unsupported version %d\n", version); > return 1; > } > > printf("Showing version 0\n"); > > if (show_regs(fd)) > return 1; > > if (show_mm(fd, &stack)) > return 1; > > if (show_vmas(fd, stack)) > 
return 1; > > if (show_privmem(fd)) > return 1; > > return 0; > } > > static int show_pstree(int fd) > { > int ret; > struct pstree_entry e; > > while (1) { > int i; > __u32 *ch; > > ret = read(fd, &e, sizeof(e)); > if (ret == 0) > return 0; > if (ret != sizeof(e)) { > perror("Can't read processes entry"); > return 1; > } > > printf("%d:", e.pid); > i = e.nr_children * sizeof(__u32); > ch = malloc(i); > ret = read(fd, ch, i); > if (ret != i) { > perror("Can't read children list"); > return 1; > } > > for (i = 0; i < e.nr_children; i++) > printf(" %d", ch[i]); > printf("\n"); > } > } > > static int show_pipes(int fd) > { > struct pipes_entry e; > int ret; > char buf[17]; > > while (1) { > ret = read(fd, &e, sizeof(e)); > if (ret == 0) > break; > if (ret != sizeof(e)) { > perror("Can't read pipe entry"); > return 1; > } > > printf("%d: %lx %o %d ", e.fd, e.pipeid, e.flags, e.bytes); > if (e.flags & O_WRONLY) { > printf("\n"); > > if (e.bytes) { > printf("Bogus pipe\n"); > return 1; > } > > continue; > } > > memset(buf, 0, sizeof(buf)); > ret = e.bytes; > if (ret > 16) > ret = 16; > > read(fd, buf, ret); > printf("\t[%s", buf); > if (ret < e.bytes) > printf("..."); > printf("]\n"); > lseek(fd, e.bytes - ret, SEEK_CUR); > } > > return 0; > > } > > int main(int argc, char **argv) > { > __u32 type; > int fd; > > fd = open(argv[1], O_RDONLY); > if (fd < 0) { > perror("Can't open"); > return 1; > } > > read(fd, &type, 4); > > if (type == FDINFO_MAGIC) > return show_fdinfo(fd); > if (type == PAGES_MAGIC) > return show_pages(fd); > if (type == SHMEM_MAGIC) > return show_shmem(fd); > if (type == PSTREE_MAGIC) > return show_pstree(fd); > if (type == PIPES_MAGIC) > return show_pipes(fd); > if (type == BINFMT_IMG_MAGIC) > return show_core(fd); > > printf("Unknown file type 0x%x\n", type); > return 1; > } > > #define FDINFO_MAGIC 0x01010101 > > struct fdinfo_entry { > __u8 type; > __u8 len; > __u16 flags; > __u32 pos; > __u64 addr; > }; > > #define FDINFO_FD 1 > #define 
FDINFO_MAP 2 > > #define PAGES_MAGIC 0x20202020 > > #define SHMEM_MAGIC 0x03300330 > > struct shmem_entry { > __u64 start; > __u64 end; > __u64 shmid; > }; > > #define PSTREE_MAGIC 0x40044004 > > struct pstree_entry { > __u32 pid; > __u32 nr_children; > }; > > #define PIPES_MAGIC 0x05055050 > > struct pipes_entry { > __u32 fd; > __u32 pipeid; > __u32 flags; > __u32 bytes; > }; > all: cr-dump img-show cr-restore > > img-show: img-show.c > gcc -o $@ $< > > cr-dump: cr-dump.c > gcc -o $@ $< > > cr-restore: cr-restore.c > gcc -o $@ $< > > clean: > rm -f cr-dump img-show cr-restore > _______________________________________________ > Containers mailing list > Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx > https://lists.linux-foundation.org/mailman/listinfo/containers For any subsequent postings, could you split this up into multiple emails -- perhaps one per file? Or perhaps make them patches to the kernel's tools directory? Cheers, -Matt Helsley _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers