Hi, On Sat, Oct 18, 2008 at 03:11:33AM +0400, Andrey Mirkin wrote: > Functions to dump task struct, fpu state and registers are added. > All IDs are saved from the POV of process (container) namespace. Just a couple of little comments, in case this series should keep on living. [...] > diff --git a/checkpoint/cpt_process.c b/checkpoint/cpt_process.c > new file mode 100644 > index 0000000..58f608d > --- /dev/null > +++ b/checkpoint/cpt_process.c > @@ -0,0 +1,236 @@ > +/* > + * Copyright (C) 2008 Parallels, Inc. > + * > + * Author: Andrey Mirkin <major@xxxxxxxxxx> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License as > + * published by the Free Software Foundation, version 2 of the > + * License. > + * > + */ > + > +#include <linux/sched.h> > +#include <linux/fs.h> > +#include <linux/file.h> > +#include <linux/version.h> > +#include <linux/nsproxy.h> > + > +#include "checkpoint.h" > +#include "cpt_image.h" > + > +static unsigned int encode_task_flags(unsigned int task_flags) > +{ > + unsigned int flags = 0; > + > + if (task_flags & PF_EXITING) > + flags |= (1 << CPT_PF_EXITING); > + if (task_flags & PF_FORKNOEXEC) > + flags |= (1 << CPT_PF_FORKNOEXEC); > + if (task_flags & PF_SUPERPRIV) > + flags |= (1 << CPT_PF_SUPERPRIV); > + if (task_flags & PF_DUMPCORE) > + flags |= (1 << CPT_PF_DUMPCORE); > + if (task_flags & PF_SIGNALED) > + flags |= (1 << CPT_PF_SIGNALED); > + if (task_flags & PF_USED_MATH) > + flags |= (1 << CPT_PF_USED_MATH); > + > + return flags; > + > +} > + > +int cpt_dump_task_struct(struct task_struct *tsk, struct cpt_context *ctx) > +{ > + struct cpt_task_image *t; > + int i; > + int err; > + > + t = kzalloc(sizeof(*t), GFP_KERNEL); > + if (!t) > + return -ENOMEM; > + > + t->cpt_len = sizeof(*t); > + t->cpt_type = CPT_OBJ_TASK; > + t->cpt_hdrlen = sizeof(*t); > + t->cpt_content = CPT_CONTENT_ARRAY; > + > + t->cpt_state = tsk->state; > + t->cpt_flags = encode_task_flags(tsk->flags); > + t->cpt_exit_code = tsk->exit_code; > + t->cpt_exit_signal = tsk->exit_signal; > + t->cpt_pdeath_signal = tsk->pdeath_signal; > + t->cpt_pid = task_pid_nr_ns(tsk, ctx->nsproxy->pid_ns); > + t->cpt_tgid = task_tgid_nr_ns(tsk, ctx->nsproxy->pid_ns); > + t->cpt_ppid = tsk->parent ? > + task_pid_nr_ns(tsk->parent, ctx->nsproxy->pid_ns) : 0; > + t->cpt_rppid = tsk->real_parent ? > + task_pid_nr_ns(tsk->real_parent, ctx->nsproxy->pid_ns) : 0; > + t->cpt_pgrp = task_pgrp_nr_ns(tsk, ctx->nsproxy->pid_ns); > + t->cpt_session = task_session_nr_ns(tsk, ctx->nsproxy->pid_ns); > + t->cpt_old_pgrp = 0; > + if (tsk->signal->tty_old_pgrp) > + t->cpt_old_pgrp = pid_vnr(tsk->signal->tty_old_pgrp); > + t->cpt_leader = tsk->group_leader ? task_pid_vnr(tsk->group_leader) : 0; Why pid_vnr() here, and task_*_nr_ns() above? According to the introducing comment, I'd expect something like pid_nr_ns(tsk->signal->tty_old_pgrp, tsk->nsproxy->pid_ns), and the same for tsk->group_leader. IIUC, pid_vnr() is correct only if ctx->nsproxy->pid_ns == tsk->nsproxy->pid_ns == current->nsproxy->pid_ns, and I expect current to live in a different pid_ns. Comments? > + t->cpt_utime = tsk->utime; > + t->cpt_stime = tsk->stime; > + t->cpt_utimescaled = tsk->utimescaled; > + t->cpt_stimescaled = tsk->stimescaled; > + t->cpt_gtime = tsk->gtime; > + t->cpt_prev_utime = tsk->prev_utime; > + t->cpt_prev_stime = tsk->prev_stime; > + t->cpt_nvcsw = tsk->nvcsw; > + t->cpt_nivcsw = tsk->nivcsw; > + t->cpt_start_time = cpt_timespec_export(&tsk->start_time); > + t->cpt_real_start_time = cpt_timespec_export(&tsk->real_start_time); > + t->cpt_min_flt = tsk->min_flt; > + t->cpt_maj_flt = tsk->maj_flt; > + memcpy(t->cpt_comm, tsk->comm, TASK_COMM_LEN); > + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) { > + t->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b) << 32) + > + tsk->thread.tls_array[i].a; > + } > + /* TODO: encode thread flags and status like task flags */ > + t->cpt_thrflags = task_thread_info(tsk)->flags & ~(1<<TIF_FREEZE); > + t->cpt_thrstatus = task_thread_info(tsk)->status; > + t->cpt_user = tsk->user->uid; > + t->cpt_uid = tsk->uid; > + t->cpt_euid = tsk->euid; > + t->cpt_suid = tsk->suid; > + t->cpt_fsuid = tsk->fsuid; > + t->cpt_gid = tsk->gid; > + t->cpt_egid = tsk->egid; > + t->cpt_sgid = tsk->sgid; > + t->cpt_fsgid = tsk->fsgid; > + > + err = ctx->write(t, sizeof(*t), ctx); > + > + kfree(t); > + return err; > +} > + > +static int cpt_dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx) > +{ > + struct cpt_obj_bits hdr; > + int err; > + int content; > + unsigned long size; > + > + content = CPT_CONTENT_X86_FPUSTATE; > + size = sizeof(struct i387_fxsave_struct); > +#ifndef CONFIG_X86_64 > + if (!cpu_has_fxsr) { > + size = sizeof(struct i387_fsave_struct); > + content = CPT_CONTENT_X86_FPUSTATE_OLD; > + } > +#endif > + > + hdr.cpt_len = sizeof(hdr) + size; > + hdr.cpt_type = CPT_OBJ_BITS; > + hdr.cpt_hdrlen = sizeof(hdr); > + hdr.cpt_content = content; > + hdr.cpt_size = size; > + err = ctx->write(&hdr, sizeof(hdr), ctx); > + if (!err) > + ctx->write(tsk->thread.xstate, size, ctx); Should check the error code of the line above, right? > + return err; > +} > + > +static u32 encode_segment(u32 segreg) > +{ > + segreg &= 0xFFFF; > + > + if (segreg == 0) > + return CPT_SEG_ZERO; > + if ((segreg & 3) != 3) { > + eprintk("Invalid RPL of a segment reg %x\n", segreg); > + return CPT_SEG_ZERO; > + } > + > + /* LDT descriptor, it is just an index to LDT array */ > + if (segreg & 4) > + return CPT_SEG_LDT + (segreg >> 3); > + > + /* TLS descriptor. */ > + if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN && > + (segreg >> 3) <= GDT_ENTRY_TLS_MAX) > + return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN); > + > + /* One of standard desriptors */ > +#ifdef CONFIG_X86_64 > + if (segreg == __USER32_DS) > + return CPT_SEG_USER32_DS; > + if (segreg == __USER32_CS) > + return CPT_SEG_USER32_CS; > + if (segreg == __USER_DS) > + return CPT_SEG_USER64_DS; > + if (segreg == __USER_CS) > + return CPT_SEG_USER64_CS; > +#else > + if (segreg == __USER_DS) > + return CPT_SEG_USER32_DS; > + if (segreg == __USER_CS) > + return CPT_SEG_USER32_CS; > +#endif > + eprintk("Invalid segment reg %x\n", segreg); > + return CPT_SEG_ZERO; > +} > + > +static int cpt_dump_registers(struct task_struct *tsk, struct cpt_context *ctx) > +{ > + struct cpt_x86_regs ri; > + struct pt_regs *pt_regs; > + > + ri.cpt_len = sizeof(ri); > + ri.cpt_type = CPT_OBJ_X86_REGS; > + ri.cpt_hdrlen = sizeof(ri); > + ri.cpt_content = CPT_CONTENT_VOID; > + > + ri.cpt_debugreg[0] = tsk->thread.debugreg0; > + ri.cpt_debugreg[1] = tsk->thread.debugreg1; > + ri.cpt_debugreg[2] = tsk->thread.debugreg2; > + ri.cpt_debugreg[3] = tsk->thread.debugreg3; > + ri.cpt_debugreg[4] = 0; > + ri.cpt_debugreg[5] = 0; > + ri.cpt_debugreg[6] = tsk->thread.debugreg6; > + ri.cpt_debugreg[7] = tsk->thread.debugreg7; > + > + pt_regs = task_pt_regs(tsk); > + > + ri.cpt_fs = encode_segment(pt_regs->fs); > + ri.cpt_gs = encode_segment(tsk->thread.gs); > + > + ri.cpt_bx = pt_regs->bx; > + ri.cpt_cx = pt_regs->cx; > + ri.cpt_dx = pt_regs->dx; > + ri.cpt_si = pt_regs->si; > + ri.cpt_di = pt_regs->di; > + ri.cpt_bp = pt_regs->bp; > + ri.cpt_ax = pt_regs->ax; > + ri.cpt_ds = encode_segment(pt_regs->ds); > + ri.cpt_es = encode_segment(pt_regs->es); > + ri.cpt_orig_ax = pt_regs->orig_ax; > + ri.cpt_ip = pt_regs->ip; > + ri.cpt_cs = encode_segment(pt_regs->cs); > + ri.cpt_flags = pt_regs->flags; > + ri.cpt_sp = pt_regs->sp; > + ri.cpt_ss = encode_segment(pt_regs->ss); > + > + return ctx->write(&ri, sizeof(ri), ctx); > +} > + > +int cpt_dump_task(struct task_struct *tsk, struct cpt_context *ctx) > +{ > + int err; > + > + err = cpt_dump_task_struct(tsk, ctx); > + > + /* Dump task mm */ > + > + if (!err) > + cpt_dump_fpustate(tsk, ctx); error checking... > + if (!err) > + cpt_dump_registers(tsk, ctx); error checking... > + > + return err; > +} > -- > 1.5.6 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ Louis -- Dr Louis Rilling Kerlabs Skype: louis.rilling Batiment Germanium Phone: (+33|0) 6 80 89 08 23 80 avenue des Buttes de Coesmes http://www.kerlabs.com/ 35700 Rennes
Attachment:
signature.asc
Description: Digital signature
_______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers