On 08/04/2010 01:02 PM, Dan Smith wrote: > The semaphore undo list is a set of adjustments to be made to semaphores > held by a task on exit. Right now, we do not checkpoint or restore this > list which could cause undesirable behavior by a restarted process on exit. > > Changes in v2: > - Remove collect operation > - Add a BUILD_BUG_ON() to ensure sizeof(short) == sizeof(__u16) > - Use sizeof(__u16) when copying to/from checkpoint header > - Fix a couple of leaked hdr objects > - Avoid reading the semadj buffer with rcu_read_lock() held > - Set the sem_undo pointer on tasks other than the first to restore a list > - Fix refcounting on restart > - Pull out the guts of exit_sem() into put_undo_list() and call that > from our drop() function in case we're the last one. > > Signed-off-by: Dan Smith<danms@xxxxxxxxxx> Looks better. Still some more comments... > --- > include/linux/checkpoint.h | 4 + > include/linux/checkpoint_hdr.h | 18 ++ > ipc/sem.c | 342 +++++++++++++++++++++++++++++++++++++++- > kernel/checkpoint/process.c | 13 ++ > 4 files changed, 369 insertions(+), 8 deletions(-) > > diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > index 4e25042..34e81f4 100644 > --- a/include/linux/checkpoint.h > +++ b/include/linux/checkpoint.h > @@ -271,6 +271,10 @@ extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t); > extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t); > extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref); > > +/* per-task semaphore undo */ > +int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t); > +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref); Please add "extern" here (been burned before) [...] 
> +static int obj_sem_undo_grab(void *ptr) > +{ > + struct sem_undo_list *ulp = ptr; > + > + atomic_inc(&ulp->refcnt); > + return 0; > +} > + > +static void put_undo_list(struct sem_undo_list *ulp); nit: to me it makes more sense to move grab/drop/checkpoint/restart code, as well as sem_init(), to the end of the file (and avoid those statics...). > +static void obj_sem_undo_drop(void *ptr, int lastref) > +{ > + struct sem_undo_list *ulp = ptr; > + > + put_undo_list(ulp); > +} > + > +static int obj_sem_undo_users(void *ptr) > +{ > + struct sem_undo_list *ulp = ptr; > + > + return atomic_read(&ulp->refcnt); > +} Can get rid of ..._users() since we don't collect them. > + > +static void *restore_sem_undo(struct ckpt_ctx *ctx); > +static int checkpoint_sem_undo(struct ckpt_ctx *ctx, void *ptr); > + > +static const struct ckpt_obj_ops ckpt_obj_sem_undo_ops = { > + .obj_name = "IPC_SEM_UNDO", > + .obj_type = CKPT_OBJ_SEM_UNDO, > + .ref_drop = obj_sem_undo_drop, > + .ref_grab = obj_sem_undo_grab, > + .ref_users = obj_sem_undo_users, (here too) > + .checkpoint = checkpoint_sem_undo, > + .restore = restore_sem_undo, > +}; > + > void __init sem_init (void) > { > sem_init_ns(&init_ipc_ns); > ipc_init_proc_interface("sysvipc/sem", > " key semid perms nsems uid gid cuid cgid otime ctime\n", > IPC_SEM_IDS, sysvipc_sem_proc_show); > + > + /* sem_undo_list uses a short > + * ckpt_hdr_task_sem_undo uses a __u16 > + */ > + BUILD_BUG_ON(sizeof(short) != sizeof(__u16)); semadj is short, so: s/__u16/__s16/ > + > + register_checkpoint_obj(&ckpt_obj_sem_undo_ops); > } > > /* > @@ -1363,14 +1407,8 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) > * The current implementation does not do so. The POSIX standard > * and SVID should be consulted to determine what behavior is mandated 
> */ > -void exit_sem(struct task_struct *tsk) > +static void put_undo_list(struct sem_undo_list *ulp) > { > - struct sem_undo_list *ulp; > - > - ulp = tsk->sysvsem.undo_list; > - if (!ulp) > - return; > - tsk->sysvsem.undo_list = NULL; > > if (!atomic_dec_and_test(&ulp->refcnt)) > return; > @@ -1393,7 +1431,7 @@ void exit_sem(struct task_struct *tsk) > if (semid == -1) > break; > > - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); > + sma = sem_lock_check(ulp->ipc_ns, un->semid); This hunk belongs to previous patch, no ? [...] > +int checkpoint_sem_undo_adj(struct ckpt_ctx *ctx, struct sem_undo *un) > +{ > + int nsems; > + int ret; > + short *semadj = NULL; > + struct sem_array *sma; > + struct ckpt_hdr_task_sem_undo *h = NULL; > + > + sma = sem_lock(ctx->root_nsproxy->ipc_ns, un->semid); > + if (IS_ERR(sma)) { > + ckpt_debug("unable to lock semid %i (wrong ns?)\n", un->semid); > + return PTR_ERR(sma); > + } > + > + nsems = sma->sem_nsems; > + sem_getref_and_unlock(sma); > + > + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO); > + if (!h) > + goto putref_abort; > + > + semadj = kzalloc(nsems * sizeof(short), GFP_KERNEL); > + if (!semadj) > + goto putref_abort; > + > + sem_lock_and_putref(sma); > + > + h->semid = un->semid; > + h->semadj_count = nsems; > + memcpy(semadj, un->semadj, h->semadj_count * sizeof(__u16)); s/__u16/__s16/ > + > + sem_unlock(sma); > + > + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h); > + if (ret == 0) > + ret = ckpt_write_buffer(ctx, semadj, nsems * sizeof(short)); .... s/short/__s16/ [...] 
> +static int restore_task_sem_undo_adj(struct ckpt_ctx *ctx) > +{ > + struct ckpt_hdr_task_sem_undo *h; > + int len; > + int ret = -ENOMEM; > + struct sem_undo *un; > + int nsems; > + short *semadj = NULL; > + > + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO); > + if (IS_ERR(h)) > + return PTR_ERR(h); > + > + len = sizeof(__u16) * h->semadj_count; s/__u16/__s16/ > + semadj = kzalloc(len, GFP_KERNEL); > + if (!semadj) > + goto out; > + > + ret = _ckpt_read_buffer(ctx, semadj, len); > + if (ret < 0) > + goto out; Use ckpt_read_payload() - it already allocates the payload buffer. > + > + un = find_alloc_undo(current->nsproxy->ipc_ns, h->semid); > + if (IS_ERR(un)) { > + ret = PTR_ERR(un); > + ckpt_debug("unable to find semid %i\n", h->semid); > + goto out; > + } > + > + nsems = sem_undo_nsems(un, current->nsproxy->ipc_ns); > + len = sizeof(short) * nsems; s/short/__s16/ If nsems > h->semadj_count, we may be accessing bad memory in the semadj buffer from above, therefore ... > + memcpy(un->semadj, semadj, len); > + rcu_read_unlock(); > + > + if (nsems != h->semadj_count) ... this test should occur before the memcpy() above. 
> + ckpt_err(ctx, -EINVAL, > + "semid %i has nmsems=%i but %i undo adjustments\n", > + h->semid, nsems, h->semadj_count); > + else > + ckpt_debug("semid %i restored with %i adjustments\n", > + h->semid, h->semadj_count); > + out: > + ckpt_hdr_put(ctx, h); > + kfree(semadj); > + > + return ret; > +} > + > +static void *restore_sem_undo(struct ckpt_ctx *ctx) > +{ > + struct ckpt_hdr_task_sem_undo_list *h; > + struct sem_undo_list *ulp = NULL; > + int i; > + int ret = 0; > + > + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO_LIST); > + if (IS_ERR(h)) > + return ERR_PTR(PTR_ERR(h)); > + > + ulp = alloc_undo_list(current->nsproxy->ipc_ns); > + if (!ulp) { > + ret = -ENOMEM; > + goto out; > + } > + > + for (i = 0; i < h->count; i++) { > + ret = restore_task_sem_undo_adj(ctx); > + if (ret < 0) > + goto out; > + } > + out: > + ckpt_hdr_put(ctx, h); > + if (ret < 0) > + return ERR_PTR(ret); > + else > + return ulp; > +} > + > +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref) > +{ > + struct sem_undo_list *ulp; > + > + if (!sem_undo_objref) > + return 0; /* Task had no undo list */ > + > + ulp = ckpt_obj_try_fetch(ctx, sem_undo_objref, CKPT_OBJ_SEM_UNDO); > + if (IS_ERR(ulp)) > + return PTR_ERR(ulp); > + > + /* The first task to restore a shared list should already have this, > + * but subsequent ones won't, so attach to current in that case > + */ > + if (!current->sysvsem.undo_list) > + current->sysvsem.undo_list = ulp; > + > + atomic_inc(&ulp->refcnt); I suspect there is a leak here: atomic_inc is needed only for tasks other than the first task; the first task already gets the refcount deep inside find_alloc_undo(), and the hash-table gets its ref via the obj->grab method. [...] Oren. _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers