On 08/04/2010 01:02 PM, Dan Smith wrote: > The semaphore undo list is a set of adjustments to be made to semaphores > held by a task on exit. Right now, we do not checkpoint or restore this > list which could cause undesirable behavior by a restarted process on exit. > > Changes in v2: > - Remove collect operation > - Add a BUILD_BUG_ON() to ensure sizeof(short) == sizeof(__u16) > - Use sizeof(__u16) when copying to/from checkpoint header > - Fix a couple of leaked hdr objects > - Avoid reading the semadj buffer with rcu_read_lock() held > - Set the sem_undo pointer on tasks other than the first to restore a list > - Fix refcounting on restart > - Pull out the guts of exit_sem() into put_undo_list() and call that > from our drop() function in case we're the last one. > > Signed-off-by: Dan Smith<danms@xxxxxxxxxx> Looks better. Still some more comments... > --- > include/linux/checkpoint.h | 4 + > include/linux/checkpoint_hdr.h | 18 ++ > ipc/sem.c | 342 +++++++++++++++++++++++++++++++++++++++- > kernel/checkpoint/process.c | 13 ++ > 4 files changed, 369 insertions(+), 8 deletions(-) > > diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > index 4e25042..34e81f4 100644 > --- a/include/linux/checkpoint.h > +++ b/include/linux/checkpoint.h > @@ -271,6 +271,10 @@ extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t); > extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t); > extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref); > > +/* per-task semaphore undo */ > +int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t); > +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref); Please add "extern" here (been burned before) [...] 
> +static int obj_sem_undo_grab(void *ptr) > +{ > + struct sem_undo_list *ulp = ptr; > + > + atomic_inc(&ulp->refcnt); > + return 0; > +} > + > +static void put_undo_list(struct sem_undo_list *ulp); nit: to me it makes more sense to move grab/drop/checkpoint/restart code, as well as sem_init(), to the end of the file (and avoid those statics...). > +static void obj_sem_undo_drop(void *ptr, int lastref) > +{ > + struct sem_undo_list *ulp = ptr; > + > + put_undo_list(ulp); > +} > + > +static int obj_sem_undo_users(void *ptr) > +{ > + struct sem_undo_list *ulp = ptr; > + > + return atomic_read(&ulp->refcnt); > +} Can get rid of ..._users() since we don't collect them. > + > +static void *restore_sem_undo(struct ckpt_ctx *ctx); > +static int checkpoint_sem_undo(struct ckpt_ctx *ctx, void *ptr); > + > +static const struct ckpt_obj_ops ckpt_obj_sem_undo_ops = { > + .obj_name = "IPC_SEM_UNDO", > + .obj_type = CKPT_OBJ_SEM_UNDO, > + .ref_drop = obj_sem_undo_drop, > + .ref_grab = obj_sem_undo_grab, > + .ref_users = obj_sem_undo_users, (here too) > + .checkpoint = checkpoint_sem_undo, > + .restore = restore_sem_undo, > +}; > + > void __init sem_init (void) > { > sem_init_ns(&init_ipc_ns); > ipc_init_proc_interface("sysvipc/sem", > " key semid perms nsems uid gid cuid cgid otime ctime\n", > IPC_SEM_IDS, sysvipc_sem_proc_show); > + > + /* sem_undo_list uses a short > + * ckpt_hdr_task_sem_undo uses a __u16 > + */ > + BUILD_BUG_ON(sizeof(short) != sizeof(__u16)); semadj is short, so: s/__u16/__s16/ > + > + register_checkpoint_obj(&ckpt_obj_sem_undo_ops); > } > > /* > @@ -1363,14 +1407,8 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) > * The current implementation does not do so. The POSIX standard > * and SVID should be consulted to determine what behavior is mandated 
> */ > -void exit_sem(struct task_struct *tsk) > +static void put_undo_list(struct sem_undo_list *ulp) > { > - struct sem_undo_list *ulp; > - > - ulp = tsk->sysvsem.undo_list; > - if (!ulp) > - return; > - tsk->sysvsem.undo_list = NULL; > > if (!atomic_dec_and_test(&ulp->refcnt)) > return; > @@ -1393,7 +1431,7 @@ void exit_sem(struct task_struct *tsk) > if (semid == -1) > break; > > - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); > + sma = sem_lock_check(ulp->ipc_ns, un->semid); This hunk belongs to previous patch, no ? [...] > +int checkpoint_sem_undo_adj(struct ckpt_ctx *ctx, struct sem_undo *un) > +{ > + int nsems; > + int ret; > + short *semadj = NULL; > + struct sem_array *sma; > + struct ckpt_hdr_task_sem_undo *h = NULL; > + > + sma = sem_lock(ctx->root_nsproxy->ipc_ns, un->semid); > + if (IS_ERR(sma)) { > + ckpt_debug("unable to lock semid %i (wrong ns?)\n", un->semid); > + return PTR_ERR(sma); > + } > + > + nsems = sma->sem_nsems; > + sem_getref_and_unlock(sma); > + > + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO); > + if (!h) > + goto putref_abort; > + > + semadj = kzalloc(nsems * sizeof(short), GFP_KERNEL); > + if (!semadj) > + goto putref_abort; > + > + sem_lock_and_putref(sma); > + > + h->semid = un->semid; > + h->semadj_count = nsems; > + memcpy(semadj, un->semadj, h->semadj_count * sizeof(__u16)); s/__u16/__s16/ > + > + sem_unlock(sma); > + > + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h); > + if (ret == 0) > + ret = ckpt_write_buffer(ctx, semadj, nsems * sizeof(short)); .... s/short/__s16/ [...] 
> +static int restore_task_sem_undo_adj(struct ckpt_ctx *ctx) > +{ > + struct ckpt_hdr_task_sem_undo *h; > + int len; > + int ret = -ENOMEM; > + struct sem_undo *un; > + int nsems; > + short *semadj = NULL; > + > + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO); > + if (IS_ERR(h)) > + return PTR_ERR(h); > + > + len = sizeof(__u16) * h->semadj_count; s/__u16/__s16/ > + semadj = kzalloc(len, GFP_KERNEL); > + if (!semadj) > + goto out; > + > + ret = _ckpt_read_buffer(ctx, semadj, len); > + if (ret < 0) > + goto out; Use ckpt_read_payload() - it already allocates the payload buffer. > + > + un = find_alloc_undo(current->nsproxy->ipc_ns, h->semid); > + if (IS_ERR(un)) { > + ret = PTR_ERR(un); > + ckpt_debug("unable to find semid %i\n", h->semid); > + goto out; > + } > + > + nsems = sem_undo_nsems(un, current->nsproxy->ipc_ns); > + len = sizeof(short) * nsems; s/short/__s16/ If nsems > h->semadj_count, we may be accessing bad memory in the semadj buffer from above, therefore ... > + memcpy(un->semadj, semadj, len); > + rcu_read_unlock(); > + > + if (nsems != h->semadj_count) ... this test should occur before the memcpy() above. 
> + ckpt_err(ctx, -EINVAL, > + "semid %i has nmsems=%i but %i undo adjustments\n", > + h->semid, nsems, h->semadj_count); > + else > + ckpt_debug("semid %i restored with %i adjustments\n", > + h->semid, h->semadj_count); > + out: > + ckpt_hdr_put(ctx, h); > + kfree(semadj); > + > + return ret; > +} > + > +static void *restore_sem_undo(struct ckpt_ctx *ctx) > +{ > + struct ckpt_hdr_task_sem_undo_list *h; > + struct sem_undo_list *ulp = NULL; > + int i; > + int ret = 0; > + > + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO_LIST); > + if (IS_ERR(h)) > + return ERR_PTR(PTR_ERR(h)); > + > + ulp = alloc_undo_list(current->nsproxy->ipc_ns); > + if (!ulp) { > + ret = -ENOMEM; > + goto out; > + } > + > + for (i = 0; i < h->count; i++) { > + ret = restore_task_sem_undo_adj(ctx); > + if (ret < 0) > + goto out; > + } > + out: > + ckpt_hdr_put(ctx, h); > + if (ret < 0) > + return ERR_PTR(ret); > + else > + return ulp; > +} > + > +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref) > +{ > + struct sem_undo_list *ulp; > + > + if (!sem_undo_objref) > + return 0; /* Task had no undo list */ > + > + ulp = ckpt_obj_try_fetch(ctx, sem_undo_objref, CKPT_OBJ_SEM_UNDO); > + if (IS_ERR(ulp)) > + return PTR_ERR(ulp); > + > + /* The first task to restore a shared list should already have this, > + * but subsequent ones won't, so attach to current in that case > + */ > + if (!current->sysvsem.undo_list) > + current->sysvsem.undo_list = ulp; > + > + atomic_inc(&ulp->refcnt); I suspect there is a leak here: atomic_inc is needed only for tasks other than the first task; the first task already gets the refcount deep inside find_alloc_undo(), and the hash-table gets its ref via the obj->grab method. [...] Oren. _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers