Quoting Oren Laadan (orenl@xxxxxxxxxxx): > Introduce a helper to iterate over the descendants of a given > task. The prototype is: > > int walk_task_subtree(struct task_struct *root, > int (*func)(struct task_struct *, void *), > void *data) > > The function will start with @root, and iterate through all the > descendants, including threads, in a DFS manner. Children of a task > are traversed before proceeding to the next thread of that task. > > For each task, the callback @func will be called providing the task > pointer and the @data. The callback is invoked while holding the > tasklist_lock for reading. If the callback fails it should return a > negative error, and the traversal ends. If the callback succeeds, it > returns a non-negative number, and these values are summed. > > On success, walk_task_subtree() returns the total summed. On failure, > it returns a negative value. > > The code in checkpoint/checkpoint.c and checkpoint/restart.c has > been converted to use this helper. > > Signed-off-by: Oren Laadan <orenl@xxxxxxxxxxxxxxx> Acked-by: Serge Hallyn <serue@xxxxxxxxxx> > --- > checkpoint/checkpoint.c | 97 ++++++++++++++-------------------------- > checkpoint/restart.c | 105 +++++++++++++++++++------------------------- > checkpoint/sys.c | 55 +++++++++++++++++++++++ > include/linux/checkpoint.h | 3 + > 4 files changed, 137 insertions(+), 123 deletions(-) > > diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c > index dbe9e10..1eeb557 100644 > --- a/checkpoint/checkpoint.c > +++ b/checkpoint/checkpoint.c > @@ -520,80 +520,51 @@ static int collect_objects(struct ckpt_ctx *ctx) > return ret; > } > > +struct ckpt_cnt_tasks { > + struct ckpt_ctx *ctx; > + int nr; > +}; > + > /* count number of tasks in tree (and optionally fill pid's in array) */ > -static int tree_count_tasks(struct ckpt_ctx *ctx) > +static int __tree_count_tasks(struct task_struct *task, void *data) > { > - struct task_struct *root; > - struct task_struct *task; > - struct task_struct *parent; > - struct task_struct **tasks_arr = ctx->tasks_arr; > - int nr_tasks = ctx->nr_tasks; > - int nr = 0; > + struct ckpt_cnt_tasks *d = (struct ckpt_cnt_tasks *) data; > + struct ckpt_ctx *ctx = d->ctx; > int ret; > > - read_lock(&tasklist_lock); > - > - /* we hold the lock, so root_task->real_parent can't change */ > - task = ctx->root_task; > - if (ctx->root_init) { > - /* container-init: start from container parent */ > - parent = task->real_parent; > - root = parent; > - } else { > - /* non-container-init: start from root task and down */ > - parent = NULL; > - root = task; > - } > - > - /* count tasks via DFS scan of the tree */ > - while (1) { > - ctx->tsk = task; /* (for ckpt_write_err) */ > + ctx->tsk = task; /* (for ckpt_write_err) */ > > - /* is this task cool ? */ > - ret = may_checkpoint_task(ctx, task); > - if (ret < 0) { > - nr = ret; > - break; > - } > - if (tasks_arr) { > - /* unlikely... but if so then try again later */ > - if (nr == nr_tasks) { > - nr = -EBUSY; /* cleanup in ckpt_ctx_free() */ > - break; > - } > - tasks_arr[nr] = task; > - get_task_struct(task); > - } > - nr++; > - /* if has children - proceed with child */ > - if (!list_empty(&task->children)) { > - parent = task; > - task = list_entry(task->children.next, > - struct task_struct, sibling); > - continue; > - } > - while (task != root) { > - /* if has sibling - proceed with sibling */ > - if (!list_is_last(&task->sibling, &parent->children)) { > - task = list_entry(task->sibling.next, > - struct task_struct, sibling); > - break; > - } > + /* is this task cool ? */ > + ret = may_checkpoint_task(ctx, task); > + if (ret < 0) > + goto out; > > - /* else, trace back to parent and proceed */ > - task = parent; > - parent = parent->real_parent; > + if (ctx->tasks_arr) { > + if (d->nr == ctx->nr_tasks) { /* unlikely... try again later */ > + __ckpt_write_err(ctx, "TE", "bad task count\n", -EBUSY); > + ret = -EBUSY; > + goto out; > } > - if (task == root) > - break; > + ctx->tasks_arr[d->nr++] = task; > + get_task_struct(task); > } > + > + ret = 1; > + out: > + if (ret < 0) > + ckpt_write_err(ctx, "", NULL); > ctx->tsk = NULL; > + return ret; > +} > > - read_unlock(&tasklist_lock); > +static int tree_count_tasks(struct ckpt_ctx *ctx) > +{ > + struct ckpt_cnt_tasks data; > > - if (nr < 0) > - ckpt_write_err(ctx, "", NULL); > - return nr; > + data.ctx = ctx; > + data.nr = 0; > + > + return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data); > } > > /* > diff --git a/checkpoint/restart.c b/checkpoint/restart.c > index 37454c5..c021eaf 100644 > --- a/checkpoint/restart.c > +++ b/checkpoint/restart.c > @@ -804,77 +804,56 @@ static int do_restore_task(void) > * Called by the coodinator to set the ->checkpoint_ctx pointer of the > * root task and all its descendants. > */ > -static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root) > +static int __prepare_descendants(struct task_struct *task, void *data) > { > - struct task_struct *leader = root; > - struct task_struct *parent = NULL; > - struct task_struct *task = root; > - int nr_pids = 0; > - int ret = -EBUSY; > + struct ckpt_ctx *ctx = (struct ckpt_ctx *) data; > > - read_lock(&tasklist_lock); > - while (1) { > - ckpt_debug("consider task %d\n", task_pid_vnr(task)); > - if (task_ptrace(task) & PT_PTRACED) > - break; > - /* > - * Set task->checkpoint_ctx of all non-zombie descendants. > - * If a descendant already has a ->checkpoint_ctx, it > - * must be a coordinator (for a different restart ?) so > - * we fail. > - * > - * Note that own ancestors cannot interfere since they > - * won't descend past us, as own ->checkpoint_ctx must > - * already be set. > - */ > - if (!task->exit_state) { > - if (!set_task_ctx(task, ctx)) > - break; > - ckpt_debug("prepare task %d\n", task_pid_vnr(task)); > - wake_up_process(task); > - nr_pids++; > - } > + ckpt_debug("consider task %d\n", task_pid_vnr(task)); > > - /* if has children - proceed with child */ > - if (!list_empty(&task->children)) { > - parent = task; > - task = list_entry(task->children.next, > - struct task_struct, sibling); > - continue; > - } > - while (task != root) { > - /* if has sibling - proceed with sibling */ > - if (!list_is_last(&task->sibling, &parent->children)) { > - task = list_entry(task->sibling.next, > - struct task_struct, sibling); > - break; > - } > - > - /* else, trace back to parent and proceed */ > - task = parent; > - parent = parent->real_parent; > - } > - if (task == root) { > - /* in case root task is multi-threaded */ > - root = task = next_thread(task); > - if (root == leader) { > - ret = 0; > - break; > - } > - } > + if (task_ptrace(task) & PT_PTRACED) { > + ckpt_debug("ptraced task %d\n", task_pid_vnr(task)); > + return -EBUSY; > } > - read_unlock(&tasklist_lock); > - ckpt_debug("nr %d/%d ret %d\n", ctx->nr_pids, nr_pids, ret); > + > + /* > + * Set task->checkpoint_ctx of all non-zombie descendants. > + * If a descendant already has a ->checkpoint_ctx, it > + * must be a coordinator (for a different restart ?) so > + * we fail. > + * > + * Note that own ancestors cannot interfere since they > + * won't descend past us, as own ->checkpoint_ctx must > + * already be set. > + */ > + if (!task->exit_state) { > + if (!set_task_ctx(task, ctx)) > + return -EBUSY; > + ckpt_debug("prepare task %d\n", task_pid_vnr(task)); > + wake_up_process(task); > + return 1; > + } > + > + return 0; > +} > + > +static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root) > +{ > + int nr_pids; > + > + nr_pids = walk_task_subtree(root, __prepare_descendants, ctx); > + ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids); > + if (nr_pids < 0) > + return nr_pids; > > /* > * Actual tasks count may exceed ctx->nr_pids due of 'dead' > * tasks used as place-holders for PGIDs, but not fall short. > */ > - if (!ret && (nr_pids < ctx->nr_pids)) > - ret = -ESRCH; > + if (nr_pids < ctx->nr_pids) > + return -ESRCH; > > atomic_set(&ctx->nr_total, nr_pids); > - return ret; > + return nr_pids; > } > > static int wait_all_tasks_finish(struct ckpt_ctx *ctx) > @@ -991,6 +970,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid) > return -EBUSY; > } > > + /* > + * From now on we are committed to the restart. If anything > + * fails, we'll wipe out the entire subtree below us, to > + * ensure proper cleanup. > + */ > + > if (ctx->uflags & RESTART_TASKSELF) { > ret = restore_task(ctx); > ckpt_debug("restore task: %d\n", ret); > diff --git a/checkpoint/sys.c b/checkpoint/sys.c > index 77613d7..7604089 100644 > --- a/checkpoint/sys.c > +++ b/checkpoint/sys.c > @@ -276,6 +276,61 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx) > ckpt_ctx_free(ctx); > } > > +int walk_task_subtree(struct task_struct *root, > + int (*func)(struct task_struct *, void *), > + void *data) > +{ > + > + struct task_struct *leader = root; > + struct task_struct *parent = NULL; > + struct task_struct *task = root; > + int total = 0; > + int ret; > + > + read_lock(&tasklist_lock); > + while (1) { > + /* invoke callback on this task */ > + ret = func(task, data); > + if (ret < 0) > + break; > + > + total += ret; > + > + /* if has children - proceed with child */ > + if (!list_empty(&task->children)) { > + parent = task; > + task = list_entry(task->children.next, > + struct task_struct, sibling); > + continue; > + } > + > + while (task != root) { > + /* if has sibling - proceed with sibling */ > + if (!list_is_last(&task->sibling, &parent->children)) { > + task = list_entry(task->sibling.next, > + struct task_struct, sibling); > + break; > + } > + > + /* else, trace back to parent and proceed */ > + task = parent; > + parent = parent->real_parent; > + } > + > + if (task == root) { > + /* in case root task is multi-threaded */ > + root = task = next_thread(task); > + if (root == leader) > + break; > + } > + } > + read_unlock(&tasklist_lock); > + > + ckpt_debug("total %d ret %d\n", total, ret); > + return (ret < 0 ? ret : total); > +} > + > + > /** > * sys_checkpoint - checkpoint a container > * @pid: pid of the container init(1) process > diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > index b7f1796..12210e4 100644 > --- a/include/linux/checkpoint.h > +++ b/include/linux/checkpoint.h > @@ -50,6 +50,9 @@ > RESTART_FROZEN | \ > RESTART_GHOST) > > +extern int walk_task_subtree(struct task_struct *task, > + int (*func)(struct task_struct *, void *), > + void *data); > extern void exit_checkpoint(struct task_struct *tsk); > > extern int ckpt_kwrite(struct ckpt_ctx *ctx, void *buf, int count); > -- > 1.6.0.4 > > _______________________________________________ > Containers mailing list > Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx > https://lists.linux-foundation.org/mailman/listinfo/containers _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers