Patch doesn't compile ... apply this: diff --git a/include/linux/freezer.h b/include/linux/freezer.h index cd31593..4acc2a1 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -74,7 +74,7 @@ static inline int cgroup_freezing_or_frozen(struct task_struct *task) } static inline int cgroup_freezer_begin_checkpoint(struct task_struct *task) { - return -ENOTSUP; + return -ENOTSUPP; } static inline void cgroup_freezer_end_checkpoint(struct task_struct *task) {} diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 6519692..f81b333 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -64,6 +64,13 @@ int cgroup_freezing_or_frozen(struct task_struct *task) return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); } +/* Task is frozen or will freeze immediately when next it gets woken */ +static bool is_task_frozen_enough(struct task_struct *task) +{ + return frozen(task) || + (task_is_stopped_or_traced(task) && freezing(task)); +} + /* * caller must hold freezer->lock */ @@ -109,7 +116,7 @@ static int freezer_checkpointing(struct task_struct *task, task_lock(task); css = task_subsys_state(task, freezer_subsys_id); css_get(css); /* make sure freezer doesn't go away */ - freezer = containerof(css, struct freezer, css); + freezer = container_of(css, struct freezer, css); task_unlock(task); if (freezer->state == CGROUP_FREEZING) { @@ -239,13 +246,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, kfree(cgroup_freezer(cgroup)); } -/* Task is frozen or will freeze immediately when next it gets woken */ -static bool is_task_frozen_enough(struct task_struct *task) -{ - return frozen(task) || - (task_is_stopped_or_traced(task) && freezing(task)); -} - /* * The call to cgroup_lock() in the freezer.state write method prevents * a write to that file racing against an attach, and hence the On Wed, 3 Jun 2009, Matt Helsley wrote: > The CHECKPOINTING state prevents userspace from unfreezing tasks until > sys_checkpoint() is 
finished. When doing a container checkpoint, userspace > will do: > > echo FROZEN > /cgroups/my_container/freezer.state > ... > rc = sys_checkpoint( <pid of container root> ); > > To ensure a consistent checkpoint image userspace should not be allowed > to thaw the cgroup (echo THAWED > /cgroups/my_container/freezer.state) > during checkpoint. > > "CHECKPOINTING" can only be set on a "FROZEN" cgroup using the checkpoint > system call. Once in the "CHECKPOINTING" state, the cgroup may not leave until > the checkpoint system call is finished and ready to return. Then the > freezer state returns to "FROZEN". Writing any new state to freezer.state while > checkpointing will return EBUSY. These semantics ensure that userspace cannot > unfreeze the cgroup midway through the checkpoint system call. > > The cgroup_freezer_begin_checkpoint() and cgroup_freezer_end_checkpoint() > make relatively few assumptions about the task that is passed in. However the > way they are called in do_checkpoint() assumes that the root of the container > is in the same freezer cgroup as all the other tasks that will be > checkpointed. > > Signed-off-by: Matt Helsley <matthltc@xxxxxxxxxx> > Cc: Paul Menage <menage@xxxxxxxxxx> > Cc: Li Zefan <lizf@xxxxxxxxxxxxxx> > Cc: Cedric Le Goater <legoater@xxxxxxx> > Cc: Oren Laadan <orenl@xxxxxxxxxxxxxxx> > > Notes: > Meant to work with Oren's checkpoint/restart v16-dev git tree. > Still needs testing. > As a side-effect this prevents multiple tasks from entering the > CHECKPOINTING state simultaneously. All but one will get -EBUSY. 
> --- > Documentation/cgroups/freezer-subsystem.txt | 10 ++ > checkpoint/checkpoint.c | 8 ++- > include/linux/freezer.h | 8 ++ > kernel/cgroup_freezer.c | 128 +++++++++++++++++++------- > 4 files changed, 117 insertions(+), 37 deletions(-) > > diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt > index 41f37fe..92b68e6 100644 > --- a/Documentation/cgroups/freezer-subsystem.txt > +++ b/Documentation/cgroups/freezer-subsystem.txt > @@ -100,3 +100,13 @@ things happens: > and returns EINVAL) > 3) The tasks that blocked the cgroup from entering the "FROZEN" > state disappear from the cgroup's set of tasks. > + > +When the cgroup freezer is used to guard container checkpoint operations the > +freezer.state may be "CHECKPOINTING". "CHECKPOINTING" can only be set on a > +"FROZEN" cgroup using the checkpoint system call. Once in the "CHECKPOINTING" > +state, the cgroup may not leave until the checkpoint system call returns the > +freezer state to "FROZEN". Writing any new state to freezer.state while > +checkpointing will return EBUSY. These semantics ensure that userspace cannot > +unfreeze the cgroup midway through the checkpoint system call. Note that, > +unlike "FROZEN" and "FREEZING", there is no corresponding "CHECKPOINTED" > +state. 
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c > index afc7300..d586a9b 100644 > --- a/checkpoint/checkpoint.c > +++ b/checkpoint/checkpoint.c > @@ -569,7 +569,10 @@ int do_checkpoint(struct ckpt_ctx *ctx, pid_t pid) > > ret = init_checkpoint_ctx(ctx, pid); > if (ret < 0) > - goto out; > + return ret; > + ret = cgroup_freezer_begin_checkpoint(ctx->root_task); > + if (ret < 0) > + return ret; > ret = build_tree(ctx); > if (ret < 0) > goto out; > @@ -597,6 +600,7 @@ int do_checkpoint(struct ckpt_ctx *ctx, pid_t pid) > /* on success, return (unique) checkpoint identifier */ > ctx->crid = atomic_inc_return(&ctx_count); > ret = ctx->crid; > - out: > +out: > + cgroup_freezer_end_checkpoint(ctx->root_task); > return ret; > } > diff --git a/include/linux/freezer.h b/include/linux/freezer.h > index da7e52b..cd31593 100644 > --- a/include/linux/freezer.h > +++ b/include/linux/freezer.h > @@ -65,11 +65,19 @@ extern void cancel_freezing(struct task_struct *p); > > #ifdef CONFIG_CGROUP_FREEZER > extern int cgroup_freezing_or_frozen(struct task_struct *task); > +extern int cgroup_freezer_begin_checkpoint(struct task_struct *task); > +extern void cgroup_freezer_end_checkpoint(struct task_struct *task); > #else /* !CONFIG_CGROUP_FREEZER */ > static inline int cgroup_freezing_or_frozen(struct task_struct *task) > { > return 0; > } > +static inline int cgroup_freezer_begin_checkpoint(struct task_struct *task) > +{ > + return -ENOTSUP; > +} > +static inline void cgroup_freezer_end_checkpoint(struct task_struct *task) > +{} > #endif /* !CONFIG_CGROUP_FREEZER */ > > /* > diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c > index 05795b7..6519692 100644 > --- a/kernel/cgroup_freezer.c > +++ b/kernel/cgroup_freezer.c > @@ -25,6 +25,7 @@ enum freezer_state { > CGROUP_THAWED = 0, > CGROUP_FREEZING, > CGROUP_FROZEN, > + CGROUP_CHECKPOINTING, > }; > > struct freezer { > @@ -64,6 +65,90 @@ int cgroup_freezing_or_frozen(struct task_struct *task) > } > > /* > + * 
caller must hold freezer->lock > + */ > +static void update_freezer_state(struct cgroup *cgroup, > + struct freezer *freezer) > +{ > + struct cgroup_iter it; > + struct task_struct *task; > + unsigned int nfrozen = 0, ntotal = 0; > + > + cgroup_iter_start(cgroup, &it); > + while ((task = cgroup_iter_next(cgroup, &it))) { > + ntotal++; > + if (is_task_frozen_enough(task)) > + nfrozen++; > + } > + > + /* > + * Transition to FROZEN when no new tasks can be added ensures > + * that we never exist in the FROZEN state while there are unfrozen > + * tasks. > + */ > + if (nfrozen == ntotal) > + freezer->state = CGROUP_FROZEN; > + else if (nfrozen > 0) > + freezer->state = CGROUP_FREEZING; > + else > + freezer->state = CGROUP_THAWED; > + cgroup_iter_end(cgroup, &it); > +} > + > +/* > + * cgroup freezer state changes made without the aid of the cgroup filesystem > + * must go through this function to ensure proper locking is observed. > + */ > +static int freezer_checkpointing(struct task_struct *task, > + enum freezer_state next_state) > +{ > + struct freezer *freezer; > + struct cgroup_subsys_state *css; > + enum freezer_state state; > + > + task_lock(task); > + css = task_subsys_state(task, freezer_subsys_id); > + css_get(css); /* make sure freezer doesn't go away */ > + freezer = containerof(css, struct freezer, css); > + task_unlock(task); > + > + if (freezer->state == CGROUP_FREEZING) { > + /* May be in middle of a lazy FREEZING -> FROZEN transition */ > + if (cgroup_lock_live_group(css->cgroup)) { > + spin_lock_irq(&freezer->lock); > + update_freezer_state(css->cgroup, freezer); > + spin_unlock_irq(&freezer->lock); > + cgroup_unlock(); > + } > + } > + > + spin_lock_irq(&freezer->lock); > + state = freezer->state; > + if ((state == CGROUP_FROZEN && next_state == CGROUP_CHECKPOINTING) || > + (state == CGROUP_CHECKPOINTING && next_state == CGROUP_FROZEN)) > + freezer->state = next_state; > + spin_unlock_irq(&freezer->lock); > + css_put(css); > + return state; > +} > + > 
+int cgroup_freezer_begin_checkpoint(struct task_struct *task) > +{ > + if (freezer_checkpointing(task, CGROUP_CHECKPOINTING) != CGROUP_FROZEN) > + return -EBUSY; > + return 0; > +} > + > +void cgroup_freezer_end_checkpoint(struct task_struct *task) > +{ > + /* > + * If we weren't in CHECKPOINTING state then userspace could have > + * unfrozen a task and given us an inconsistent checkpoint image > + */ > + WARN_ON(freezer_checkpointing(task, CGROUP_FROZEN) != CGROUP_CHECKPOINTING); > +} > + > +/* > * cgroups_write_string() limits the size of freezer state strings to > * CGROUP_LOCAL_BUFFER_SIZE > */ > @@ -71,6 +156,7 @@ static const char *freezer_state_strs[] = { > "THAWED", > "FREEZING", > "FROZEN", > + "CHECKPOINTING", > }; > > /* > @@ -78,9 +164,9 @@ static const char *freezer_state_strs[] = { > * Transitions are caused by userspace writes to the freezer.state file. > * The values in parenthesis are state labels. The rest are edge labels. > * > - * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) > - * ^ ^ | | > - * | \_______THAWED_______/ | > + * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) --> (CHECKPOINTING) > + * ^ ^ | | ^ | > + * | \_______THAWED_______/ | \_____________/ > * \__________________________THAWED____________/ > */ > > @@ -216,37 +302,6 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) > spin_unlock_irq(&freezer->lock); > } > > -/* > - * caller must hold freezer->lock > - */ > -static void update_freezer_state(struct cgroup *cgroup, > - struct freezer *freezer) > -{ > - struct cgroup_iter it; > - struct task_struct *task; > - unsigned int nfrozen = 0, ntotal = 0; > - > - cgroup_iter_start(cgroup, &it); > - while ((task = cgroup_iter_next(cgroup, &it))) { > - ntotal++; > - if (is_task_frozen_enough(task)) > - nfrozen++; > - } > - > - /* > - * Transition to FROZEN when no new tasks can be added ensures > - * that we never exist in the FROZEN state while there are unfrozen > - * tasks. 
> - */ > - if (nfrozen == ntotal) > - freezer->state = CGROUP_FROZEN; > - else if (nfrozen > 0) > - freezer->state = CGROUP_FREEZING; > - else > - freezer->state = CGROUP_THAWED; > - cgroup_iter_end(cgroup, &it); > -} > - > static int freezer_read(struct cgroup *cgroup, struct cftype *cft, > struct seq_file *m) > { > @@ -320,7 +375,10 @@ static int freezer_change_state(struct cgroup *cgroup, > freezer = cgroup_freezer(cgroup); > > spin_lock_irq(&freezer->lock); > - > + if (freezer->state == CGROUP_CHECKPOINTING) { > + retval = -EBUSY; > + goto out; > + } > update_freezer_state(cgroup, freezer); > if (goal_state == freezer->state) > goto out; > _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers