Oren Laadan wrote: > Very necessary ! > See comments inline. > > Serge E. Hallyn wrote: >> Checkpoint and restore task->fs. Tasks sharing task->fs will >> share them again after restart. >> >> Changelog: >> Dec 28: Addressed comments by Oren (and Dave) >> 1. define and use {get,put}_fs_struct helpers >> 2. fix locking comment >> 3. define ckpt_read_fname() and use in checkpoint/files.c >> >> Signed-off-by: Serge E. Hallyn <serue@xxxxxxxxxx> >> --- >> checkpoint/files.c | 197 +++++++++++++++++++++++++++++++++++++++- >> checkpoint/objhash.c | 9 ++ >> checkpoint/process.c | 13 +++ >> fs/fs_struct.c | 21 ++++ >> fs/open.c | 53 ++++++----- >> include/linux/checkpoint.h | 10 ++- >> include/linux/checkpoint_hdr.h | 10 ++ >> include/linux/fs.h | 4 + >> include/linux/fs_struct.h | 2 + >> 9 files changed, 293 insertions(+), 26 deletions(-) >> >> diff --git a/checkpoint/files.c b/checkpoint/files.c >> index b622588..d6cf945 100644 >> --- a/checkpoint/files.c >> +++ b/checkpoint/files.c >> @@ -24,6 +24,9 @@ >> #include <linux/checkpoint_hdr.h> >> #include <linux/eventpoll.h> >> #include <linux/eventfd.h> >> +#include <linux/fs.h> >> +#include <linux/fs_struct.h> >> +#include <linux/namei.h> >> #include <net/sock.h> >> >> >> @@ -449,10 +452,81 @@ int ckpt_collect_file_table(struct ckpt_ctx *ctx, struct task_struct *t) >> return ret; >> } >> >> +int checkpoint_get_task_fs(struct ckpt_ctx *ctx, struct task_struct *t) > > Renaming these to {checkpoint/restore}_obj_filesystem() will be > consistent with existing naming convention there. > >> +{ >> + struct fs_struct *fs; >> + int fs_objref; >> + >> + task_lock(current); >> + fs = t->fs; >> + get_fs_struct(fs); >> + task_unlock(current); >> + >> + fs_objref = checkpoint_obj(ctx, fs, CKPT_OBJ_TASK_FS); >> + put_fs_struct(fs); >> + >> + return fs_objref; >> +} >> + >> +/* >> + * called with fs refcount bumped so it won't disappear >> + */ >> +int checkpoint_obj_task_fs(struct ckpt_ctx *ctx, struct fs_struct *fs) >> +{ >> + struct ckpt_hdr_task_fs *h; >> + int ret; >> + struct fs_struct *fscopy; >> + >> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_FS); >> + if (!h) >> + return -ENOMEM; >> + ret = ckpt_write_obj(ctx, &h->h); >> + ckpt_hdr_put(ctx, h); >> + if (ret) >> + return ret; >> + >> + fscopy = copy_fs_struct(fs); >> + if (!fs) >> + return -ENOMEM; >> + > > Reverse the order below: cwd first then root - this will allow > the cwd to be outside of the current chroot, in the case that > the task escaped the chroot. > >> + ret = checkpoint_fname(ctx, &fscopy->root, &ctx->fs_mnt); >> + if (ret < 0) { >> + ckpt_err(ctx, ret, "%(T)writing name of fs root"); >> + goto out; >> + } >> + ret = checkpoint_fname(ctx, &fscopy->pwd, &ctx->fs_mnt); >> + if (ret < 0) { >> + ckpt_err(ctx, ret, "%(T)writing name of pwd"); >> + goto out; >> + } >> + ret = 0; > > [...] > >> +/* this is the fn called by objhash when it runs into a >> + * CKPT_OBJ_TASK_FS entry. Creates an fs_struct and >> + * places it in the hash. */ >> +static struct fs_struct *restore_obj_task_fs(struct ckpt_ctx *ctx) >> +{ >> + struct ckpt_hdr_task_fs *h; >> + struct fs_struct *fs; >> + int ret = 0; >> + char *root, *cwd; >> + >> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_FS); >> + if (IS_ERR(h)) >> + return ERR_PTR(PTR_ERR(h)); >> + ckpt_hdr_put(ctx, h); >> + >> + fs = copy_fs_struct(current->fs); >> + if (!fs) >> + return ERR_PTR(-ENOMEM); >> + > > Please reverse the order below: restore cwd before chroot. > >> + ret = ckpt_read_fname(ctx, &root); >> + if (ret < 0) >> + goto out; >> + ret = restore_chroot(ctx, fs, root); >> + kfree(root); >> + if (ret) >> + goto out; >> + >> + ret = ckpt_read_fname(ctx, &cwd); >> + if (ret < 0) >> + goto out; >> + ret = restore_cwd(ctx, fs, cwd); >> + kfree(cwd); >> + >> +out: >> + if (ret) { >> + free_fs_struct(fs); >> + return ERR_PTR(ret); >> + } >> + return fs; >> +} >> + >> +void *restore_task_fs(struct ckpt_ctx *ctx) >> +{ >> + return (void *) restore_obj_task_fs(ctx); >> +} >> + >> +/* >> + * Called by task restore code to set the restarted task's >> + * current->fs to an entry on the hash >> + */ >> +int restore_set_task_fs(struct ckpt_ctx *ctx, int fs_objref) > > (Please rename this one, too) > >> +{ >> + struct fs_struct *newfs, *oldfs; >> + >> + newfs = ckpt_obj_fetch(ctx, fs_objref, CKPT_OBJ_TASK_FS); >> + if (IS_ERR(newfs)) >> + return PTR_ERR(newfs); >> + >> + task_lock(current); >> + get_fs_struct(newfs); >> + oldfs = current->fs; >> + current->fs = newfs; >> + task_unlock(current); >> + put_fs_struct(oldfs); >> + >> + return 0; >> +} >> diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c >> index 782661d..20fd3e9 100644 >> --- a/checkpoint/objhash.c >> +++ b/checkpoint/objhash.c >> @@ -417,6 +417,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = { >> .checkpoint = checkpoint_userns, >> .restore = restore_userns, >> }, >> + /* struct fs_struct */ >> + { >> + .obj_name = "TASK_FS", >> + .obj_type = CKPT_OBJ_TASK_FS, >> + .ref_drop = obj_task_fs_drop, >> + .ref_grab = obj_task_fs_grab, >> + .checkpoint = checkpoint_task_fs, >> + .restore = restore_task_fs, > > I think we also need a .collect method to prevent leaks ? Scratch that. I meant to ask if we needed more work on collect because the fs itself points to underlying objects (the path). But no, because we didn't yet "objectize" the path. Oren. _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers