[C/R v20][PATCH 84/96] c/r: restore task fs_root and pwd (v3)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Checkpoint and restore task->fs.  Tasks sharing task->fs will
share them again after restart.

Original patch by Serge Hallyn <serue@xxxxxxxxxx>

Changelog:
  Jan 25: [orenl] Addressed comments by .. myself:
    - add leak detection
    - change order of save/restore of chroot and cwd
    - save/restore fs only after file-table and mm
    - rename functions to adapt existing conventions
  Dec 28: [serge] Addressed comments by Oren (and Dave)
    - define and use {get,put}_fs_struct helpers
    - fix locking comment
    - define ckpt_read_fname() and use in checkpoint/files.c

Signed-off-by: Oren Laadan <orenl@xxxxxxxxxxxxxxx>
Signed-off-by: Serge Hallyn <serue@xxxxxxxxxx>
---
 checkpoint/files.c             |  203 +++++++++++++++++++++++++++++++++++++++-
 checkpoint/objhash.c           |   34 +++++++
 checkpoint/process.c           |   17 ++++
 fs/fs_struct.c                 |   21 ++++
 fs/open.c                      |   58 +++++++-----
 include/linux/checkpoint.h     |    8 ++-
 include/linux/checkpoint_hdr.h |   12 +++
 include/linux/fs.h             |    4 +
 include/linux/fs_struct.h      |    2 +
 9 files changed, 331 insertions(+), 28 deletions(-)

diff --git a/checkpoint/files.c b/checkpoint/files.c
index 4b551fe..7855bae 100644
--- a/checkpoint/files.c
+++ b/checkpoint/files.c
@@ -15,6 +15,9 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
 #include <linux/fdtable.h>
 #include <linux/fsnotify.h>
 #include <linux/pipe_fs_i.h>
@@ -374,6 +377,62 @@ int checkpoint_obj_file_table(struct ckpt_ctx *ctx, struct task_struct *t)
 	return objref;
 }
 
+int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct fs_struct *fs;
+	int fs_objref;
+
+	task_lock(t);		/* lock the owning task, not current */
+	fs = t->fs;
+	get_fs_struct(fs);	/* hold a ref across checkpoint_obj() */
+	task_unlock(t);
+
+	fs_objref = checkpoint_obj(ctx, fs, CKPT_OBJ_FS);
+	put_fs_struct(fs);
+
+	return fs_objref;
+}
+
+/* writes CKPT_HDR_FS, then cwd and root paths; caller holds a ref on @fs */
+static int do_checkpoint_fs(struct ckpt_ctx *ctx, struct fs_struct *fs)
+{
+	struct ckpt_hdr_fs *h;
+	struct fs_struct *fscopy;
+	int ret;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FS);
+	if (!h)
+		return -ENOMEM;
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+	if (ret)
+		return ret;
+
+	fscopy = copy_fs_struct(fs);
+	if (!fscopy)	/* check the copy, not the source */
+		return -ENOMEM;
+
+	ret = checkpoint_fname(ctx, &fscopy->pwd, &ctx->root_fs_path);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "%(T)writing path of cwd");
+		goto out;
+	}
+	ret = checkpoint_fname(ctx, &fscopy->root, &ctx->root_fs_path);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "%(T)writing path of fs root");
+		goto out;
+	}
+	ret = 0;
+ out:
+	free_fs_struct(fscopy);
+	return ret;
+}
+
+int checkpoint_fs(struct ckpt_ctx *ctx, void *ptr)
+{
+	return do_checkpoint_fs(ctx, (struct fs_struct *) ptr);
+}
+
 /***********************************************************************
  * Collect
  */
@@ -460,10 +519,41 @@ int ckpt_collect_file_table(struct ckpt_ctx *ctx, struct task_struct *t)
 	return ret;
 }
 
+int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct fs_struct *fs;
+	int ret;
+
+	task_lock(t);
+	fs = t->fs;
+	get_fs_struct(fs);
+	task_unlock(t);
+
+	ret = ckpt_obj_collect(ctx, fs, CKPT_OBJ_FS);
+
+	put_fs_struct(fs);
+	return ret;
+}
+
 /**************************************************************************
  * Restart
  */
 
+static int ckpt_read_fname(struct ckpt_ctx *ctx, char **fname)
+{
+	int len;
+
+	len = ckpt_read_payload(ctx, (void **) fname,
+				PATH_MAX, CKPT_HDR_FILE_NAME);
+	if (len < 0)	/* NOTE(review): confirm len can't be 0 (see [len - 1]) */
+		return len;
+
+	(*fname)[len - 1] = '\0';	/* always play it safe */
+	ckpt_debug("read filename '%s'\n", *fname);
+
+	return len;
+}
+
 /**
  * restore_open_fname - read a file name and open a file
  * @ctx: checkpoint context
@@ -479,11 +569,9 @@ struct file *restore_open_fname(struct ckpt_ctx *ctx, int flags)
 	if (flags & (O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC))
 		return ERR_PTR(-EINVAL);
 
-	len = ckpt_read_payload(ctx, (void **) &fname,
-				PATH_MAX, CKPT_HDR_FILE_NAME);
+	len = ckpt_read_fname(ctx, &fname);
 	if (len < 0)
 		return ERR_PTR(len);
-	fname[len - 1] = '\0';	/* always play if safe */
 	ckpt_debug("fname '%s' flags %#x\n", fname, flags);
 
 	file = filp_open(fname, flags, 0);
@@ -819,3 +907,112 @@ int restore_obj_file_table(struct ckpt_ctx *ctx, int files_objref)
 
 	return 0;
 }
+
+/*
+ * Called by task restore code: point current->fs at the fs_struct found for
+ * @fs_objref, taking a ref on it and dropping the old one. 0 or -errno.
+ */
+int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref)
+{
+	struct fs_struct *newfs, *oldfs;
+
+	newfs = ckpt_obj_fetch(ctx, fs_objref, CKPT_OBJ_FS);
+	if (IS_ERR(newfs))
+		return PTR_ERR(newfs);
+
+	task_lock(current);
+	get_fs_struct(newfs);
+	oldfs = current->fs;
+	current->fs = newfs;
+	task_unlock(current);
+	put_fs_struct(oldfs);
+
+	return 0;
+}
+
+static int restore_chroot(struct ckpt_ctx *ctx, struct fs_struct *fs, char *name)
+{
+	struct nameidata nd;
+	int ret;
+
+	ckpt_debug("attempting chroot to %s\n", name);
+	ret = path_lookup(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)Opening chroot dir %s", name);
+		return ret;
+	}
+	ret = do_chroot(fs, &nd.path);
+	path_put(&nd.path);
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)Setting chroot %s", name);
+		return ret;
+	}
+	return 0;
+}
+
+static int restore_cwd(struct ckpt_ctx *ctx, struct fs_struct *fs, char *name)
+{
+	struct nameidata nd;
+	int ret;
+
+	ckpt_debug("attempting chdir to %s\n", name);
+	ret = path_lookup(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)Opening cwd %s", name);
+		return ret;
+	}
+	ret = do_chdir(fs, &nd.path);
+	path_put(&nd.path);
+	if (ret) {
+		ckpt_err(ctx, ret, "%(T)Setting cwd %s", name);
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * Objhash ->restore hook for CKPT_OBJ_FS: build an fs_struct with the
+ * saved cwd and root (read in that order); ERR_PTR() on failure.
+ */
+static struct fs_struct *do_restore_fs(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_fs *h;
+	struct fs_struct *fs;
+	char *path;
+	int ret = 0;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_FS);
+	if (IS_ERR(h))
+		return ERR_CAST(h);
+	ckpt_hdr_put(ctx, h);	/* header has no payload fields */
+
+	fs = copy_fs_struct(current->fs);
+	if (!fs)
+		return ERR_PTR(-ENOMEM);
+
+	ret = ckpt_read_fname(ctx, &path);
+	if (ret < 0)
+		goto out;
+	ret = restore_cwd(ctx, fs, path);
+	kfree(path);
+	if (ret)
+		goto out;
+
+	ret = ckpt_read_fname(ctx, &path);
+	if (ret < 0)
+		goto out;
+	ret = restore_chroot(ctx, fs, path);
+	kfree(path);
+
+out:
+	if (ret) {
+		free_fs_struct(fs);
+		return ERR_PTR(ret);
+	}
+	return fs;
+}
+
+void *restore_fs(struct ckpt_ctx *ctx)
+{
+	return (void *) do_restore_fs(ctx);
+}
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 84bceec..5c4749d 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -15,6 +15,7 @@
 #include <linux/hash.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
+#include <linux/fs_struct.h>
 #include <linux/sched.h>
 #include <linux/ipc_namespace.h>
 #include <linux/user_namespace.h>
@@ -126,6 +127,29 @@ static int obj_mm_users(void *ptr)
 	return atomic_read(&((struct mm_struct *) ptr)->mm_users);
 }
 
+static int obj_fs_grab(void *ptr)
+{
+	get_fs_struct((struct fs_struct *) ptr);
+	return 0;
+}
+
+static void obj_fs_drop(void *ptr, int lastref)
+{
+	put_fs_struct((struct fs_struct *) ptr);
+}
+
+static int obj_fs_users(void *ptr)
+{
+	/*
+	 * It's safe to not use fs->lock because the fs is referenced.
+	 * It's also sufficient for leak detection: with no leak the
+	 * count can't change; with a leak it will be too big already
+	 * (even if it's about to grow), and if it's about to shrink
+	 * then it's as if we sampled the count a bit earlier.
+	 */
+	return ((struct fs_struct *) ptr)->users;
+}
+
 static int obj_sighand_grab(void *ptr)
 {
 	atomic_inc(&((struct sighand_struct *) ptr)->count);
@@ -330,6 +354,16 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
 		.checkpoint = checkpoint_mm,
 		.restore = restore_mm,
 	},
+	/* fs object */
+	{
+		.obj_name = "FS",
+		.obj_type = CKPT_OBJ_FS,
+		.ref_drop = obj_fs_drop,
+		.ref_grab = obj_fs_grab,
+		.ref_users = obj_fs_users,
+		.checkpoint = checkpoint_fs,
+		.restore = restore_fs,
+	},
 	/* sighand object */
 	{
 		.obj_name = "SIGHAND",
diff --git a/checkpoint/process.c b/checkpoint/process.c
index e0ef795..f917112 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -232,6 +232,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 	struct ckpt_hdr_task_objs *h;
 	int files_objref;
 	int mm_objref;
+	int fs_objref;
 	int sighand_objref;
 	int signal_objref;
 	int first, ret;
@@ -272,6 +273,13 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 		return mm_objref;
 	}
 
+	/* note: this must come *after* file-table and mm */
+	fs_objref = checkpoint_obj_fs(ctx, t);
+	if (fs_objref < 0) {
+		ckpt_err(ctx, fs_objref, "%(T)process fs\n");
+		return fs_objref;
+	}
+
 	sighand_objref = checkpoint_obj_sighand(ctx, t);
 	ckpt_debug("sighand: objref %d\n", sighand_objref);
 	if (sighand_objref < 0) {
@@ -299,6 +307,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 		return -ENOMEM;
 	h->files_objref = files_objref;
 	h->mm_objref = mm_objref;
+	h->fs_objref = fs_objref;
 	h->sighand_objref = sighand_objref;
 	h->signal_objref = signal_objref;
 	ret = ckpt_write_obj(ctx, &h->h);
@@ -477,6 +486,9 @@ int ckpt_collect_task(struct ckpt_ctx *ctx, struct task_struct *t)
 	ret = ckpt_collect_mm(ctx, t);
 	if (ret < 0)
 		return ret;
+	ret = ckpt_collect_fs(ctx, t);
+	if (ret < 0)
+		return ret;
 	ret = ckpt_collect_sighand(ctx, t);
 
 	return ret;
@@ -645,6 +657,11 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
 	if (ret < 0)
 		goto out;
 
+	ret = restore_obj_fs(ctx, h->fs_objref);
+	ckpt_debug("fs: ret %d (%p)\n", ret, current->fs);
+	if (ret < 0)
+		goto out;	/* same error exit as the sibling objects */
+
 	ret = restore_obj_sighand(ctx, h->sighand_objref);
 	ckpt_debug("sighand: ret %d (%p)\n", ret, current->sighand);
 	if (ret < 0)
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index eee0590..2a4c6f5 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,6 +6,27 @@
 #include <linux/fs_struct.h>
 
 /*
+ * call with owning task locked
+ */
+void get_fs_struct(struct fs_struct *fs)
+{
+	write_lock(&fs->lock);
+	fs->users++;
+	write_unlock(&fs->lock);
+}
+
+void put_fs_struct(struct fs_struct *fs)
+{
+	int kill;
+
+	write_lock(&fs->lock);
+	kill = !--fs->users;
+	write_unlock(&fs->lock);
+	if (kill)
+		free_fs_struct(fs);
+}
+
+/*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
  * It can block.
  */
diff --git a/fs/open.c b/fs/open.c
index 040cef7..62fc70c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -527,6 +527,18 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
 	return sys_faccessat(AT_FDCWD, filename, mode);
 }
 
+int do_chdir(struct fs_struct *fs, struct path *path)
+{
+	int error;
+
+	error = inode_permission(path->dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	if (error)
+		return error;
+
+	set_fs_pwd(fs, path);
+	return 0;
+}
+
 SYSCALL_DEFINE1(chdir, const char __user *, filename)
 {
 	struct path path;
@@ -534,17 +546,10 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)
 
 	error = user_path_dir(filename, &path);
 	if (error)
-		goto out;
-
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
-	if (error)
-		goto dput_and_out;
-
-	set_fs_pwd(current->fs, &path);
+		return error;
 
-dput_and_out:
+	error = do_chdir(current->fs, &path);
 	path_put(&path);
-out:
 	return error;
 }
 
@@ -574,31 +579,36 @@ out:
 	return error;
 }
 
-SYSCALL_DEFINE1(chroot, const char __user *, filename)
+int do_chroot(struct fs_struct *fs, struct path *path)
 {
-	struct path path;
 	int error;
 
-	error = user_path_dir(filename, &path);
+	error = inode_permission(path->dentry->d_inode, MAY_EXEC | MAY_ACCESS);
 	if (error)
-		goto out;
+		return error;
+
+	if (!capable(CAP_SYS_CHROOT))
+		return -EPERM;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	error = security_path_chroot(path);
 	if (error)
-		goto dput_and_out;
+		return error;
 
-	error = -EPERM;
-	if (!capable(CAP_SYS_CHROOT))
-		goto dput_and_out;
-	error = security_path_chroot(&path);
+	set_fs_root(fs, path);
+	return 0;
+}
+
+SYSCALL_DEFINE1(chroot, const char __user *, filename)
+{
+	struct path path;
+	int error;
+
+	error = user_path_dir(filename, &path);
 	if (error)
-		goto dput_and_out;
+		return error;
 
-	set_fs_root(current->fs, &path);
-	error = 0;
-dput_and_out:
+	error = do_chroot(current->fs, &path);
 	path_put(&path);
-out:
 	return error;
 }
 
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index ca91405..3e0937a 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -10,7 +10,7 @@
  *  distribution for more details.
  */
 
-#define CHECKPOINT_VERSION  3
+#define CHECKPOINT_VERSION  4
 
 /* checkpoint user flags */
 #define CHECKPOINT_SUBTREE	0x1
@@ -236,6 +236,12 @@ extern int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 extern int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 			       struct ckpt_hdr_file *h);
 
+extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref);
+extern int checkpoint_fs(struct ckpt_ctx *ctx, void *ptr);
+extern void *restore_fs(struct ckpt_ctx *ctx);
+
 /* credentials */
 extern int checkpoint_groupinfo(struct ckpt_ctx *ctx, void *ptr);
 extern int checkpoint_user(struct ckpt_ctx *ctx, void *ptr);
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 0b36430..4dc852d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -131,6 +131,9 @@ enum {
 	CKPT_HDR_MM_CONTEXT,
 #define CKPT_HDR_MM_CONTEXT CKPT_HDR_MM_CONTEXT
 
+	CKPT_HDR_FS = 451,  /* must be after file-table, mm */
+#define CKPT_HDR_FS CKPT_HDR_FS
+
 	CKPT_HDR_IPC = 501,
 #define CKPT_HDR_IPC CKPT_HDR_IPC
 	CKPT_HDR_IPC_SHM,
@@ -201,6 +204,8 @@ enum obj_type {
 #define CKPT_OBJ_FILE CKPT_OBJ_FILE
 	CKPT_OBJ_MM,
 #define CKPT_OBJ_MM CKPT_OBJ_MM
+	CKPT_OBJ_FS,
+#define CKPT_OBJ_FS CKPT_OBJ_FS
 	CKPT_OBJ_SIGHAND,
 #define CKPT_OBJ_SIGHAND CKPT_OBJ_SIGHAND
 	CKPT_OBJ_SIGNAL,
@@ -416,6 +421,7 @@ struct ckpt_hdr_task_objs {
 
 	__s32 files_objref;
 	__s32 mm_objref;
+	__s32 fs_objref;
 	__s32 sighand_objref;
 	__s32 signal_objref;
 } __attribute__((aligned(8)));
@@ -453,6 +459,12 @@ enum restart_block_type {
 };
 
 /* file system */
+struct ckpt_hdr_fs {
+	struct ckpt_hdr h;
+	/* char *fs_pwd */
+	/* char *fs_root */
+} __attribute__((aligned(8)));
+
 struct ckpt_hdr_file_table {
 	struct ckpt_hdr h;
 	__s32 fdt_nfds;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7902a51..a1525aa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1818,6 +1818,10 @@ extern void drop_collected_mounts(struct vfsmount *);
 
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
 
+struct fs_struct;
+extern int do_chdir(struct fs_struct *fs, struct path *path);
+extern int do_chroot(struct fs_struct *fs, struct path *path);
+
 extern int current_umask(void);
 
 /* /sys/fs */
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 78a05bf..a73cbcb 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -20,5 +20,7 @@ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void free_fs_struct(struct fs_struct *);
 extern void daemonize_fs_struct(void);
 extern int unshare_fs_struct(void);
+extern void get_fs_struct(struct fs_struct *);
+extern void put_fs_struct(struct fs_struct *);
 
 #endif /* _LINUX_FS_STRUCT_H */
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux