While checkpointing each file-descriptor, find all the locks on the file and save information about the lock in the checkpoint-image. A follow-on patch will use this information to restore the file-locks. Changelog[v2]: [Matt Helsley]: Use fixed sizes (__s64) instead of 'loff_t' in 'struct ckpt_hdr_file_lock'. [Matt Helsley, Serge Hallyn]: Highlight new use of BKL (using lock_flocks() macros as suggested by Serge). [Matt Helsley]: Reorg code a bit to simplify error handling. [Matt Helsley]: Reorg code to initialize marker-lock (Pass a NULL lock to checkpoint_one_file_lock() to indicate marker). Signed-off-by: Sukadev Bhattiprolu <sukadev@xxxxxxxxxxxxxxxxxx> --- fs/checkpoint.c | 101 ++++++++++++++++++++++++++++++++++----- include/linux/checkpoint_hdr.h | 10 ++++ 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/fs/checkpoint.c b/fs/checkpoint.c index e82f4f1..7773488 100644 --- a/fs/checkpoint.c +++ b/fs/checkpoint.c @@ -26,8 +26,19 @@ #include <linux/checkpoint.h> #include <linux/eventpoll.h> #include <linux/eventfd.h> +#include <linux/smp_lock.h> #include <net/sock.h> +/* + * TODO: This code uses the BKL for consistency with other uses of + * 'for_each_lock()'. But since the BKL may be replaced with another + * lock in the future, use lock_flocks() macros instead. 
lock_flocks() + * are currently used in BKL-fix sand boxes and when those changes + * are merged, the following macros can be removed + */ +#define lock_flocks() lock_kernel() +#define unlock_flocks() unlock_kernel() + /************************************************************************** * Checkpoint */ @@ -256,8 +267,79 @@ static int checkpoint_file(struct ckpt_ctx *ctx, void *ptr) return ret; } +static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, struct file *file, + struct file_lock *lock) +{ + int rc; + struct ckpt_hdr_file_lock *h; + + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK); + if (!h) + return -ENOMEM; + + if (lock) { + h->fl_start = lock->fl_start; + h->fl_end = lock->fl_end; + h->fl_type = lock->fl_type; + h->fl_flags = lock->fl_flags; + } else { + /* Checkpoint a dummy lock as a marker */ + h->fl_start = -1; + h->fl_flags = FL_POSIX; + } + + rc = ckpt_write_obj(ctx, &h->h); + + ckpt_hdr_put(ctx, h); + + return rc; +} + +int +checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files, + struct file *file) +{ + int rc; + struct inode *inode; + struct file_lock **lockpp; + struct file_lock *lockp; + + lock_flocks(); + inode = file->f_path.dentry->d_inode; + for_each_lock(inode, lockpp) { + lockp = *lockpp; + ckpt_debug("Lock [%lld, %lld, %d, 0x%x]\n", lockp->fl_start, + lockp->fl_end, lockp->fl_type, lockp->fl_flags); + + if (lockp->fl_owner != files) + continue; + + rc = -EBADF; + if (IS_POSIX(lockp)) + rc = checkpoint_one_file_lock(ctx, file, lockp); + + if (rc < 0) { + ckpt_err(ctx, rc, "%(T), checkpoint of lock " + "[%lld, %lld, %d, 0x%x] failed\n", + lockp->fl_start, lockp->fl_end, + lockp->fl_type, lockp->fl_flags); + goto out; + } + } + + /* + * At the end of file-locks for this file, checkpoint a marker. 
+ */ + rc = checkpoint_one_file_lock(ctx, file, NULL); + if (rc < 0) + ckpt_err(ctx, rc, "%(T), checkpoint marker-lock failed\n"); +out: + unlock_flocks(); + return rc; +} + /** - * ckpt_write_file_desc - dump the state of a given file descriptor + * checkpoint_file_desc - dump the state of a given file descriptor * @ctx: checkpoint context * @files: files_struct pointer * @fd: file descriptor @@ -288,18 +370,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx, } rcu_read_unlock(); - ret = find_locks_with_owner(file, files); - /* - * find_locks_with_owner() returns an error when there - * are no locks found, so we *want* it to return an error - * code. Its success means we have to fail the checkpoint. - */ - if (!ret) { - ret = -EBADF; - ckpt_err(ctx, ret, "%(T)fd %d has file lock or lease\n", fd); - goto out; - } - /* sanity check (although this shouldn't happen) */ ret = -EBADF; if (!file) { @@ -323,6 +393,11 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx, h->fd_close_on_exec = coe; ret = ckpt_write_obj(ctx, &h->h); + if (ret < 0) + goto out; + + ret = checkpoint_file_locks(ctx, files, file); + out: ckpt_hdr_put(ctx, h); if (file) diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h index 44e2a0d..4509016 100644 --- a/include/linux/checkpoint_hdr.h +++ b/include/linux/checkpoint_hdr.h @@ -144,6 +144,8 @@ enum { #define CKPT_HDR_TTY_LDISC CKPT_HDR_TTY_LDISC CKPT_HDR_EPOLL_ITEMS, /* must be after file-table */ #define CKPT_HDR_EPOLL_ITEMS CKPT_HDR_EPOLL_ITEMS + CKPT_HDR_FILE_LOCK, +#define CKPT_HDR_FILE_LOCK CKPT_HDR_FILE_LOCK CKPT_HDR_MM = 401, #define CKPT_HDR_MM CKPT_HDR_MM @@ -581,6 +583,14 @@ struct ckpt_hdr_file_generic { struct ckpt_hdr_file common; } __attribute__((aligned(8))); +struct ckpt_hdr_file_lock { + struct ckpt_hdr h; + __s64 fl_start; + __s64 fl_end; + __u8 fl_type; + __u8 fl_flags; +}; + struct ckpt_hdr_file_pipe { struct ckpt_hdr_file common; __s32 pipe_objref; -- 1.6.0.4 -- To unsubscribe from this list: 
send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html