This changes the checkpoint/restart procedure for sockets a bit. The socket file header is now checkpointed separately from the socket itself, which allows us to checkpoint a socket without arriving at it from a file descriptor. Thus, most sockets will be checkpointed as a result of processing the file table, calling sock_file_checkpoint(fd), which in turn calls checkpoint_obj(socket). However, we may arrive at some sockets while checkpointing other objects, such as the other end of an AF_UNIX socket with buffers in flight. This patch just opens that door, which is utilized by the next patch. Signed-off-by: Dan Smith <danms@xxxxxxxxxx> --- checkpoint/objhash.c | 2 + include/linux/checkpoint_hdr.h | 4 +- include/net/sock.h | 2 + net/checkpoint.c | 116 ++++++++++++++++++++++++++++----------- net/unix/checkpoint.c | 3 +- 5 files changed, 91 insertions(+), 36 deletions(-) diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c index 019077b..4f26e86 100644 --- a/checkpoint/objhash.c +++ b/checkpoint/objhash.c @@ -381,6 +381,8 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = { .obj_type = CKPT_OBJ_SOCK, .ref_drop = obj_sock_drop, .ref_grab = obj_sock_grab, + .checkpoint = checkpoint_sock, + .restore = restore_sock, }, }; diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h index 78f1f27..39b3cb4 100644 --- a/include/linux/checkpoint_hdr.h +++ b/include/linux/checkpoint_hdr.h @@ -68,6 +68,7 @@ enum { CKPT_HDR_USER, CKPT_HDR_GROUPINFO, CKPT_HDR_TASK_CREDS, + CKPT_HDR_SOCKET, /* 201-299: reserved for arch-dependent */ @@ -367,6 +368,7 @@ struct ckpt_hdr_file_pipe { /* socket */ struct ckpt_hdr_socket { + struct ckpt_hdr h; struct { /* struct socket */ __u64 flags; __u8 state; @@ -425,7 +427,7 @@ struct ckpt_hdr_socket_unix { struct ckpt_hdr_file_socket { struct ckpt_hdr_file common; - struct ckpt_hdr_socket socket; + __s32 sock_objref; } __attribute__((aligned(8))); struct ckpt_hdr_utsns { diff --git a/include/net/sock.h b/include/net/sock.h index 8e3b050..0db1ca3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1644,6 +1644,8 @@ extern __u32 sysctl_rmem_default; /* Checkpoint/Restart Functions */ struct ckpt_ctx; struct ckpt_hdr_file; +extern int checkpoint_sock(struct ckpt_ctx *ctx, void *ptr); +extern void *restore_sock(struct ckpt_ctx *ctx); extern int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file); extern struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *h); diff --git a/net/checkpoint.c b/net/checkpoint.c index fdbf8e7..c84511e 100644 --- a/net/checkpoint.c +++ b/net/checkpoint.c @@ -411,31 +411,26 @@ static int sock_cptrst(struct ckpt_ctx *ctx, struct sock *sk, return 0; } -int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file) +static int do_sock_checkpoint(struct ckpt_ctx *ctx, struct sock *sk) { - struct ckpt_hdr_file_socket *h; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; int ret; + struct socket *sock = sk->sk_socket; + struct ckpt_hdr_socket *h; if (!sock->ops->checkpoint) { ckpt_write_err(ctx, "socket (proto_ops: %pS)", sock->ops); return -ENOSYS; } - h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE); + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SOCKET); if (!h) return -ENOMEM; - h->common.f_type = CKPT_FILE_SOCKET; - /* part I: common to all sockets */ - ret = sock_cptrst(ctx, sk, &h->socket, CKPT_CPT); - if (ret < 0) - goto out; - ret = checkpoint_file_common(ctx, file, &h->common); + ret = sock_cptrst(ctx, sk, h, CKPT_CPT); if (ret < 0) goto out; + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h); if (ret < 0) goto out; @@ -452,6 +447,42 @@ int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file) goto out; ret = sock_write_buffers(ctx, &sk->sk_write_queue); } + + out: + ckpt_hdr_put(ctx, h); + + return ret; +} + +int checkpoint_sock(struct ckpt_ctx *ctx, void *ptr) +{ + return do_sock_checkpoint(ctx, (struct sock *)ptr); +} + +int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file) +{ + struct ckpt_hdr_file_socket *h; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + int ret; + + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE); + if (!h) + return -ENOMEM; + + h->common.f_type = CKPT_FILE_SOCKET; + + h->sock_objref = checkpoint_obj(ctx, sk, CKPT_OBJ_SOCK); + if (h->sock_objref < 0) { + ret = h->sock_objref; + goto out; + } + + ret = checkpoint_file_common(ctx, file, &h->common); + if (ret < 0) + goto out; + + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h); out: ckpt_hdr_put(ctx, h); return ret; @@ -511,35 +542,30 @@ static struct file *sock_alloc_attach_fd(struct socket *sock) return file; } -struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr) +static struct sock *do_sock_restore(struct ckpt_ctx *ctx) { - struct ckpt_hdr_file_socket *hh = (struct ckpt_hdr_file_socket *) ptr; - struct ckpt_hdr_socket *h = &hh->socket; + struct ckpt_hdr_socket *h; struct socket *sock; - struct file *file; int ret; - if (ptr->h.type != CKPT_HDR_FILE || - ptr->h.len != sizeof(*hh) || ptr->f_type != CKPT_FILE_SOCKET) - return ERR_PTR(-EINVAL); + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SOCKET); + if (IS_ERR(h)) + return ERR_PTR(PTR_ERR(h)); /* silently clear flags, e.g. SOCK_NONBLOCK or SOCK_CLOEXEC */ h->sock.type &= SOCK_TYPE_MASK; ret = sock_create(h->sock_common.family, h->sock.type, 0, &sock); if (ret < 0) - return ERR_PTR(ret); + goto err; + /* part II and III: per-protocol restore */ if (!sock->ops->restore) { ckpt_debug("proto_ops lacks checkpoint: %pS\n", sock->ops); ret = -EINVAL; goto err; } - /* - * part II: per socket type state - * (also takes care of part III: socket buffer) - */ ret = sock->ops->restore(ctx, sock, h); if (ret < 0) goto err; @@ -549,21 +575,45 @@ struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr) if (ret < 0) goto err; - file = sock_alloc_attach_fd(sock); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto err; - } + ckpt_hdr_put(ctx, h); + + return sock->sk; + err: + ckpt_hdr_put(ctx, h); + sock_release(sock); + + return ERR_PTR(ret); +} + +void *restore_sock(struct ckpt_ctx *ctx) +{ + return do_sock_restore(ctx); +} + +struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr) +{ + struct ckpt_hdr_file_socket *h = (struct ckpt_hdr_file_socket *)ptr; + struct sock *sk; + struct file *file; + int ret; + + if (ptr->h.type != CKPT_HDR_FILE || ptr->f_type != CKPT_FILE_SOCKET) + return ERR_PTR(-EINVAL); + + sk = ckpt_obj_fetch(ctx, h->sock_objref, CKPT_OBJ_SOCK); + if (IS_ERR(sk)) + return ERR_PTR(PTR_ERR(sk)); + + file = sock_alloc_attach_fd(sk->sk_socket); + if (IS_ERR(file)) + return file; ret = restore_file_common(ctx, file, ptr); if (ret < 0) { fput(file); - file = ERR_PTR(ret); + return ERR_PTR(ret); } - return file; - err: - sock_release(sock); - return ERR_PTR(ret); + return file; } diff --git a/net/unix/checkpoint.c b/net/unix/checkpoint.c index 366bc80..cda8434 100644 --- a/net/unix/checkpoint.c +++ b/net/unix/checkpoint.c @@ -57,7 +57,6 @@ static int unix_write_cwd(struct ckpt_ctx *ctx, int unix_checkpoint(struct ckpt_ctx *ctx, struct socket *sock) { struct unix_sock *sk = unix_sk(sock->sk); - struct unix_sock *pr = unix_sk(sk->peer); struct ckpt_hdr_socket_unix *un; int new; int ret = -ENOMEM; @@ -86,7 +85,7 @@ int unix_checkpoint(struct ckpt_ctx *ctx, struct socket *sock) goto out; if (sk->peer) - un->peer = ckpt_obj_lookup_add(ctx, pr, CKPT_OBJ_SOCK, &new); + un->peer = checkpoint_obj(ctx, sk->peer, CKPT_OBJ_SOCK); else un->peer = 0; -- 1.6.2.5 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers