Implement the s390 arch-specific checkpoint/restart helpers. This is on top of Oren Laadan's c/r code. With these, I am able to checkpoint and restart simple programs as per Oren's patch intro. While on x86 I never had to freeze a single task to checkpoint it, on s390 I do need to. That is a prerequisite for consistent snapshots (especially with multiple processes) anyway, so I don't see that as a problem. I'm having a strange problem with libraries though. If I link a program with some extra libraries (-lm, -lcrypt, -lpthread, whatever), then after restart, if I do an fprintf("%f"), the program segfaults. If I don't link with extra libraries besides libc, or don't fprintf a float, there are no segfaults after restart. ltrace and strace aren't helpful, and gdb says that the restarted program faulted at __printf_fp@@GLIBC_2.4. objdump -d output shows no difference (of course, since this is after linking), but mentions a __dso_handle which doesn't look familiar compared to x86 output. /proc/$$/maps looks the same on the original and restarted task too. So I'm flummoxed. Changelog: Jan 30: . Switched types in cr_hdr_cpu to __u64 etc. (Per Oren's suggestion) . Replaced direct inclusion of structs in cr_hdr_cpu with the struct members. (Per Oren's suggestion) . Also ended up adding a bunch of new things into restart (mm_segment, ksp, etc.) in a vain attempt to get code using the fpu to not segfault after restart. Signed-off-by: Serge E. 
Hallyn <serue@xxxxxxxxxx> --- arch/s390/include/asm/checkpoint_hdr.h | 91 ++++++++++++++ arch/s390/include/asm/unistd.h | 4 +- arch/s390/kernel/compat_wrapper.S | 12 ++ arch/s390/kernel/syscalls.S | 2 + arch/s390/mm/Makefile | 1 + arch/s390/mm/checkpoint.c | 206 ++++++++++++++++++++++++++++++++ checkpoint/Kconfig | 2 +- 7 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/asm/checkpoint_hdr.h create mode 100644 arch/s390/mm/checkpoint.c diff --git a/arch/s390/include/asm/checkpoint_hdr.h b/arch/s390/include/asm/checkpoint_hdr.h new file mode 100644 index 0000000..f11ec74 --- /dev/null +++ b/arch/s390/include/asm/checkpoint_hdr.h @@ -0,0 +1,91 @@ +#ifndef __ASM_S390_CKPT_HDR_H +#define __ASM_S390_CKPT_HDR_H +/* + * Checkpoint/restart - architecture specific headers s/390 + * + * Copyright (C) 2008 Oren Laadan + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ + +#include <linux/types.h> + +/* + * To maintain compatibility between 32-bit and 64-bit architecture flavors, + * keep data 64-bit aligned: use padding for structure members, and use + * __attribute__((aligned (8))) for the entire structure. + * + * Quoting Arnd Bergmann: + * "This structure has an odd multiple of 32-bit members, which means + * that if you put it into a larger structure that also contains 64-bit + * members, the larger structure may get different alignment on x86-32 + * and x86-64, which you might want to avoid. I can't tell if this is + * an actual problem here. ... In this case, I'm pretty sure that + * sizeof(cr_hdr_task) on x86-32 is different from x86-64, since it + * will be 32-bit aligned on x86-32." 
+ */ + +#ifdef __KERNEL__ +#include <asm/processor.h> +#else +#include <sys/user.h> +#endif + +#ifdef __s390x__ +/* + * Notes + * NUM_GPRS defined in <asm/ptrace.h> to be 16 + * NUM_FPRS defined in <asm/ptrace.h> to be 16 + * NUM_ACRS defined in <asm/ptrace.h> to be 16 + */ +struct cr_hdr_cpu { + __u64 args[1]; + __u64 gprs[NUM_GPRS]; + __u64 orig_gpr2; + __u16 svcnr; + __u16 ilc; + __u32 acrs[NUM_ACRS]; + __u64 ksp; + __u64 prot_addr; + __u32 trap_no; + __u64 ieee_instruction_pointer; + __u64 pfault_wait; + + /* mm_segment_t */ + __u32 mm_segment_t_ar4; + + /* psw_t */ + __u64 psw_t_mask; + __u64 psw_t_addr; + + /* s390_fp_regs_t */ + __u32 fpc; + struct { + float f; + double d; + __u64 ui; + __u32 fp_hi; + __u32 fp_lo; + } fprs[NUM_FPRS]; + + /* per_struct */ + __u64 per_control_regs[3]; + __u32 em_instr; + __u64 starting_addr; + __u64 ending_addr; + __u16 perc_atmid; + __u64 address; + __u8 access_id; +}; + +struct cr_hdr_mm_context { + unsigned long vdso_base; +}; + +struct cr_hdr_head_arch { +}; +#endif /* __s390x__ */ + +#endif /* __ASM_S390_CKPT_HDR_H */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index c8ad350..ffe64a0 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -265,7 +265,9 @@ #define __NR_pipe2 325 #define __NR_dup3 326 #define __NR_epoll_create1 327 -#define NR_syscalls 328 +#define __NR_checkpoint 328 +#define __NR_restart 329 +#define NR_syscalls 330 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index fc2c971..9546a81 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1767,3 +1767,15 @@ sys_dup3_wrapper: sys_epoll_create1_wrapper: lgfr %r2,%r2 # int jg sys_epoll_create1 # branch to system call + + .globl sys_checkpoint_wrapper +sys_checkpoint_wrapper: + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # int + llgfr %r4,%r4 # unsigned long + + 
.globl sys_restart_wrapper +sys_restart_wrapper: + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgfr %r4,%r4 # unsigned long diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2d61787..54316c8 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -336,3 +336,5 @@ SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) +SYSCALL(sys_checkpoint,sys_checkpoint,sys_checkpoint_wrapper) +SYSCALL(sys_restart,sys_restart,sys_restart_wrapper) diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 2a74581..b3f0f32 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -6,3 +6,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PAGE_STATES) += page-states.o +obj-$(CONFIG_CHECKPOINT_RESTART) += checkpoint.o diff --git a/arch/s390/mm/checkpoint.c b/arch/s390/mm/checkpoint.c new file mode 100644 index 0000000..2c96493 --- /dev/null +++ b/arch/s390/mm/checkpoint.c @@ -0,0 +1,206 @@ +/* + * Checkpoint/restart - architecture specific support for s390 + * + * Copyright (C) 2008 Oren Laadan + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. 
+ */ + +#include <linux/checkpoint.h> +#include <linux/checkpoint_hdr.h> +#include <linux/kernel.h> +#include <asm/system.h> +#include <asm/pgtable.h> + +int cr_write_thread(struct cr_ctx *ctx, struct task_struct *t) +{ + return 0; +} + +static void cr_save_cpu_regs(struct cr_hdr_cpu *hh, struct task_struct *t) +{ + struct thread_struct *thread = &t->thread; + struct pt_regs *regs = task_pt_regs(t); + + hh->fpc = thread->fp_regs.fpc; + memcpy(&hh->fprs, &thread->fp_regs.fprs, NUM_FPRS*sizeof(freg_t)); + memcpy(hh->acrs, &thread->acrs[0], NUM_ACRS * sizeof(unsigned int)); + hh->mm_segment_t_ar4 = thread->mm_segment.ar4; + hh->psw_t_mask = regs->psw.mask; + hh->psw_t_addr = regs->psw.addr; + + hh->ksp = thread->ksp; /* unsure */ + + hh->args[0] = regs->args[0]; + hh->svcnr = regs->svcnr; + hh->ilc = regs->ilc; + memcpy(hh->gprs, regs->gprs, NUM_GPRS*sizeof(unsigned long)); + hh->orig_gpr2 = regs->orig_gpr2; + + hh->prot_addr = thread->prot_addr; + hh->trap_no = thread->trap_no; + hh->ieee_instruction_pointer = thread->ieee_instruction_pointer; + hh->pfault_wait = thread->pfault_wait; + + /* per_info */ + memcpy(&hh->per_control_regs, &thread->per_info.control_regs.words, + 3 * sizeof(unsigned long)); + hh->em_instr = 0; + if (thread->per_info.single_step) + hh->em_instr |= 1; + if (thread->per_info.instruction_fetch) + hh->em_instr |= 2; + hh->starting_addr = thread->per_info.starting_addr; + hh->ending_addr = thread->per_info.ending_addr; + hh->perc_atmid = thread->per_info.lowcore.words.perc_atmid; + hh->address = thread->per_info.lowcore.words.address; + hh->access_id = thread->per_info.lowcore.words.access_id; +} + +/* dump the cpu state and registers of a given task */ +int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t) +{ + struct cr_hdr h; + struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh)); + int ret; + + h.type = CR_HDR_CPU; + h.len = sizeof(*hh); + h.parent = task_pid_vnr(t); + + cr_save_cpu_regs(hh, t); + + ret = cr_write_obj(ctx, &h, hh); + 
cr_hbuf_put(ctx, sizeof(*hh)); + WARN_ON_ONCE(ret < 0); + + return ret; +} + +int cr_write_head_arch(struct cr_ctx *ctx) +{ + return 0; +} + +/* Nothing to do for mm context state */ +int cr_write_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int parent) +{ + struct cr_hdr h; + struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh)); + int ret; + + h.type = CR_HDR_MM_CONTEXT; + h.len = sizeof(*hh); + h.parent = parent; + +#if 0 + /* Oren's v13 is on an older kernel which has no vdso_base */ + /* on newer kernel, we'll have to enable this */ + hh->vdso_base = mm->context.vdso_base; + printk(KERN_NOTICE "checkpointing vdso_base %lx\n", hh->vdso_base); +#else + hh->vdso_base = 0; +#endif + + ret = cr_write_obj(ctx, &h, hh); + cr_hbuf_put(ctx, sizeof(*hh)); + + return ret; +} + +/* restart APIs */ + +int cr_read_thread(struct cr_ctx *ctx) +{ + return 0; +} + +int cr_read_cpu(struct cr_ctx *ctx) +{ + struct cr_hdr_cpu *hh = cr_hbuf_get(ctx, sizeof(*hh)); + struct thread_struct *thread = &current->thread; + struct pt_regs *regs = task_pt_regs(current); + int parent, ret; + + parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_CPU); + if (parent < 0) { + ret = parent; + goto out; + } + ret = 0; + + regs->psw.addr &= ~PSW_ADDR_INSN; + regs->psw.addr |= hh->psw_t_addr & PSW_ADDR_INSN; + + regs->args[0] = hh->args[0]; + regs->svcnr = hh->svcnr; + regs->ilc = hh->ilc; + memcpy(regs->gprs, hh->gprs, NUM_GPRS*sizeof(unsigned long)); + regs->orig_gpr2 = hh->orig_gpr2; + + thread->ksp = hh->ksp; /* unsure */ + + memcpy(thread->acrs, hh->acrs, NUM_ACRS * sizeof(unsigned int)); + thread->prot_addr = hh->prot_addr; + thread->trap_no = hh->trap_no; + thread->ieee_instruction_pointer = hh->ieee_instruction_pointer; + thread->pfault_wait = hh->pfault_wait; + + /* s390_fp_regs_t */ + thread->fp_regs.fpc = hh->fpc; + memcpy(&thread->fp_regs.fprs, &hh->fprs, NUM_FPRS*sizeof(freg_t)); + + thread->mm_segment.ar4 = hh->mm_segment_t_ar4; + + /* per_struct */ + 
memcpy(&thread->per_info.control_regs.words, &hh->per_control_regs, + 3 * sizeof(unsigned long)); + if (hh->em_instr & 0x01) + thread->per_info.single_step = 1; + if (hh->em_instr & 0x02) + thread->per_info.instruction_fetch = 1; + thread->per_info.starting_addr = hh->starting_addr; + thread->per_info.ending_addr = hh->ending_addr; + thread->per_info.lowcore.words.perc_atmid = hh->perc_atmid; + thread->per_info.lowcore.words.address = hh->address; + thread->per_info.lowcore.words.access_id = hh->access_id; + +out: + cr_hbuf_put(ctx, sizeof(*hh)); + return ret; +} + +int cr_read_head_arch(struct cr_ctx *ctx) +{ + return 0; +} + + +int cr_read_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int rparent) +{ + struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh)); + int parent, ret = -EINVAL; + + s390_enable_sie(); + + parent = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM_CONTEXT); + if (parent < 0) { + ret = parent; + goto out; + } + if (parent != rparent) + goto out; + +#if 0 + /* enable this when s390 defines vdso-base */ + mm->context.vdso_base = hh->vdso_base; + printk(KERN_NOTICE "read vdso_base %lx\n", hh->vdso_base); +#endif + ret = 0; + + out: + cr_hbuf_put(ctx, sizeof(*hh)); + return ret; +} diff --git a/checkpoint/Kconfig b/checkpoint/Kconfig index ffaa635..cb1d29d 100644 --- a/checkpoint/Kconfig +++ b/checkpoint/Kconfig @@ -1,7 +1,7 @@ config CHECKPOINT_RESTART prompt "Enable checkpoint/restart (EXPERIMENTAL)" def_bool n - depends on X86_32 && EXPERIMENTAL + depends on (X86_32 || (S390 && 64BIT)) && EXPERIMENTAL help Application checkpoint/restart is the ability to save the state of a running application so that it can later resume -- 1.6.1 -- To unsubscribe from this list: send the line "unsubscribe linux-s390" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html