On 2/17/21 4:35 PM, Thomas Huth wrote: > On 17/02/2021 15.41, Janosch Frank wrote: >> Having two sets of macros for saving registers on exceptions makes >> maintaining harder. Also we have limited space in the lowcore to save >> stuff and by using the stack as a save area, we can stack exceptions. >> >> So let's use the SAVE/RESTORE_REGS_STACK as the default. When we also >> move the diag308 macro over we can remove the old SAVE/RESTORE_REGS >> macros. > [...] >> diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h >> index 9c4e330a..31c2fc66 100644 >> --- a/lib/s390x/asm/arch_def.h >> +++ b/lib/s390x/asm/arch_def.h >> @@ -8,13 +8,30 @@ >> #ifndef _ASM_S390X_ARCH_DEF_H_ >> #define _ASM_S390X_ARCH_DEF_H_ >> >> -/* >> - * We currently only specify the stack frame members needed for the >> - * SIE library code. >> - */ >> struct stack_frame { >> - unsigned long back_chain; >> - unsigned long empty1[5]; >> + struct stack_frame *back_chain; >> + u64 reserved; >> + /* GRs 2 - 5 */ >> + unsigned long argument_area[4]; >> + /* GRs 6 - 15 */ >> + unsigned long grs[10]; >> + /* FPRs 0, 2, 4, 6 */ >> + s64 fprs[4]; >> +}; > > For consistency, could you please replace the "unsigned long" with u64, or > even switch to uint64_t completely? > > Currently, we have: > > $ grep -r u64 lib/s390x/ | wc -l > 8 > $ grep -r uint64 lib/s390x/ | wc -l > 94 > > ... so uint64_t seems to be the better choice. Hmm, I like the short kernel types, but okay I'll bow to the majority. :) > >> +struct stack_frame_int { >> + struct stack_frame *back_chain; >> + u64 reserved; >> + /* >> + * The GRs are offset compatible with struct stack_frame so we >> + * can easily fetch GR14 for backtraces. >> + */ >> + u64 grs0[14]; >> + u64 grs1[2]; > > Which registers go into grs0 and which ones into grs1? And why is there a > split at all? A comment would be really helpful! I've added two comments one for each struct member. > >> + u32 res; > > res = reserved? Please add a comment. Yes It's now 'reserved1' > >> + u32 fpc; >> + u64 fprs[16]; >> + u64 crs[16]; >> }; > > Similar, switch to uint32_t and uint64_t ? Will do > >> diff --git a/s390x/macros.S b/s390x/macros.S >> index e51a557a..d7eeeb55 100644 >> --- a/s390x/macros.S >> +++ b/s390x/macros.S >> @@ -3,9 +3,10 @@ >> * s390x assembly macros >> * >> * Copyright (c) 2017 Red Hat Inc >> - * Copyright (c) 2020 IBM Corp. >> + * Copyright (c) 2020, 2021 IBM Corp. >> * >> * Authors: >> + * Janosch Frank <frankja@xxxxxxxxxxxxx> >> * Pierre Morel <pmorel@xxxxxxxxxxxxx> >> * David Hildenbrand <david@xxxxxxxxxx> >> */ >> @@ -41,36 +42,45 @@ >> >> /* Save registers on the stack (r15), so we can have stacked interrupts. */ >> .macro SAVE_REGS_STACK >> - /* Allocate a stack frame for 15 general registers */ >> - slgfi %r15, 15 * 8 >> + /* Allocate a full stack frame */ >> + slgfi %r15, 32 * 8 + 4 * 8 > > How did you come up with that number? That does neither match stack > stack_frame nor stack_frame_int, if I got this right. Please add a comment > to the code to explain the numbers. > >> /* Store registers r0 to r14 on the stack */ >> - stmg %r0, %r14, 0(%r15) >> - /* Allocate a stack frame for 16 floating point registers */ >> - /* The size of a FP register is the size of an double word */ >> - slgfi %r15, 16 * 8 >> + stmg %r2, %r15, STACK_FRAME_INT_GRS0(%r15) > > Storing up to r14 should be sufficent since you store r15 again below? Yes, but it also doesn't hurt. > >> + stg %r0, STACK_FRAME_INT_GRS1(%r15) >> + stg %r1, STACK_FRAME_INT_GRS1 + 8(%r15) >> + /* Store the gr15 value before we allocated the new stack */ >> + lgr %r0, %r15 >> + algfi %r0, 32 * 8 + 4 * 8 >> + stg %r0, 13 * 8 + STACK_FRAME_INT_GRS0(%r15) >> + stg %r0, STACK_FRAME_INT_BACKCHAIN(%r15) >> + /* >> + * Store CR0 and load initial CR0 so AFP is active and we can >> + * access all fprs to save them. >> + */ >> + stctg %c0,%c15,STACK_FRAME_INT_CRS(%r15) >> + larl %r1, initial_cr0 >> + lctlg %c0, %c0, 0(%r1) >> /* Save fp register on stack: offset to SP is multiple of reg number */ >> .irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 >> - std \i, \i * 8(%r15) >> + std \i, \i * 8 + STACK_FRAME_INT_FPRS(%r15) >> .endr > > So you saved 16 GRs, 16 CRs and 16 FPRs onto the stack, that's at least 16 * > 3 * 8 = 48 * 8 bytes ... but you only decreased the stack by 32 * 8 + 4 * 8 > bytes initially ... is this a bug, or do I miss something? > > Thomas > After I fixed the CR problem I didn't touch this anymore and the offset macro overwrote it anyway and fixed the problem so it still worked on tests. I've squashed the next patch into this one so we should be fine.