Hi Drew,

On 3/22/21 12:10 PM, Andrew Jones wrote:
> So far we've counted on QEMU or kvmtool implicitly zeroing all memory.
> With our goal of eventually supporting bare-metal targets with
> target-efi we should explicitly zero any memory we expect to be zeroed
> ourselves. This obviously includes the BSS, but also the bootcpu's
> stack, as the bootcpu's thread-info lives in the stack and may get
> used in early setup to get the cpu index. Note, this means we still
> assume the bootcpu's cpu index to be zero. That assumption can be
> removed later.
>
> Cc: Nikos Nikoleris <nikos.nikoleris@xxxxxxx>
> Cc: Alexandru Elisei <alexandru.elisei@xxxxxxx>
> Signed-off-by: Andrew Jones <drjones@xxxxxxxxxx>
> ---
>  arm/cstart.S   | 22 ++++++++++++++++++++++
>  arm/cstart64.S | 23 ++++++++++++++++++++++-
>  arm/flat.lds   |  6 ++++++
>  3 files changed, 50 insertions(+), 1 deletion(-)
>
> diff --git a/arm/cstart.S b/arm/cstart.S
> index ef936ae2f874..6de461ef94bf 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -15,12 +15,34 @@
>
>  #define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
>
> +.macro zero_range, tmp1, tmp2, tmp3, tmp4
> +	mov	\tmp3, #0
> +	mov	\tmp4, #0
> +9998:	cmp	\tmp1, \tmp2
> +	beq	9997f
> +	strd	\tmp3, \tmp4, [\tmp1]
> +	add	\tmp1, \tmp1, #8

This could use post-indexed addressing and the add instruction could be
removed. Same for arm64. (There's a rough sketch of what I mean at the
end of this mail.)

> +	b	9998b
> +9997:
> +.endm
> +
> +
>  .arm
>
>  .section .init
>
>  .globl start
>  start:
> +	/* zero BSS */
> +	ldr	r4, =bss
> +	ldr	r5, =ebss
> +	zero_range r4, r5, r6, r7
> +
> +	/* zero stack */
> +	ldr	r4, =stackbase
> +	ldr	r5, =stacktop
> +	zero_range r4, r5, r6, r7
> +
>  	/*
>  	 * set stack, making room at top of stack for cpu0's
>  	 * exception stacks. Must start wtih stackptr, not
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index 0428014aa58a..4dc5989ef50c 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -13,6 +13,15 @@
>  #include <asm/page.h>
>  #include <asm/pgtable-hwdef.h>
>
> +.macro zero_range, tmp1, tmp2
> +9998:	cmp	\tmp1, \tmp2
> +	b.eq	9997f
> +	stp	xzr, xzr, [\tmp1]
> +	add	\tmp1, \tmp1, #16
> +	b	9998b
> +9997:
> +.endm
> +
>  .section .init
>
>  /*
> @@ -51,7 +60,19 @@ start:
>  	b	1b
>
>  1:
> -	/* set up stack */
> +	/* zero BSS */
> +	adrp	x4, bss
> +	add	x4, x4, :lo12:bss
> +	adrp	x5, ebss
> +	add	x5, x5, :lo12:ebss
> +	zero_range x4, x5
> +
> +	/* zero and set up stack */
> +	adrp	x4, stackbase
> +	add	x4, x4, :lo12:stackbase
> +	adrp	x5, stacktop
> +	add	x5, x5, :lo12:stacktop
> +	zero_range x4, x5
>  	mov	x4, #1
>  	msr	spsel, x4
>  	isb
> diff --git a/arm/flat.lds b/arm/flat.lds
> index 25f8d03cba87..8eab3472e2f2 100644
> --- a/arm/flat.lds
> +++ b/arm/flat.lds
> @@ -17,7 +17,11 @@ SECTIONS
>
>      .rodata : { *(.rodata*) }
>      .data : { *(.data) }
> +    . = ALIGN(16);
> +    PROVIDE(bss = .);
>      .bss : { *(.bss) }
> +    . = ALIGN(16);
> +    PROVIDE(ebss = .);
>      . = ALIGN(64K);
>      PROVIDE(edata = .);
>
> @@ -26,6 +30,8 @@ SECTIONS
>
>       * sp must be 16 byte aligned for arm64, and 8 byte aligned for arm
>       * sp must always be strictly less than the true stacktop
>       */
> +    . = ALIGN(16);
> +    PROVIDE(stackbase = .);

This is correct, but strictly speaking, current_thread_info() accesses
stacktop - THREAD_SIZE, which is at most 64K. Is it worth declaring
stackbase after we add the 64K and align it, something like this:

PROVIDE(stackbase = . - 64K)

Or maybe we shouldn't even create a variable for the base of the stack,
and compute it in cstart{,64}.S as stacktop - THREAD_SIZE? That could
make the boot process a tiny bit faster in some cases.
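For the second option, a rough, untested sketch of the arm64 side,
assuming THREAD_SIZE can be used from cstart64.S (it may need an extra
include to be visible there):

	/* zero and set up stack: stackbase = stacktop - THREAD_SIZE */
	adrp	x5, stacktop
	add	x5, x5, :lo12:stacktop
	sub	x4, x5, #THREAD_SIZE
	zero_range x4, x5

with an equivalent sub in cstart.S, which would also let us drop
stackbase from flat.lds entirely.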
Thanks,
Alex

>      . += 64K;
>      . = ALIGN(64K);
>      PROVIDE(stackptr = . - 16);
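P.S. For the post-indexed addressing suggestion above, this is roughly
what I had in mind (untested):

arm:

	strd	\tmp3, \tmp4, [\tmp1], #8

arm64:

	stp	xzr, xzr, [\tmp1], #16

i.e. the store bumps \tmp1 itself, so the separate add in each
zero_range macro goes away.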