On Mon, Mar 22, 2021 at 03:52:13PM +0000, Alexandru Elisei wrote:
> Hi Drew,
>
> On 3/22/21 12:10 PM, Andrew Jones wrote:
> > So far we've counted on QEMU or kvmtool implicitly zeroing all memory.
> > With our goal of eventually supporting bare-metal targets with
> > target-efi we should explicitly zero any memory we expect to be zeroed
> > ourselves. This obviously includes the BSS, but also the bootcpu's
> > stack, as the bootcpu's thread-info lives in the stack and may get
> > used in early setup to get the cpu index. Note, this means we still
> > assume the bootcpu's cpu index to be zero. That assumption can be
> > removed later.
> >
> > Cc: Nikos Nikoleris <nikos.nikoleris@xxxxxxx>
> > Cc: Alexandru Elisei <alexandru.elisei@xxxxxxx>
> > Signed-off-by: Andrew Jones <drjones@xxxxxxxxxx>
> > ---
> >  arm/cstart.S   | 22 ++++++++++++++++++++++
> >  arm/cstart64.S | 23 ++++++++++++++++++++++-
> >  arm/flat.lds   |  6 ++++++
> >  3 files changed, 50 insertions(+), 1 deletion(-)
> >
> > diff --git a/arm/cstart.S b/arm/cstart.S
> > index ef936ae2f874..6de461ef94bf 100644
> > --- a/arm/cstart.S
> > +++ b/arm/cstart.S
> > @@ -15,12 +15,34 @@
> >  
> >  #define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
> >  
> > +.macro zero_range, tmp1, tmp2, tmp3, tmp4
> > +	mov	\tmp3, #0
> > +	mov	\tmp4, #0
> > +9998:	cmp	\tmp1, \tmp2
> > +	beq	9997f
> > +	strd	\tmp3, \tmp4, [\tmp1]
> > +	add	\tmp1, \tmp1, #8
>
> This could use post-indexed addressing and the add instruction could be removed.
> Same for arm64.
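
For example (untested sketch), the stores could become post-indexed,
which drops the add in both macros:

	strd	\tmp3, \tmp4, [\tmp1], #8	@ arm: store and advance by 8

	stp	xzr, xzr, [\tmp1], #16		// arm64: store pair and advance by 16
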
> > +	b	9998b
> > +9997:
> > +.endm
> > +
> > +
> >  .arm
> >  
> >  .section .init
> >  
> >  .globl start
> >  start:
> > +	/* zero BSS */
> > +	ldr	r4, =bss
> > +	ldr	r5, =ebss
> > +	zero_range r4, r5, r6, r7
> > +
> > +	/* zero stack */
> > +	ldr	r4, =stackbase
> > +	ldr	r5, =stacktop
> > +	zero_range r4, r5, r6, r7
> > +
> >  	/*
> >  	 * set stack, making room at top of stack for cpu0's
> >  	 * exception stacks. Must start wtih stackptr, not
> > diff --git a/arm/cstart64.S b/arm/cstart64.S
> > index 0428014aa58a..4dc5989ef50c 100644
> > --- a/arm/cstart64.S
> > +++ b/arm/cstart64.S
> > @@ -13,6 +13,15 @@
> >  #include <asm/page.h>
> >  #include <asm/pgtable-hwdef.h>
> >  
> > +.macro zero_range, tmp1, tmp2
> > +9998:	cmp	\tmp1, \tmp2
> > +	b.eq	9997f
> > +	stp	xzr, xzr, [\tmp1]
> > +	add	\tmp1, \tmp1, #16
> > +	b	9998b
> > +9997:
> > +.endm
> > +
> >  .section .init
> >  
> >  /*
> > @@ -51,7 +60,19 @@ start:
> >  	b	1b
> >  
> >  1:
> > -	/* set up stack */
> > +	/* zero BSS */
> > +	adrp	x4, bss
> > +	add	x4, x4, :lo12:bss
> > +	adrp	x5, ebss
> > +	add	x5, x5, :lo12:ebss
> > +	zero_range x4, x5
> > +
> > +	/* zero and set up stack */
> > +	adrp	x4, stackbase
> > +	add	x4, x4, :lo12:stackbase
> > +	adrp	x5, stacktop
> > +	add	x5, x5, :lo12:stacktop
> > +	zero_range x4, x5
> >  	mov	x4, #1
> >  	msr	spsel, x4
> >  	isb
> > diff --git a/arm/flat.lds b/arm/flat.lds
> > index 25f8d03cba87..8eab3472e2f2 100644
> > --- a/arm/flat.lds
> > +++ b/arm/flat.lds
> > @@ -17,7 +17,11 @@ SECTIONS
> >  
> >      .rodata : { *(.rodata*) }
> >      .data : { *(.data) }
> > +    . = ALIGN(16);
> > +    PROVIDE(bss = .);
> >      .bss : { *(.bss) }
> > +    . = ALIGN(16);
> > +    PROVIDE(ebss = .);
> >      . = ALIGN(64K);
> >      PROVIDE(edata = .);
> >  
> > @@ -26,6 +30,8 @@ SECTIONS
> >       * sp must be 16 byte aligned for arm64, and 8 byte aligned for arm
> >       * sp must always be strictly less than the true stacktop
> >       */
> > +    . = ALIGN(16);
> > +    PROVIDE(stackbase = .);
>
> This is correct, but strictly speaking, current_thread_info() accesses stacktop -
> THREAD_SIZE, which is at most 64k. Is it worth declaring it after we add 64k and
> we align it, something like this:
>
> PROVIDE(stackbase = . - 64K)
>
> Or maybe we shouldn't even create a variable for the base of the stack, and
> compute it in cstart{,64}.S as stacktop - THREAD_SIZE? That could make the boot
> process a tiny bit faster in some cases.

Good idea. I will take both your suggestions and send a v2.

Thanks,
drew
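
For the second suggestion, a rough sketch of how the cstart64.S side
might look (illustrative only; assumes THREAD_SIZE is visible here,
e.g. via asm/thread_info.h, and encodes as a sub immediate):

	/* zero and set up the boot cpu's stack: [stacktop - THREAD_SIZE, stacktop) */
	adrp	x5, stacktop
	add	x5, x5, :lo12:stacktop
	sub	x4, x5, #THREAD_SIZE
	zero_range x4, x5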