Hi Drew,

On 3/22/21 12:10 PM, Andrew Jones wrote:
> So far we've counted on QEMU or kvmtool implicitly zeroing all memory.
> With our goal of eventually supporting bare-metal targets with
> target-efi we should explicitly zero any memory we expect to be zeroed
> ourselves. This obviously includes the BSS, but also the bootcpu's
> stack, as the bootcpu's thread-info lives in the stack and may get
> used in early setup to get the cpu index. Note, this means we still
> assume the bootcpu's cpu index to be zero. That assumption can be
> removed later.
>
> Cc: Nikos Nikoleris <nikos.nikoleris@xxxxxxx>
> Cc: Alexandru Elisei <alexandru.elisei@xxxxxxx>
> Signed-off-by: Andrew Jones <drjones@xxxxxxxxxx>
> ---
>  arm/cstart.S   | 22 ++++++++++++++++++++++
>  arm/cstart64.S | 23 ++++++++++++++++++++++-
>  arm/flat.lds   |  6 ++++++
>  3 files changed, 50 insertions(+), 1 deletion(-)
>
> diff --git a/arm/cstart.S b/arm/cstart.S
> index ef936ae2f874..6de461ef94bf 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -15,12 +15,34 @@
>
>  #define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
>
> +.macro zero_range, tmp1, tmp2, tmp3, tmp4
> +	mov	\tmp3, #0
> +	mov	\tmp4, #0
> +9998:	cmp	\tmp1, \tmp2
> +	beq	9997f
> +	strd	\tmp3, \tmp4, [\tmp1]
> +	add	\tmp1, \tmp1, #8

This could use post-indexed addressing and the add instruction could be
removed. Same for arm64. (There's a rough sketch of what I mean at the
end of this mail.)

> +	b	9998b
> +9997:
> +.endm
> +
> +
>  .arm
>
>  .section .init
>
>  .globl start
>  start:
> +	/* zero BSS */
> +	ldr	r4, =bss
> +	ldr	r5, =ebss
> +	zero_range r4, r5, r6, r7
> +
> +	/* zero stack */
> +	ldr	r4, =stackbase
> +	ldr	r5, =stacktop
> +	zero_range r4, r5, r6, r7
> +
>  	/*
>  	 * set stack, making room at top of stack for cpu0's
>  	 * exception stacks. Must start wtih stackptr, not
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index 0428014aa58a..4dc5989ef50c 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -13,6 +13,15 @@
>  #include <asm/page.h>
>  #include <asm/pgtable-hwdef.h>
>
> +.macro zero_range, tmp1, tmp2
> +9998:	cmp	\tmp1, \tmp2
> +	b.eq	9997f
> +	stp	xzr, xzr, [\tmp1]
> +	add	\tmp1, \tmp1, #16
> +	b	9998b
> +9997:
> +.endm
> +
>  .section .init
>
>  /*
> @@ -51,7 +60,19 @@ start:
>  	b	1b
>
>  1:
> -	/* set up stack */
> +	/* zero BSS */
> +	adrp	x4, bss
> +	add	x4, x4, :lo12:bss
> +	adrp	x5, ebss
> +	add	x5, x5, :lo12:ebss
> +	zero_range x4, x5
> +
> +	/* zero and set up stack */
> +	adrp	x4, stackbase
> +	add	x4, x4, :lo12:stackbase
> +	adrp	x5, stacktop
> +	add	x5, x5, :lo12:stacktop
> +	zero_range x4, x5
>  	mov	x4, #1
>  	msr	spsel, x4
>  	isb
> diff --git a/arm/flat.lds b/arm/flat.lds
> index 25f8d03cba87..8eab3472e2f2 100644
> --- a/arm/flat.lds
> +++ b/arm/flat.lds
> @@ -17,7 +17,11 @@ SECTIONS
>
>      .rodata : { *(.rodata*) }
>      .data : { *(.data) }
> +    . = ALIGN(16);
> +    PROVIDE(bss = .);
>      .bss : { *(.bss) }
> +    . = ALIGN(16);
> +    PROVIDE(ebss = .);
>      . = ALIGN(64K);
>      PROVIDE(edata = .);
>
> @@ -26,6 +30,8 @@ SECTIONS
>
>       * sp must be 16 byte aligned for arm64, and 8 byte aligned for arm
>       * sp must always be strictly less than the true stacktop
>       */
> +    . = ALIGN(16);
> +    PROVIDE(stackbase = .);

This is correct, but strictly speaking, current_thread_info() accesses
stacktop - THREAD_SIZE, which is at most 64K. Is it worth declaring
stackbase after we add the 64K and align it, something like this:

PROVIDE(stackbase = . - 64K)

Or maybe we shouldn't even create a variable for the base of the stack,
and compute it in cstart{,64}.S as stacktop - THREAD_SIZE? That could
make the boot process a tiny bit faster in some cases.
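For the second option, a rough, untested sketch of the arm64 side,
assuming THREAD_SIZE can be used from cstart64.S (it may need an extra
include to be visible there):

	/* zero and set up stack: stackbase = stacktop - THREAD_SIZE */
	adrp	x5, stacktop
	add	x5, x5, :lo12:stacktop
	sub	x4, x5, #THREAD_SIZE
	zero_range x4, x5

with an equivalent sub in cstart.S, which would also let us drop
stackbase from flat.lds entirely.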
Thanks,
Alex

>      . += 64K;
>      . = ALIGN(64K);
>      PROVIDE(stackptr = . - 16);
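P.S. For the post-indexed addressing suggestion above, this is roughly
what I had in mind (untested):

arm:

	strd	\tmp3, \tmp4, [\tmp1], #8

arm64:

	stp	xzr, xzr, [\tmp1], #16

i.e. the store bumps \tmp1 itself, so the separate add in each
zero_range macro goes away.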