[PATCH v3 3/3] xtensa: fix fast_syscall_spill_registers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Chris Zankel <chris@xxxxxxxxxx>

The original implementation could clobber registers under certain conditions.

The Xtensa processor architecture uses windowed registers and the original
implementation was using a4 as a temporary register, which under certain
conditions could be register a0 of the oldest window frame, and didn't always
restore the content correctly.

By moving the _spill_registers routine inside the fast system call, it frees
up one more register (the return address is not required anymore) for the
spill routine.

Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Chris Zankel <chris@xxxxxxxxxx>
Signed-off-by: Max Filippov <jcmvbkbc@xxxxxxxxx>
---
Changes v2->v3:
- new patch.

 arch/xtensa/kernel/entry.S | 383 ++++++++++++++++++++-------------------------
 1 file changed, 174 insertions(+), 209 deletions(-)

diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b61e251..ef7f499 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers)
 
 	rsr	a0, sar
 	s32i	a3, a2, PT_AREG3
-	s32i	a4, a2, PT_AREG4
-	s32i	a0, a2, PT_AREG5	# store SAR to PT_AREG5
+	s32i	a0, a2, PT_SAR
 
-	/* The spill routine might clobber a7, a11, and a15. */
+	/* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */
 
+	s32i	a4, a2, PT_AREG4
 	s32i	a7, a2, PT_AREG7
+	s32i	a8, a2, PT_AREG8
 	s32i	a11, a2, PT_AREG11
+	s32i	a12, a2, PT_AREG12
 	s32i	a15, a2, PT_AREG15
 
-	call0	_spill_registers	# destroys a3, a4, and SAR
+	/*
+	 * Rotate ws so that the current windowbase is at bit 0.
+	 * Assume ws = xxxwww1yy (www1 current window frame).
+	 * Rotate ws right so that a4 = yyxxxwww1.
+	 */
+
+	rsr	a0, windowbase
+	rsr	a3, windowstart		# a3 = xxxwww1yy
+	ssr	a0			# holds WB
+	slli	a0, a3, WSBITS
+	or	a3, a3, a0		# a3 = xxxwww1yyxxxwww1yy
+	srl	a3, a3			# a3 = 00xxxwww1yyxxxwww1
+
+	/* We are done if there are no more than the current register frame. */
+
+	extui	a3, a3, 1, WSBITS-1	# a3 = 0yyxxxwww
+	movi	a0, (1 << (WSBITS-1))
+	_beqz	a3, .Lnospill		# only one active frame? jump
+
+	/* We want 1 at the top, so that we return to the current windowbase */
+
+	or	a3, a3, a0		# 1yyxxxwww
+
+	/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
+
+	wsr	a3, windowstart		# save shifted windowstart
+	neg	a0, a3
+	and	a3, a0, a3		# first bit set from right: 000010000
+
+	ffs_ws	a0, a3			# a0: shifts to skip empty frames
+	movi	a3, WSBITS
+	sub	a0, a3, a0		# WSBITS-a0:number of 0-bits from right
+	ssr	a0			# save in SAR for later.
+
+	rsr	a3, windowbase
+	add	a3, a3, a0
+	wsr	a3, windowbase
+	rsync
+
+	rsr	a3, windowstart
+	srl	a3, a3			# shift windowstart
+
+	/* WB is now just one frame below the oldest frame in the register
+	   window. WS is shifted so the oldest frame is in bit 0, thus, WB
+	   and WS differ by one 4-register frame. */
+
+	/* Save frames. Depending what call was used (call4, call8, call12),
+	 * we have to save 4,8. or 12 registers.
+	 */
+
+
+.Lloop: _bbsi.l	a3, 1, .Lc4
+	_bbci.l	a3, 2, .Lc12
+
+.Lc8:	s32e	a4, a13, -16
+	l32e	a4, a5, -12
+	s32e	a8, a4, -32
+	s32e	a5, a13, -12
+	s32e	a6, a13, -8
+	s32e	a7, a13, -4
+	s32e	a9, a4, -28
+	s32e	a10, a4, -24
+	s32e	a11, a4, -20
+	srli	a11, a3, 2		# shift windowbase by 2
+	rotw	2
+	_bnei	a3, 1, .Lloop
+	j	.Lexit
+
+.Lc4:	s32e	a4, a9, -16
+	s32e	a5, a9, -12
+	s32e	a6, a9, -8
+	s32e	a7, a9, -4
+
+	srli	a7, a3, 1
+	rotw	1
+	_bnei	a3, 1, .Lloop
+	j	.Lexit
+
+.Lc12:	_bbci.l	a3, 3, .Linvalid_mask	# bit 2 shouldn't be zero!
+
+	/* 12-register frame (call12) */
+
+	l32e	a0, a5, -12
+	s32e	a8, a0, -48
+	mov	a8, a0
+
+	s32e	a9, a8, -44
+	s32e	a10, a8, -40
+	s32e	a11, a8, -36
+	s32e	a12, a8, -32
+	s32e	a13, a8, -28
+	s32e	a14, a8, -24
+	s32e	a15, a8, -20
+	srli	a15, a3, 3
+
+	/* The stack pointer for a4..a7 is out of reach, so we rotate the
+	 * window, grab the stackpointer, and rotate back.
+	 * Alternatively, we could also use the following approach, but that
+	 * makes the fixup routine much more complicated:
+	 * rotw	1
+	 * s32e	a0, a13, -16
+	 * ...
+	 * rotw 2
+	 */
+
+	rotw	1
+	mov	a4, a13
+	rotw	-1
+
+	s32e	a4, a8, -16
+	s32e	a5, a8, -12
+	s32e	a6, a8, -8
+	s32e	a7, a8, -4
+
+	rotw	3
+
+	_beqi	a3, 1, .Lexit
+	j	.Lloop
+
+.Lexit:
+
+	/* Done. Do the final rotation and set WS */
+
+	rotw	1
+	rsr	a3, windowbase
+	ssl	a3
+	movi	a3, 1
+	sll	a3, a3
+	wsr	a3, windowstart
+.Lnospill:
 
 	/* Advance PC, restore registers and SAR, and return from exception. */
 
-	l32i	a3, a2, PT_AREG5
-	l32i	a4, a2, PT_AREG4
+	l32i	a3, a2, PT_SAR
 	l32i	a0, a2, PT_AREG0
 	wsr	a3, sar
 	l32i	a3, a2, PT_AREG3
 
 	/* Restore clobbered registers. */
 
+	l32i	a4, a2, PT_AREG4
 	l32i	a7, a2, PT_AREG7
+	l32i	a8, a2, PT_AREG8
 	l32i	a11, a2, PT_AREG11
+	l32i	a12, a2, PT_AREG12
 	l32i	a15, a2, PT_AREG15
 
 	movi	a2, 0
 	rfe
 
+.Linvalid_mask:
+
+	/* We get here because of an unrecoverable error in the window
+	 * registers, so set up a dummy frame and kill the user application.
+	 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
+	 */
+
+	movi	a0, 1
+	movi	a1, 0
+
+	wsr	a0, windowstart
+	wsr	a1, windowbase
+	rsync
+
+	movi	a0, 0
+
+	rsr	a3, excsave1
+	l32i	a1, a3, EXC_TABLE_KSTK
+
+	movi	a4, (1 << PS_WOE_BIT) | LOCKLEVEL
+	wsr	a4, ps
+	rsync
+
+	movi	a6, SIGSEGV
+	movi	a4, do_exit
+	callx4	a4
+
+	/* shouldn't return, so panic */
+
+	wsr	a0, excsave1
+	movi	a0, unrecoverable_exception
+	callx0	a0		# should not return
+1:	j	1b
+
+
 ENDPROC(fast_syscall_spill_registers)
 
 /* Fixup handler.
@@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return)
 
 ENDPROC(fast_syscall_spill_registers_fixup_return)
 
-/*
- * spill all registers.
- *
- * This is not a real function. The following conditions must be met:
- *
- *  - must be called with call0.
- *  - uses a3, a4 and SAR.
- *  - the last 'valid' register of each frame are clobbered.
- *  - the caller must have registered a fixup handler
- *    (or be inside a critical section)
- *  - PS_EXCM must be set (PS_WOE cleared?)
- */
-
-ENTRY(_spill_registers)
-
-	/*
-	 * Rotate ws so that the current windowbase is at bit 0.
-	 * Assume ws = xxxwww1yy (www1 current window frame).
-	 * Rotate ws right so that a4 = yyxxxwww1.
-	 */
-
-	rsr	a4, windowbase
-	rsr	a3, windowstart		# a3 = xxxwww1yy
-	ssr	a4			# holds WB
-	slli	a4, a3, WSBITS
-	or	a3, a3, a4		# a3 = xxxwww1yyxxxwww1yy
-	srl	a3, a3			# a3 = 00xxxwww1yyxxxwww1
-
-	/* We are done if there are no more than the current register frame. */
-
-	extui	a3, a3, 1, WSBITS-1	# a3 = 0yyxxxwww
-	movi	a4, (1 << (WSBITS-1))
-	_beqz	a3, .Lnospill		# only one active frame? jump
-
-	/* We want 1 at the top, so that we return to the current windowbase */
-
-	or	a3, a3, a4		# 1yyxxxwww
-
-	/* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
-
-	wsr	a3, windowstart		# save shifted windowstart
-	neg	a4, a3
-	and	a3, a4, a3		# first bit set from right: 000010000
-
-	ffs_ws	a4, a3			# a4: shifts to skip empty frames
-	movi	a3, WSBITS
-	sub	a4, a3, a4		# WSBITS-a4:number of 0-bits from right
-	ssr	a4			# save in SAR for later.
-
-	rsr	a3, windowbase
-	add	a3, a3, a4
-	wsr	a3, windowbase
-	rsync
-
-	rsr	a3, windowstart
-	srl	a3, a3			# shift windowstart
-
-	/* WB is now just one frame below the oldest frame in the register
-	   window. WS is shifted so the oldest frame is in bit 0, thus, WB
-	   and WS differ by one 4-register frame. */
-
-	/* Save frames. Depending what call was used (call4, call8, call12),
-	 * we have to save 4,8. or 12 registers.
-	 */
-
-	_bbsi.l	a3, 1, .Lc4
-	_bbsi.l	a3, 2, .Lc8
-
-	/* Special case: we have a call12-frame starting at a4. */
-
-	_bbci.l	a3, 3, .Lc12	# bit 3 shouldn't be zero! (Jump to Lc12 first)
-
-	s32e	a4, a1, -16	# a1 is valid with an empty spill area
-	l32e	a4, a5, -12
-	s32e	a8, a4, -48
-	mov	a8, a4
-	l32e	a4, a1, -16
-	j	.Lc12c
-
-.Lnospill:
-	ret
-
-.Lloop: _bbsi.l	a3, 1, .Lc4
-	_bbci.l	a3, 2, .Lc12
-
-.Lc8:	s32e	a4, a13, -16
-	l32e	a4, a5, -12
-	s32e	a8, a4, -32
-	s32e	a5, a13, -12
-	s32e	a6, a13, -8
-	s32e	a7, a13, -4
-	s32e	a9, a4, -28
-	s32e	a10, a4, -24
-	s32e	a11, a4, -20
-
-	srli	a11, a3, 2		# shift windowbase by 2
-	rotw	2
-	_bnei	a3, 1, .Lloop
-
-.Lexit: /* Done. Do the final rotation, set WS, and return. */
-
-	rotw	1
-	rsr	a3, windowbase
-	ssl	a3
-	movi	a3, 1
-	sll	a3, a3
-	wsr	a3, windowstart
-	ret
-
-.Lc4:	s32e	a4, a9, -16
-	s32e	a5, a9, -12
-	s32e	a6, a9, -8
-	s32e	a7, a9, -4
-
-	srli	a7, a3, 1
-	rotw	1
-	_bnei	a3, 1, .Lloop
-	j	.Lexit
-
-.Lc12:	_bbci.l	a3, 3, .Linvalid_mask	# bit 2 shouldn't be zero!
-
-	/* 12-register frame (call12) */
-
-	l32e	a2, a5, -12
-	s32e	a8, a2, -48
-	mov	a8, a2
-
-.Lc12c: s32e	a9, a8, -44
-	s32e	a10, a8, -40
-	s32e	a11, a8, -36
-	s32e	a12, a8, -32
-	s32e	a13, a8, -28
-	s32e	a14, a8, -24
-	s32e	a15, a8, -20
-	srli	a15, a3, 3
-
-	/* The stack pointer for a4..a7 is out of reach, so we rotate the
-	 * window, grab the stackpointer, and rotate back.
-	 * Alternatively, we could also use the following approach, but that
-	 * makes the fixup routine much more complicated:
-	 * rotw	1
-	 * s32e	a0, a13, -16
-	 * ...
-	 * rotw 2
-	 */
-
-	rotw	1
-	mov	a5, a13
-	rotw	-1
-
-	s32e	a4, a9, -16
-	s32e	a5, a9, -12
-	s32e	a6, a9, -8
-	s32e	a7, a9, -4
-
-	rotw	3
-
-	_beqi	a3, 1, .Lexit
-	j	.Lloop
-
-.Linvalid_mask:
-
-	/* We get here because of an unrecoverable error in the window
-	 * registers. If we are in user space, we kill the application,
-	 * however, this condition is unrecoverable in kernel space.
-	 */
-
-	rsr	a0, ps
-	_bbci.l	a0, PS_UM_BIT, 1f
-
-	/* User space: Setup a dummy frame and kill application.
-	 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
-	 */
-
-	movi	a0, 1
-	movi	a1, 0
-
-	wsr	a0, windowstart
-	wsr	a1, windowbase
-	rsync
-
-	movi	a0, 0
-
-	rsr	a3, excsave1
-	l32i	a1, a3, EXC_TABLE_KSTK
-
-	movi	a4, (1 << PS_WOE_BIT) | LOCKLEVEL
-	wsr	a4, ps
-	rsync
-
-	movi	a6, SIGSEGV
-	movi	a4, do_exit
-	callx4	a4
-
-1:	/* Kernel space: PANIC! */
-
-	wsr	a0, excsave1
-	movi	a0, unrecoverable_exception
-	callx0	a0		# should not return
-1:	j	1b
-
-ENDPROC(_spill_registers)
-
 #ifdef CONFIG_MMU
 /*
  * We should never get here. Bail out!
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]