From: Chris Zankel <chris@xxxxxxxxxx> The original implementation could clobber registers under certain conditions. The Xtensa processor architecture uses windowed registers and the original implementation was using a4 as a temporary register, which under certain conditions could be register a0 of the oldest window frame, and didn't always restore the content correctly. By moving the _spill_registers routine inside the fast system call, it frees up one more register (the return address is not required anymore) for the spill routine. Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Chris Zankel <chris@xxxxxxxxxx> Signed-off-by: Max Filippov <jcmvbkbc@xxxxxxxxx> --- Changes v2->v3: - new patch. arch/xtensa/kernel/entry.S | 383 ++++++++++++++++++++------------------------- 1 file changed, 174 insertions(+), 209 deletions(-) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e251..ef7f499 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers) rsr a0, sar s32i a3, a2, PT_AREG3 - s32i a4, a2, PT_AREG4 - s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5 + s32i a0, a2, PT_SAR - /* The spill routine might clobber a7, a11, and a15. */ + /* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */ + s32i a4, a2, PT_AREG4 s32i a7, a2, PT_AREG7 + s32i a8, a2, PT_AREG8 s32i a11, a2, PT_AREG11 + s32i a12, a2, PT_AREG12 s32i a15, a2, PT_AREG15 - call0 _spill_registers # destroys a3, a4, and SAR + /* + * Rotate ws so that the current windowbase is at bit 0. + * Assume ws = xxxwww1yy (www1 current window frame). + * Rotate ws right so that a4 = yyxxxwww1. + */ + + rsr a0, windowbase + rsr a3, windowstart # a3 = xxxwww1yy + ssr a0 # holds WB + slli a0, a3, WSBITS + or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy + srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 + + /* We are done if there are no more than the current register frame. */ + + extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww + movi a0, (1 << (WSBITS-1)) + _beqz a3, .Lnospill # only one active frame? jump + + /* We want 1 at the top, so that we return to the current windowbase */ + + or a3, a3, a0 # 1yyxxxwww + + /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ + + wsr a3, windowstart # save shifted windowstart + neg a0, a3 + and a3, a0, a3 # first bit set from right: 000010000 + + ffs_ws a0, a3 # a0: shifts to skip empty frames + movi a3, WSBITS + sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right + ssr a0 # save in SAR for later. + + rsr a3, windowbase + add a3, a3, a0 + wsr a3, windowbase + rsync + + rsr a3, windowstart + srl a3, a3 # shift windowstart + + /* WB is now just one frame below the oldest frame in the register + window. WS is shifted so the oldest frame is in bit 0, thus, WB + and WS differ by one 4-register frame. */ + + /* Save frames. Depending what call was used (call4, call8, call12), + * we have to save 4,8. or 12 registers. + */ + + +.Lloop: _bbsi.l a3, 1, .Lc4 + _bbci.l a3, 2, .Lc12 + +.Lc8: s32e a4, a13, -16 + l32e a4, a5, -12 + s32e a8, a4, -32 + s32e a5, a13, -12 + s32e a6, a13, -8 + s32e a7, a13, -4 + s32e a9, a4, -28 + s32e a10, a4, -24 + s32e a11, a4, -20 + srli a11, a3, 2 # shift windowbase by 2 + rotw 2 + _bnei a3, 1, .Lloop + j .Lexit + +.Lc4: s32e a4, a9, -16 + s32e a5, a9, -12 + s32e a6, a9, -8 + s32e a7, a9, -4 + + srli a7, a3, 1 + rotw 1 + _bnei a3, 1, .Lloop + j .Lexit + +.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! + + /* 12-register frame (call12) */ + + l32e a0, a5, -12 + s32e a8, a0, -48 + mov a8, a0 + + s32e a9, a8, -44 + s32e a10, a8, -40 + s32e a11, a8, -36 + s32e a12, a8, -32 + s32e a13, a8, -28 + s32e a14, a8, -24 + s32e a15, a8, -20 + srli a15, a3, 3 + + /* The stack pointer for a4..a7 is out of reach, so we rotate the + * window, grab the stackpointer, and rotate back. + * Alternatively, we could also use the following approach, but that + * makes the fixup routine much more complicated: + * rotw 1 + * s32e a0, a13, -16 + * ... + * rotw 2 + */ + + rotw 1 + mov a4, a13 + rotw -1 + + s32e a4, a8, -16 + s32e a5, a8, -12 + s32e a6, a8, -8 + s32e a7, a8, -4 + + rotw 3 + + _beqi a3, 1, .Lexit + j .Lloop + +.Lexit: + + /* Done. Do the final rotation and set WS */ + + rotw 1 + rsr a3, windowbase + ssl a3 + movi a3, 1 + sll a3, a3 + wsr a3, windowstart +.Lnospill: /* Advance PC, restore registers and SAR, and return from exception. */ - l32i a3, a2, PT_AREG5 - l32i a4, a2, PT_AREG4 + l32i a3, a2, PT_SAR l32i a0, a2, PT_AREG0 wsr a3, sar l32i a3, a2, PT_AREG3 /* Restore clobbered registers. */ + l32i a4, a2, PT_AREG4 l32i a7, a2, PT_AREG7 + l32i a8, a2, PT_AREG8 l32i a11, a2, PT_AREG11 + l32i a12, a2, PT_AREG12 l32i a15, a2, PT_AREG15 movi a2, 0 rfe +.Linvalid_mask: + + /* We get here because of an unrecoverable error in the window + * registers, so set up a dummy frame and kill the user application. + * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. + */ + + movi a0, 1 + movi a1, 0 + + wsr a0, windowstart + wsr a1, windowbase + rsync + + movi a0, 0 + + rsr a3, excsave1 + l32i a1, a3, EXC_TABLE_KSTK + + movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL + wsr a4, ps + rsync + + movi a6, SIGSEGV + movi a4, do_exit + callx4 a4 + + /* shouldn't return, so panic */ + + wsr a0, excsave1 + movi a0, unrecoverable_exception + callx0 a0 # should not return +1: j 1b + + ENDPROC(fast_syscall_spill_registers) /* Fixup handler. @@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return) ENDPROC(fast_syscall_spill_registers_fixup_return) -/* - * spill all registers. - * - * This is not a real function. The following conditions must be met: - * - * - must be called with call0. - * - uses a3, a4 and SAR. - * - the last 'valid' register of each frame are clobbered. - * - the caller must have registered a fixup handler - * (or be inside a critical section) - * - PS_EXCM must be set (PS_WOE cleared?) - */ - -ENTRY(_spill_registers) - - /* - * Rotate ws so that the current windowbase is at bit 0. - * Assume ws = xxxwww1yy (www1 current window frame). - * Rotate ws right so that a4 = yyxxxwww1. - */ - - rsr a4, windowbase - rsr a3, windowstart # a3 = xxxwww1yy - ssr a4 # holds WB - slli a4, a3, WSBITS - or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy - srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 - - /* We are done if there are no more than the current register frame. */ - - extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww - movi a4, (1 << (WSBITS-1)) - _beqz a3, .Lnospill # only one active frame? jump - - /* We want 1 at the top, so that we return to the current windowbase */ - - or a3, a3, a4 # 1yyxxxwww - - /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ - - wsr a3, windowstart # save shifted windowstart - neg a4, a3 - and a3, a4, a3 # first bit set from right: 000010000 - - ffs_ws a4, a3 # a4: shifts to skip empty frames - movi a3, WSBITS - sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right - ssr a4 # save in SAR for later. - - rsr a3, windowbase - add a3, a3, a4 - wsr a3, windowbase - rsync - - rsr a3, windowstart - srl a3, a3 # shift windowstart - - /* WB is now just one frame below the oldest frame in the register - window. WS is shifted so the oldest frame is in bit 0, thus, WB - and WS differ by one 4-register frame. */ - - /* Save frames. Depending what call was used (call4, call8, call12), - * we have to save 4,8. or 12 registers. - */ - - _bbsi.l a3, 1, .Lc4 - _bbsi.l a3, 2, .Lc8 - - /* Special case: we have a call12-frame starting at a4. */ - - _bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first) - - s32e a4, a1, -16 # a1 is valid with an empty spill area - l32e a4, a5, -12 - s32e a8, a4, -48 - mov a8, a4 - l32e a4, a1, -16 - j .Lc12c - -.Lnospill: - ret - -.Lloop: _bbsi.l a3, 1, .Lc4 - _bbci.l a3, 2, .Lc12 - -.Lc8: s32e a4, a13, -16 - l32e a4, a5, -12 - s32e a8, a4, -32 - s32e a5, a13, -12 - s32e a6, a13, -8 - s32e a7, a13, -4 - s32e a9, a4, -28 - s32e a10, a4, -24 - s32e a11, a4, -20 - - srli a11, a3, 2 # shift windowbase by 2 - rotw 2 - _bnei a3, 1, .Lloop - -.Lexit: /* Done. Do the final rotation, set WS, and return. */ - - rotw 1 - rsr a3, windowbase - ssl a3 - movi a3, 1 - sll a3, a3 - wsr a3, windowstart - ret - -.Lc4: s32e a4, a9, -16 - s32e a5, a9, -12 - s32e a6, a9, -8 - s32e a7, a9, -4 - - srli a7, a3, 1 - rotw 1 - _bnei a3, 1, .Lloop - j .Lexit - -.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! - - /* 12-register frame (call12) */ - - l32e a2, a5, -12 - s32e a8, a2, -48 - mov a8, a2 - -.Lc12c: s32e a9, a8, -44 - s32e a10, a8, -40 - s32e a11, a8, -36 - s32e a12, a8, -32 - s32e a13, a8, -28 - s32e a14, a8, -24 - s32e a15, a8, -20 - srli a15, a3, 3 - - /* The stack pointer for a4..a7 is out of reach, so we rotate the - * window, grab the stackpointer, and rotate back. - * Alternatively, we could also use the following approach, but that - * makes the fixup routine much more complicated: - * rotw 1 - * s32e a0, a13, -16 - * ... - * rotw 2 - */ - - rotw 1 - mov a5, a13 - rotw -1 - - s32e a4, a9, -16 - s32e a5, a9, -12 - s32e a6, a9, -8 - s32e a7, a9, -4 - - rotw 3 - - _beqi a3, 1, .Lexit - j .Lloop - -.Linvalid_mask: - - /* We get here because of an unrecoverable error in the window - * registers. If we are in user space, we kill the application, - * however, this condition is unrecoverable in kernel space. - */ - - rsr a0, ps - _bbci.l a0, PS_UM_BIT, 1f - - /* User space: Setup a dummy frame and kill application. - * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. - */ - - movi a0, 1 - movi a1, 0 - - wsr a0, windowstart - wsr a1, windowbase - rsync - - movi a0, 0 - - rsr a3, excsave1 - l32i a1, a3, EXC_TABLE_KSTK - - movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL - wsr a4, ps - rsync - - movi a6, SIGSEGV - movi a4, do_exit - callx4 a4 - -1: /* Kernel space: PANIC! */ - - wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 # should not return -1: j 1b - -ENDPROC(_spill_registers) - #ifdef CONFIG_MMU /* * We should never get here. Bail out! -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html