Thiemo Seufer wrote:
> Ralf Baechle wrote:
> > On Mon, Nov 22, 2004 at 08:00:04AM +0100, Thiemo Seufer wrote:
> > 
> > > > Why bother, the unaligned exception handler should take care of this.
> > > 
> > > It really does so for unaligned accesses from kernel space?
> > 
> > Yes. In fact it's crucially important for this very case.
> 
> Ok, I'll update the patch accordingly when I'm back to better
> connectivity than I have now.
> 
> [snip]
> > > has 4 bytes and is loaded with lw. Using a macro which abstracts for
> > > 32/64bit compilation hides this needlessly, and can even lead to the
> > > erroneous impression the code would be useful for 64bit, too.
> > 
> > I'm more following the religion of using such abstractions everywhere
> > because code tends to be copied around mindlessly ...
> 
> I would agree if there was a roughly similar 64bit version of the code.
> But due to the differences between 32bit and 64bit kernel there will
> never be one, so it's IMHO best to make them as distinct as reasonable
> in this case.

The appended patch leaves unaligned stack handling alone and is updated
to newest CVS, including the recent ptrace fix.


Thiemo

Index: arch/mips/kernel/scall32-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall32-o32.S,v
retrieving revision 1.17
diff -u -p -r1.17 scall32-o32.S
--- arch/mips/kernel/scall32-o32.S	25 Nov 2004 13:40:10 -0000	1.17
+++ arch/mips/kernel/scall32-o32.S	26 Nov 2004 23:15:47 -0000
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 1995, 96, 97, 98, 99, 2000, 01, 02 by Ralf Baechle
  * Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
  */
 #include <linux/config.h>
 #include <linux/errno.h>
@@ -32,26 +33,30 @@ NESTED(handle_sys, PT_SIZE, sp)
 	lw	t1, PT_EPC(sp)		# skip syscall on return

+#if defined(CONFIG_BINFMT_IRIX)
 	sltiu	t0, v0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+	subu	v0, v0, __NR_O32_Linux	# check syscall number
+	sltiu	t0, v0, __NR_O32_Linux_syscalls + 1
+#endif
 	addiu	t1, 4			# skip to next instruction
 	sw	t1, PT_EPC(sp)
 	beqz	t0, illegal_syscall

-	/* XXX Put both in one cacheline, should save a bit. */
-	sll	t0, v0, 2
-	lw	t2, sys_call_table(t0)	# syscall routine
-	lbu	t3, sys_narg_table(v0)	# number of arguments
-	beqz	t2, illegal_syscall;
+	sll	t0, v0, 3
+	la	t1, sys_call_table
+	addu	t1, t0
+	lw	t2, (t1)		# syscall routine
+	lw	t3, 4(t1)		# >= 0 if we need stack arguments
+	beqz	t2, illegal_syscall

-	subu	t0, t3, 5		# 5 or more arguments?
 	sw	a3, PT_R26(sp)		# save a3 for syscall restarting
-	bgez	t0, stackargs
+	bgez	t3, stackargs

 stack_done:
-	sw	a3, PT_R26(sp)		# save for syscall restart
-	LONG_L	t0, TI_FLAGS($28)	# syscall tracing enabled?
+	lw	t0, TI_FLAGS($28)	# syscall tracing enabled?
 	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
-	and	t0, t1, t0
+	and	t0, t1
 	bnez	t0, syscall_trace_entry	# -> yes

 	jalr	t2			# Do The Real Thing (TM)
@@ -70,9 +75,9 @@ o32_syscall_exit:
 	local_irq_disable		# make sure need_resched and
 					# signals dont change between
 					# sampling and return
-	LONG_L	a2, TI_FLAGS($28)	# current->work
+	lw	a2, TI_FLAGS($28)	# current->work
 	li	t0, _TIF_ALLWORK_MASK
-	and	t0, a2, t0
+	and	t0, a2
 	bnez	t0, o32_syscall_exit_work

 	j	restore_partial
@@ -116,49 +121,48 @@ syscall_trace_entry:
  */
 stackargs:
 	lw	t0, PT_R29(sp)		# get old user stack pointer
-	subu	t3, 4
-	sll	t1, t3, 2		# stack valid?
-
-	addu	t1, t0			# end address
-	or	t0, t1
-	bltz	t0, bad_stack		# -> sp is bad
-
-	lw	t0, PT_R29(sp)		# get old user stack pointer
-	PTR_LA	t1, 4f			# copy 1 to 3 arguments
-	sll	t3, t3, 4
-	subu	t1, t3
-	jr	t1
-	/* Ok, copy the args from the luser stack to the kernel stack */
 	/*
-	 * I know Ralf doesn't like nops but this avoids code
-	 * duplication for R3000 targets (and this is the
-	 * only place where ".set reorder" doesn't help).
-	 * Harald.
+	 * We intentionally keep the kernel stack a little below the top of
+	 * userspace so we don't have to do a slower byte accurate check here.
 	 */

+	lw	t5, TI_ADDR_LIMIT($28)
+	addu	t4, t0, 32
+	and	t5, t4
+	bltz	t5, bad_stack		# -> sp is bad
+
+	/* Ok, copy the args from the luser stack to the kernel stack.
+	 * t3 is the precomputed number of instruction bytes needed to
+	 * load or store arguments 6-8.
+	 */
+
+	la	t1, 5f			# load up to 3 arguments
+	subu	t1, t3
+1:	lw	t5, 16(t0)		# argument #5 from usp
 	.set	push
 	.set	noreorder
 	.set	nomacro
-1:	lw	t1, 24(t0)		# argument #7 from usp
-	nop
-	sw	t1, 24(sp)
-	nop
-2:	lw	t1, 20(t0)		# argument #5 from usp
-	nop
-	sw	t1, 20(sp)
-	nop
-3:	lw	t1, 16(t0)		# argument #5 from usp
-	nop
-	sw	t1, 16(sp)
-	nop
-4:	.set	pop
+	jr	t1
+	 addiu	t1, 6f - 5f

-	j	stack_done		# go back
+2:	lw	t8, 28(t0)		# argument #8 from usp
+3:	lw	t7, 24(t0)		# argument #7 from usp
+4:	lw	t6, 20(t0)		# argument #6 from usp
+5:	jr	t1
+	 sw	t5, 16(sp)		# argument #5 to ksp
+
+	 sw	t8, 28(sp)		# argument #8 to ksp
+	 sw	t7, 24(sp)		# argument #7 to ksp
+	 sw	t6, 20(sp)		# argument #6 to ksp
+6:	j	stack_done		# go back
+	 nop
+	.set	pop

 	.section __ex_table,"a"
 	PTR	1b,bad_stack
 	PTR	2b,bad_stack
 	PTR	3b,bad_stack
+	PTR	4b,bad_stack
 	.previous

 /*
@@ -238,12 +242,12 @@ illegal_syscall:
 	sw	v0, PT_R2(sp)		# result

 	/* Success, so skip usual error handling garbage. */
-	LONG_L	a2, TI_FLAGS($28)	# syscall tracing enabled?
+	lw	a2, TI_FLAGS($28)	# syscall tracing enabled?
 	li	t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
 	and	t0, a2, t0
 	bnez	t0, 1f

-	b	o32_syscall_exit
+	j	o32_syscall_exit

 1:	SAVE_STATIC
 	move	a0, sp
@@ -269,67 +273,47 @@ bad_alignment:
 	END(sys_sysmips)

 LEAF(sys_syscall)
-	lw	t0, PT_R29(sp)		# user sp
-
-	sltu	v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
+#if defined(CONFIG_BINFMT_IRIX)
+	sltiu	v0, a0, MAX_SYSCALL_NO + 1 # check syscall number
+#else
+	subu	v0, a0, __NR_O32_Linux	# check syscall number
+	sltiu	v0, v0, __NR_O32_Linux_syscalls + 1
+#endif
 	beqz	v0, enosys

-	sll	v0, a0, 2
-	la	v1, sys_syscall
-	lw	t2, sys_call_table(v0)	# function pointer
-	lbu	t4, sys_narg_table(a0)	# number of arguments
-
-	li	v0, -EINVAL
-	beq	t2, v1, out		# do not recurse
+	sll	t0, v0, 3
+	lw	t2, sys_call_table(t0)	# syscall routine
+	li	v1, 4000		# nr of sys_syscall
 	beqz	t2, enosys		# null function pointer?

-	andi	v0, t0, 0x3		# unaligned stack pointer?
-	bnez	v0, sigsegv
+	li	v0, -EINVAL
+	beq	a0, v1, out		# do not recurse

-	addu	v0, t0, 16		# v0 = usp + 16
-	addu	t1, v0, 12		# 3 32-bit arguments
-	lw	v1, TI_ADDR_LIMIT($28)
-	or	v0, v0, t1
-	and	v1, v1, v0
-	bltz	v1, efault
+	/* Some syscalls like execve get their arguments from struct pt_regs
+	   and claim zero arguments in the syscall table. Thus we have to
+	   assume the worst case and shuffle around all potential arguments.
+	   If you want performance, don't use indirect syscalls. */
 	move	a0, a1			# shift argument registers
 	move	a1, a2
 	move	a2, a3
-
-1:	lw	a3, 16(t0)
-2:	lw	t3, 20(t0)
-3:	lw	t4, 24(t0)
-
-	.section __ex_table, "a"
-	.word	1b, efault
-	.word	2b, efault
-	.word	3b, efault
-	.previous
-
-	sw	t3, 16(sp)		# put into new stackframe
-	sw	t4, 20(sp)
-
-	bnez	t4, 1f			# zero arguments?
-	addu	a0, sp, 32		# then pass sp in a0
-1:
-
-	sw	t3, 16(sp)
-	sw	v1, 20(sp)
+	lw	a3, 16(sp)
+	lw	t4, 20(sp)
+	lw	t5, 24(sp)
+	lw	t6, 28(sp)
+	sw	t4, 16(sp)
+	sw	t5, 20(sp)
+	sw	t6, 24(sp)
+	sw	a0, PT_R4(sp)		# .. and push back a0 - a3, some
+	sw	a1, PT_R5(sp)		# syscalls expect them there
+	sw	a2, PT_R6(sp)
+	sw	a3, PT_R7(sp)
+	sw	a3, PT_R26(sp)		# update a3 for syscall restarting
 	jr	t2
 	/* Unreached */

 enosys:	li	v0, -ENOSYS
-	b	out
-sigsegv:
-	li	a0, _SIGSEGV
-	move	a1, $28
-	jal	force_sig
-	/* Fall through */
-efault:	li	v0, -EFAULT
 out:	jr	ra
 	END(sys_syscall)
@@ -349,12 +333,14 @@ out:	jr	ra
 	.endm

 	.macro	syscalltable
+#if defined(CONFIG_BINFMT_IRIX)
 	mille	sys_ni_syscall	0	/* 0 - 999 SVR4 flavour */
-	#include "irix5sys.h"		/* 1000 - 1999 32-bit IRIX */
+# include "irix5sys.h"			/* 1000 - 1999 32-bit IRIX */
 	mille	sys_ni_syscall	0	/* 2000 - 2999 BSD43 flavour */
 	mille	sys_ni_syscall	0	/* 3000 - 3999 POSIX flavour */
+#endif

-	sys	sys_syscall	0	/* 4000 */
+	sys	sys_syscall	8	/* 4000 */
 	sys	sys_exit	1
 	sys	sys_fork	0
 	sys	sys_read	3
@@ -640,19 +626,16 @@ out:	jr	ra
 	.endm

+	/* We pre-compute the number of _instruction_ bytes needed to
+	   load or store the arguments 6-8. Negative values are ignored. */
+
 	.macro	sys function, nargs
 	PTR	\function
+	LONG	(\nargs << 2) - (5 << 2)
 	.endm

 	.align	3
+	.type	sys_call_table,@object
 sys_call_table:
 	syscalltable
 	.size	sys_call_table, . - sys_call_table
-
-	.macro	sys function, nargs
-	.byte	\nargs
-	.endm
-
-sys_narg_table:
-	syscalltable
-	.size	sys_narg_table, . - sys_narg_table

Index: arch/mips/kernel/scall64-o32.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/kernel/scall64-o32.S,v
retrieving revision 1.24
diff -u -p -r1.24 scall64-o32.S
--- arch/mips/kernel/scall64-o32.S	25 Nov 2004 13:40:10 -0000	1.24
+++ arch/mips/kernel/scall64-o32.S	26 Nov 2004 23:15:47 -0000
@@ -6,6 +6,7 @@
  * Copyright (C) 1995 - 2000, 2001 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2004 Thiemo Seufer
  *
  * Hairy, the userspace application uses a different argument passing
  * convention than the kernel, so we have to translate things from o32
@@ -43,6 +44,8 @@ NESTED(handle_sys, PT_SIZE, sp)
 	RESTORE_ALL
 #endif
+	/* We don't want to stumble over broken sign extensions from
+	   userland. O32 does never use the upper half. */
 	sll	a0, a0, 0
 	sll	a1, a1, 0
 	sll	a2, a2, 0
 	sll	a3, a3, 0
@@ -68,11 +71,13 @@ NESTED(handle_sys, PT_SIZE, sp)
 1:	lw	a4, 16(t0)		# argument #5 from usp
 2:	lw	a5, 20(t0)		# argument #6 from usp
 3:	lw	a6, 24(t0)		# argument #7 from usp
+4:	lw	a7, 28(t0)		# argument #8 from usp (for indirect syscalls)

 	.section __ex_table,"a"
 	PTR	1b, bad_stack
 	PTR	2b, bad_stack
 	PTR	3b, bad_stack
+	PTR	4b, bad_stack
 	.previous

 	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
@@ -91,7 +96,7 @@ NESTED(handle_sys, PT_SIZE, sp)
 	sd	v0, PT_R0(sp)		# flag for syscall restarting
 1:	sd	v0, PT_R2(sp)		# result

-FEXPORT(o32_syscall_exit)
+o32_syscall_exit:
 	local_irq_disable		# make need_resched and
 					# signals dont change between
 					# sampling and return
@@ -109,12 +114,12 @@ o32_syscall_exit_work:

 trace_a_syscall:
 	SAVE_STATIC
-	sd	a4, PT_R8(sp)
+	sd	a4, PT_R8(sp)		# Save argument registers
 	sd	a5, PT_R9(sp)
 	sd	a6, PT_R10(sp)
-	sd	a7, PT_R11(sp)
+	sd	a7, PT_R11(sp)		# For indirect syscalls

-	move	s0, t2
+	move	s0, t2			# Save syscall pointer
 	move	a0, sp
 	li	a1, 0
 	jal	do_syscall_trace
@@ -125,7 +130,8 @@ trace_a_syscall:
 	ld	a3, PT_R7(sp)
 	ld	a4, PT_R8(sp)
 	ld	a5, PT_R9(sp)
-	ld	a6, PT_R10(sp)		# For indirect syscalls
+	ld	a6, PT_R10(sp)
+	ld	a7, PT_R11(sp)		# For indirect syscalls
 	jalr	s0

 	li	t0, -EMAXERRNO - 1	# error?
@@ -173,55 +179,40 @@ illegal_syscall:
 	END(handle_sys)

 LEAF(sys32_syscall)
-	ld	t0, PT_R29(sp)		# user sp
-
 	sltu	v0, a0, __NR_O32_Linux + __NR_O32_Linux_syscalls + 1
 	beqz	v0, enosys

 	dsll	v0, a0, 3
-	dla	v1, sys32_syscall
 	ld	t2, (sys_call_table - (__NR_O32_Linux * 8))(v0)
+	li	v1, 4000		# indirect syscall number

 	li	v0, -EINVAL
-	beq	t2, v1, out		# do not recurse
+	beq	a0, v1, out		# do not recurse
 	beqz	t2, enosys		# null function pointer?
-	andi	v0, t0, 0x3		# unaligned stack pointer?
-	bnez	v0, sigsegv
-
-	daddiu	v0, t0, 16		# v0 = usp + 16
-	daddu	t1, v0, 12		# 3 32-bit arguments
-	ld	v1, TI_ADDR_LIMIT($28)
-	or	v0, v0, t1
-	and	v1, v1, v0
-	bnez	v1, efault
-
 	move	a0, a1			# shift argument registers
 	move	a1, a2
 	move	a2, a3
 	move	a3, a4
 	move	a4, a5
 	move	a5, a6
+	move	a6, a7
+	sd	a0, PT_R4(sp)		# ... and push back a0 - a3, some
+	sd	a1, PT_R5(sp)		# syscalls expect them there
+	sd	a2, PT_R6(sp)
+	sd	a3, PT_R7(sp)
+	sd	a3, PT_R26(sp)		# update a3 for syscall restarting
 	jr	t2
 	/* Unreached */

 enosys:	li	v0, -ENOSYS
-	b	out
-sigsegv:
-	li	a0, _SIGSEGV
-	move	a1, $28
-	jal	force_sig
-	/* Fall through */
-efault:	li	v0, -EFAULT
 out:	jr	ra
 	END(sys32_syscall)

 	.align	3
-	.type	sys_call_table,@object;
+	.type	sys_call_table,@object
 sys_call_table:
 	PTR	sys32_syscall			/* 4000 */
 	PTR	sys_exit
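
P.S. for reviewers who would rather not decode the assembler: below is a
rough C model of what the new 32-bit table layout and the precomputed
stack-argument word boil down to. The struct, macro and function names
are made up purely for illustration; in the real patch the table is
emitted by the sys/syscalltable macros and the copying is done by the
stackargs stub in handle_sys, not by memcpy.

#include <stdint.h>
#include <string.h>

/* Hypothetical model of one 8-byte sys_call_table entry: a function
   pointer followed by LONG (nargs << 2) - (5 << 2). The second word is
   negative for syscalls with at most four register arguments; when it
   is >= 0 it equals (nargs - 5) * 4, the number of instruction bytes
   the stackargs stub skips before the loads for arguments 6-8. */
struct o32_syscall_entry {
	long (*handler)(long, long, long, long, long, long, long, long);
	int32_t stack_arg_bytes;
};

#define O32_SYS(fn, nargs) { (fn), ((nargs) << 2) - (5 << 2) }

/* Model of the decision handle_sys/stackargs makes: only entries with a
   non-negative second word fetch arguments 5..8 from the user stack
   (usp + 16 .. usp + 28) before the handler is called. */
static long o32_dispatch(const struct o32_syscall_entry *e,
			 const long *usp, long arg[8])
{
	if (e->stack_arg_bytes >= 0) {
		int nstack = e->stack_arg_bytes / 4 + 1;	/* 1..4 */
		memcpy(&arg[4], &usp[4], nstack * sizeof(long));
	}
	return e->handler(arg[0], arg[1], arg[2], arg[3],
			  arg[4], arg[5], arg[6], arg[7]);
}

In this model, "sys sys_syscall 8" means the indirect syscall entry
always gets the full set of stack arguments, which is why sys_syscall
can simply shift the registers and re-store a0 - a3 into pt_regs instead
of doing its own user stack range check.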