This might be broken on several other architectures as well, therefore I am
posting it here.

We had a bug on s390 when preemption is enabled and preemption of the BKL is
enabled as well. When returning from an interrupt to kernel context, we check
for TIF_NEED_RESCHED and, if it is set, we just call schedule(). However, this
is broken. Instead of schedule() we must call preempt_schedule_irq();
otherwise the BKL will be unlocked and somebody else might grab it.

This bug explains some strange list corruptions we've seen in fs/locks.c,
where all the locking is done with the BKL. And probably a bunch of other
unexplainable bugs.

The patch below is more or less what is needed on s390.

---
 arch/s390/kernel/entry.S   |    8 +++-----
 arch/s390/kernel/entry64.S |    7 +------
 2 files changed, 4 insertions(+), 11 deletions(-)

Index: linux-2.6/arch/s390/kernel/entry64.S
===================================================================
--- linux-2.6.orig/arch/s390/kernel/entry64.S
+++ linux-2.6/arch/s390/kernel/entry64.S
@@ -583,12 +583,7 @@ io_preempt:
 io_resume_loop:
 	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
 	jno	io_leave
-	larl	%r1,.Lc_pactive
-	mvc	__TI_precount(4,%r9),0(%r1)
-	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
-	brasl	%r14,schedule		# call schedule
-	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
-	xc	__TI_precount(4,%r9),__TI_precount(%r9)
+	brasl	%r14,preempt_schedule_irq # call schedule
 	j	io_resume_loop
 #endif
 
Index: linux-2.6/arch/s390/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/s390/kernel/entry.S
+++ linux-2.6/arch/s390/kernel/entry.S
@@ -597,12 +597,8 @@ io_preempt:
 io_resume_loop:
 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
 	bno	BASED(io_leave)
-	mvc	__TI_precount(4,%r9),BASED(.Lc_pactive)
-	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
-	l	%r1,BASED(.Lschedule)
+	l	%r1,BASED(.Lpreempt_schedule_irq)
 	basr	%r14,%r1	# call schedule
-	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
-	xc	__TI_precount(4,%r9),__TI_precount(%r9)
 	b	BASED(io_resume_loop)
 #endif
 
@@ -1043,6 +1039,8 @@ cleanup_io_leave_insn:
 .Lexecve_tail:	.long	execve_tail
 .Ljump_table:	.long	pgm_check_table
 .Lschedule:	.long	schedule
+.Lpreempt_schedule_irq:
+		.long	preempt_schedule_irq
 .Ltrace:	.long	syscall_trace
 .Lschedtail:	.long	schedule_tail
 .Lsysc_table:	.long	sys_call_table
-
To unsubscribe from this list: send the line "unsubscribe linux-arch" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html