On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger <richard.weinberger@xxxxxxxxx> wrote: > CC'ing RT folks > > On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote: >> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote: >>> Hi, >>> >>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT >>> patched kernel. I have currently only triggered it using dpkg. Dave >>> Chinner on the XFS mailing list suggested that it was a rt-kernel >>> workqueue issue as opposed to a XFS problem after looking at the >>> kernel messages. I've got a 100% reproducible test case that doesn't involve a filesystem. I wrote a module that triggers the bug when the device is written to, making it easy to enable tracing during the event and capture everything. It looks like blocking on an rw_semaphore does not cause wq_worker_sleeping to be called when a worker goes to sleep, so the workqueue pool never wakes a replacement worker. This only happens with the RT patches, not with the mainline kernel. I'm foreseeing a second deadlock/bug coming into play shortly. If a task holding the work pool spinlock gets preempted, and we need to schedule more work from another worker thread which was just blocked by a mutex, we'll then end up trying to go to sleep on 2 locks at once. That is getting a bit deep into the scheduler for me... Any suggestions on how to fix it? Austin
#include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/fs.h> #include <asm/uaccess.h> #include <linux/semaphore.h> static int device_open(struct inode *, struct file *); static int device_release(struct inode *, struct file *); static ssize_t device_read(struct file *, char *, size_t, loff_t *); static ssize_t device_write(struct file *, const char *, size_t, loff_t *); // Dev name as it appears in /proc/devices #define DEVICE_NAME "aschuh" // Major number assigned to our device driver static int major; static struct workqueue_struct *lockup_wq1; static struct workqueue_struct *lockup_wq2; static struct file_operations fops = { .read = device_read, .write = device_write, .open = device_open, .release = device_release }; static int __init init_killer_module(void) { lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0); if (!lockup_wq1) return -ENOMEM; lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0); if (!lockup_wq2) { destroy_workqueue(lockup_wq1); return -ENOMEM; } major = register_chrdev(0, DEVICE_NAME, &fops); if (major < 0) { printk(KERN_ALERT "Registering char device failed with %d\n", major); destroy_workqueue(lockup_wq1); destroy_workqueue(lockup_wq2); return major; } printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major); // A non 0 return means init_module failed; module can't be loaded. return 0; } // Called when a process tries to open the device file. static int device_open(struct inode *inode, struct file *file) { try_module_get(THIS_MODULE); return 0; } // Called when a process closes the device file. static int device_release(struct inode *inode, struct file *file) { // Decrement the usage count, or else once you opened the file, you'll never // get get rid of the module. 
module_put(THIS_MODULE); return 0; } static ssize_t device_read(struct file *filp, char *buffer, size_t length, loff_t *offset) { return 0; } #if 0 #define SEM_INIT(sem) sema_init(sem, 1) #define SEM_TYPE struct semaphore #define SEM_DOWN(sem) down(sem) #define SEM_UP(sem) up(sem) #else #define SEM_INIT(sem) init_rwsem(sem) #define SEM_TYPE struct rw_semaphore #define SEM_DOWN(sem) down_write_nested(sem, 0) #define SEM_UP(sem) up_write(sem) #endif struct mywork { struct work_struct work; int index; SEM_TYPE *sem; }; static void work1(struct work_struct *work) { struct mywork *my_work = container_of(work, struct mywork, work); trace_printk("work1 Called with index %d\n", my_work->index); } static void work2(struct work_struct *work) { struct mywork *my_work = container_of(work, struct mywork, work); trace_printk("work2 Called with index %d\n", my_work->index); SEM_DOWN(my_work->sem); SEM_UP(my_work->sem); trace_printk("work2 Finished with index %d\n", my_work->index); } static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t *off) { SEM_TYPE write_sem; SEM_INIT(&write_sem); struct mywork my_work1; struct mywork my_work2; trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2); trace_printk("Got a write\n"); SEM_DOWN(&write_sem); my_work1.index = len; my_work1.sem = &write_sem; INIT_WORK_ONSTACK(&my_work1.work, work1); my_work2.index = len; my_work2.sem = &write_sem; INIT_WORK_ONSTACK(&my_work2.work, work2); queue_work(lockup_wq2, &my_work2.work); queue_work(lockup_wq1, &my_work1.work); flush_work(&my_work1.work); destroy_work_on_stack(&my_work1.work); SEM_UP(&write_sem); flush_work(&my_work2.work); destroy_work_on_stack(&my_work2.work); trace_printk("Write done\n"); tracing_off(); return len; } static void __exit cleanup_killer_module(void) { printk(KERN_INFO "Goodbye world 1.\n"); unregister_chrdev(major, DEVICE_NAME); destroy_workqueue(lockup_wq1); destroy_workqueue(lockup_wq2); } module_init(init_killer_module); 
module_exit(cleanup_killer_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Austin Schuh <austin@xxxxxxxxxxxxxxxx>"); MODULE_DESCRIPTION("Triggers a workqueue bug on write.");