On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger <richard.weinberger@xxxxxxxxx> wrote: > CC'ing RT folks > > On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote: >> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote: >>> Hi, >>> >>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT >>> patched kernel. I have currently only triggered it using dpkg. Dave >>> Chinner on the XFS mailing list suggested that it was a rt-kernel >>> workqueue issue as opposed to a XFS problem after looking at the >>> kernel messages. I've got a 100% reproducible test case that doesn't involve a filesystem. I wrote a module that triggers the bug when the device is written to, making it easy to enable tracing during the event and capture everything. It looks like blocking on an rw_semaphore does not cause wq_worker_sleeping to be called when a worker goes to sleep, so the workqueue pool never wakes a replacement worker. This only happens with the RT patches, not with the mainline kernel. I'm foreseeing a second deadlock/bug coming into play shortly. If a task holding the work pool spinlock gets preempted, and we need to schedule more work from another worker thread which was just blocked by a mutex, we'll then end up trying to go to sleep on 2 locks at once. That is getting a bit deep into the scheduler for me... Any suggestions on how to fix it? Austin
#include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/fs.h> #include <asm/uaccess.h> #include <linux/semaphore.h> static int device_open(struct inode *, struct file *); static int device_release(struct inode *, struct file *); static ssize_t device_read(struct file *, char *, size_t, loff_t *); static ssize_t device_write(struct file *, const char *, size_t, loff_t *); // Dev name as it appears in /proc/devices #define DEVICE_NAME "aschuh" // Major number assigned to our device driver static int major; static struct workqueue_struct *lockup_wq1; static struct workqueue_struct *lockup_wq2; static struct file_operations fops = { .read = device_read, .write = device_write, .open = device_open, .release = device_release }; static int __init init_killer_module(void) { lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0); if (!lockup_wq1) return -ENOMEM; lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0); if (!lockup_wq2) { destroy_workqueue(lockup_wq1); return -ENOMEM; } major = register_chrdev(0, DEVICE_NAME, &fops); if (major < 0) { printk(KERN_ALERT "Registering char device failed with %d\n", major); destroy_workqueue(lockup_wq1); destroy_workqueue(lockup_wq2); return major; } printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major); // A non 0 return means init_module failed; module can't be loaded. return 0; } // Called when a process tries to open the device file. static int device_open(struct inode *inode, struct file *file) { try_module_get(THIS_MODULE); return 0; } // Called when a process closes the device file. static int device_release(struct inode *inode, struct file *file) { // Decrement the usage count, or else once you opened the file, you'll never // get get rid of the module. 
module_put(THIS_MODULE); return 0; } static ssize_t device_read(struct file *filp, char *buffer, size_t length, loff_t *offset) { return 0; } #if 0 #define SEM_INIT(sem) sema_init(sem, 1) #define SEM_TYPE struct semaphore #define SEM_DOWN(sem) down(sem) #define SEM_UP(sem) up(sem) #else #define SEM_INIT(sem) init_rwsem(sem) #define SEM_TYPE struct rw_semaphore #define SEM_DOWN(sem) down_write_nested(sem, 0) #define SEM_UP(sem) up_write(sem) #endif struct mywork { struct work_struct work; int index; SEM_TYPE *sem; }; static void work1(struct work_struct *work) { struct mywork *my_work = container_of(work, struct mywork, work); trace_printk("work1 Called with index %d\n", my_work->index); } static void work2(struct work_struct *work) { struct mywork *my_work = container_of(work, struct mywork, work); trace_printk("work2 Called with index %d\n", my_work->index); SEM_DOWN(my_work->sem); SEM_UP(my_work->sem); trace_printk("work2 Finished with index %d\n", my_work->index); } static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t *off) { SEM_TYPE write_sem; SEM_INIT(&write_sem); struct mywork my_work1; struct mywork my_work2; trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2); trace_printk("Got a write\n"); SEM_DOWN(&write_sem); my_work1.index = len; my_work1.sem = &write_sem; INIT_WORK_ONSTACK(&my_work1.work, work1); my_work2.index = len; my_work2.sem = &write_sem; INIT_WORK_ONSTACK(&my_work2.work, work2); queue_work(lockup_wq2, &my_work2.work); queue_work(lockup_wq1, &my_work1.work); flush_work(&my_work1.work); destroy_work_on_stack(&my_work1.work); SEM_UP(&write_sem); flush_work(&my_work2.work); destroy_work_on_stack(&my_work2.work); trace_printk("Write done\n"); tracing_off(); return len; } static void __exit cleanup_killer_module(void) { printk(KERN_INFO "Goodbye world 1.\n"); unregister_chrdev(major, DEVICE_NAME); destroy_workqueue(lockup_wq1); destroy_workqueue(lockup_wq2); } module_init(init_killer_module); 
module_exit(cleanup_killer_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Austin Schuh <austin@xxxxxxxxxxxxxxxx>"); MODULE_DESCRIPTION("Triggers a workqueue bug on write.");