Re: Filesystem lockup with CONFIG_PREEMPT_RT

From: Austin Schuh
Date: Thu Jun 26 2014 - 15:50:50 EST


On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger
<richard.weinberger@xxxxxxxxx> wrote:
> CC'ing RT folks
>
> On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote:
>> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@xxxxxxxxxxxxxxxx> wrote:
>>> Hi,
>>>
>>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT
>>> patched kernel. I have currently only triggered it using dpkg. Dave
>>> Chinner on the XFS mailing list suggested that it was a rt-kernel
>>> workqueue issue as opposed to a XFS problem after looking at the
>>> kernel messages.

I've got a 100% reproducible test case that doesn't involve a
filesystem. I wrote a module that triggers the bug when the device is
written to, making it easy to enable tracing during the event and
capture everything.

It looks like rw_semaphores don't trigger wq_worker_sleeping to run
when work goes to sleep on a rw_semaphore. This only happens with the
RT patches, not with the mainline kernel. I'm foreseeing a second
deadlock/bug coming into play shortly. If a task holding the work
pool spinlock gets preempted, and we need to schedule more work from
another worker thread which was just blocked by a mutex, we'll then
end up trying to go to sleep on 2 locks at once.

That is getting a bit deep into the scheduler for me... Any
suggestions on how to fix it?

Austin
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/semaphore.h>

static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);

// Dev name as it appears in /proc/devices
#define DEVICE_NAME "aschuh"

// Major number assigned to our device driver
static int major;
static struct workqueue_struct *lockup_wq1;
static struct workqueue_struct *lockup_wq2;

static struct file_operations fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release
};

static int __init init_killer_module(void) {

lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0);
if (!lockup_wq1) return -ENOMEM;

lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0);
if (!lockup_wq2) {
destroy_workqueue(lockup_wq1);
return -ENOMEM;
}

major = register_chrdev(0, DEVICE_NAME, &fops);
if (major < 0) {
printk(KERN_ALERT "Registering char device failed with %d\n", major);
destroy_workqueue(lockup_wq1);
destroy_workqueue(lockup_wq2);

return major;
}

printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major);

// A non 0 return means init_module failed; module can't be loaded.
return 0;
}

// Called when a process tries to open the device file.
static int device_open(struct inode *inode, struct file *file) {
try_module_get(THIS_MODULE);
return 0;
}

// Called when a process closes the device file.
static int device_release(struct inode *inode, struct file *file) {
// Decrement the usage count, or else once you opened the file, you'll never
// get get rid of the module.
module_put(THIS_MODULE);

return 0;
}

static ssize_t device_read(struct file *filp, char *buffer, size_t length,
loff_t *offset) {
return 0;
}

#if 0

#define SEM_INIT(sem) sema_init(sem, 1)
#define SEM_TYPE struct semaphore
#define SEM_DOWN(sem) down(sem)
#define SEM_UP(sem) up(sem)

#else

#define SEM_INIT(sem) init_rwsem(sem)
#define SEM_TYPE struct rw_semaphore
#define SEM_DOWN(sem) down_write_nested(sem, 0)
#define SEM_UP(sem) up_write(sem)

#endif

struct mywork {
struct work_struct work;
int index;
SEM_TYPE *sem;
};

static void work1(struct work_struct *work) {
struct mywork *my_work = container_of(work, struct mywork, work);
trace_printk("work1 Called with index %d\n", my_work->index);
}

static void work2(struct work_struct *work) {
struct mywork *my_work = container_of(work, struct mywork, work);
trace_printk("work2 Called with index %d\n", my_work->index);
SEM_DOWN(my_work->sem);
SEM_UP(my_work->sem);
trace_printk("work2 Finished with index %d\n", my_work->index);
}


static ssize_t device_write(struct file *filp, const char *buff, size_t len,
loff_t *off) {
SEM_TYPE write_sem;
SEM_INIT(&write_sem);

struct mywork my_work1;
struct mywork my_work2;
trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2);

trace_printk("Got a write\n");

SEM_DOWN(&write_sem);
my_work1.index = len;
my_work1.sem = &write_sem;
INIT_WORK_ONSTACK(&my_work1.work, work1);

my_work2.index = len;
my_work2.sem = &write_sem;
INIT_WORK_ONSTACK(&my_work2.work, work2);

queue_work(lockup_wq2, &my_work2.work);

queue_work(lockup_wq1, &my_work1.work);
flush_work(&my_work1.work);
destroy_work_on_stack(&my_work1.work);

SEM_UP(&write_sem);

flush_work(&my_work2.work);
destroy_work_on_stack(&my_work2.work);
trace_printk("Write done\n");
tracing_off();

return len;
}

static void __exit cleanup_killer_module(void) {
printk(KERN_INFO "Goodbye world 1.\n");
unregister_chrdev(major, DEVICE_NAME);
destroy_workqueue(lockup_wq1);
destroy_workqueue(lockup_wq2);
}

module_init(init_killer_module);
module_exit(cleanup_killer_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Austin Schuh <austin@xxxxxxxxxxxxxxxx>");
MODULE_DESCRIPTION("Triggers a workqueue bug on write.");