On Tue, May 26 2009, Jens Axboe wrote: > On Tue, May 26 2009, Damien Wyart wrote: > > > > I have been playing with v7 since your sending and after a while > > > > (short on laptop, longer on desktop, a few hours), writeback doesn't > > > > seem to work anymore. Manual call to sync hangs (process in D state) > > > > and Dirty value in meminfo gets growing. As previous versions had > > > > been heavily tested, I guess there is some regression in v7. > > > > > Not good, the prime suspect is the sync notification stuff. I'll take > > > a look and get that fixed. You didn't happen to catch any sysrq-t back > > > traces or anything like that? Would be interesting to see where > > > bdi-default and the bdi-* threads are stuck. > > > > No, as I was doing many things at the same time and not exclusively > > debugging, I just rebooted hard and went back to an unpatched kernel when > > the problems occurred. But I noticed only bdi-default was alive, the > > other bdi-* threads had disappeared and the sync commands I had tried > > were all in D state. Also I tried to reinstall a kernel .deb (these > > systems are Debian) and this got stuck during installation, when probing > > grub config (do not know if there is some sync syscall in there). > > > > Can try to go further tomorrow but will not have a lot of time... > > OK, I spotted the problem. If we fallback to the on-stack allocation in > bdi_writeback_all(), then we do the wait for the work completion with > the bdi_lock mutex held. This can deadlock with bdi_forker_task(), so if > we require that to be invoked to make progress (happens if a thread > needs to be restarted), then we have a deadlock on that mutex. > > I'll cook up a fix for this, but probably not before the morning. Untested fix. I think it should work, but I haven't run it here yet. 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a185a16..1662ede 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -122,12 +122,11 @@ static void bdi_work_free(struct rcu_head *head) static void wb_work_complete(struct bdi_work *work) { - if (!bdi_work_on_stack(work)) { - bdi_work_clear(work); + const enum writeback_sync_modes sync_mode = work->sync_mode; - if (work->sync_mode == WB_SYNC_NONE) - call_rcu(&work->rcu_head, bdi_work_free); - } else + if (!bdi_work_on_stack(work)) + bdi_work_clear(work); + if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work)) call_rcu(&work->rcu_head, bdi_work_free); } @@ -272,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, */ if (work == &work_stack || must_wait) { bdi_wait_on_work_clear(work); - if (must_wait) + if (must_wait && work != &work_stack) call_rcu(&work->rcu_head, bdi_work_free); } } @@ -511,10 +510,9 @@ int bdi_writeback_task(struct bdi_writeback *wb) * we are simply called for WB_SYNC_NONE, then writeback will merely be * scheduled to run. 
*/ -void bdi_writeback_all(struct super_block *sb, long nr_pages, - enum writeback_sync_modes sync_mode) +void bdi_writeback_all(struct super_block *sb, struct writeback_control *wbc) { - const bool must_wait = sync_mode == WB_SYNC_ALL; + const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; struct backing_dev_info *bdi, *tmp; struct bdi_work *work; LIST_HEAD(list); @@ -522,31 +520,21 @@ void bdi_writeback_all(struct super_block *sb, long nr_pages, mutex_lock(&bdi_lock); list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) { - struct bdi_work *work, work_stack; + struct bdi_work *work; if (!bdi_has_dirty_io(bdi)) continue; - work = bdi_alloc_work(sb, nr_pages, sync_mode); + work = bdi_alloc_work(sb, wbc->nr_to_write, wbc->sync_mode); if (!work) { - work = &work_stack; - bdi_work_init_on_stack(work, sb, nr_pages, sync_mode); - } else if (must_wait) + generic_sync_bdi_inodes(sb, wbc); + continue; + } + if (must_wait) list_add_tail(&work->wait_list, &list); bdi_queue_work(bdi, work); __bdi_start_work(bdi, work); - - /* - * Do the wait inline if this came from the stack. This - * only happens if we ran out of memory, so should very - * rarely trigger. 
- */ - if (work == &work_stack) { - bdi_wait_on_work_clear(work); - if (must_wait) - call_rcu(&work->rcu_head, bdi_work_free); - } } mutex_unlock(&bdi_lock); @@ -1082,7 +1070,7 @@ void generic_sync_sb_inodes(struct super_block *sb, if (wbc->bdi) bdi_start_writeback(wbc->bdi, sb, wbc->nr_to_write, wbc->sync_mode); else - bdi_writeback_all(sb, wbc->nr_to_write, wbc->sync_mode); + bdi_writeback_all(sb, wbc); if (wbc->sync_mode == WB_SYNC_ALL) { struct inode *inode, *old_inode = NULL; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index dee38ec..679cfb8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,8 +107,7 @@ void bdi_unregister(struct backing_dev_info *bdi); void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, long nr_pages, enum writeback_sync_modes sync_mode); int bdi_writeback_task(struct bdi_writeback *wb); -void bdi_writeback_all(struct super_block *sb, long nr_pages, - enum writeback_sync_modes sync_mode); +void bdi_writeback_all(struct super_block *sb, struct writeback_control *wbc); void bdi_add_default_flusher_task(struct backing_dev_info *bdi); void bdi_add_flusher_task(struct backing_dev_info *bdi); int bdi_has_dirty_io(struct backing_dev_info *bdi); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7dd7de7..dd403cf 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -669,10 +669,18 @@ void throttle_vm_writeout(gfp_t gfp_mask) */ void wakeup_flusher_threads(long nr_pages) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .range_cyclic = 1, + }; + if (nr_pages == 0) nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); - bdi_writeback_all(NULL, nr_pages, WB_SYNC_NONE); + + wbc.nr_to_write = nr_pages; + bdi_writeback_all(NULL, &wbc); } static void laptop_timer_fn(unsigned long unused); -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the 
body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html