On Mon 18-05-09 14:19:45, Jens Axboe wrote: > Add a struct bdi_writeback for tracking and handling dirty IO. This > is in preparation for adding > 1 flusher task per bdi. Some changes (IMO the most complicated ones ;) in this patch set seem to be just reordering / cleanup of changes which happened in patch #2. Could you maybe move it there. Commented below... > > Signed-off-by: Jens Axboe <jens.axboe@xxxxxxxxxx> > --- > fs/fs-writeback.c | 140 +++++++++++++++++----------- > include/linux/backing-dev.h | 42 +++++---- > mm/backing-dev.c | 218 ++++++++++++++++++++++++++++-------------- > 3 files changed, 256 insertions(+), 144 deletions(-) > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index 8a25d14..50e21e8 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -46,9 +46,11 @@ int nr_pdflush_threads; > * unless they implement their own. Which is somewhat inefficient, as this > * may prevent concurrent writeback against multiple devices. > */ > -static int writeback_acquire(struct backing_dev_info *bdi) > +static int writeback_acquire(struct bdi_writeback *wb) > { > - return !test_and_set_bit(BDI_pdflush, &bdi->state); > + struct backing_dev_info *bdi = wb->bdi; > + > + return !test_and_set_bit(wb->nr, &bdi->wb_active); > } > > /** > @@ -59,19 +61,38 @@ static int writeback_acquire(struct backing_dev_info *bdi) > */ > int writeback_in_progress(struct backing_dev_info *bdi) > { > - return test_bit(BDI_pdflush, &bdi->state); > + return bdi->wb_active != 0; > } > > /** > * writeback_release - relinquish exclusive writeback access against a device. > * @bdi: the device's backing_dev_info structure > */ > -static void writeback_release(struct backing_dev_info *bdi) > +static void writeback_release(struct bdi_writeback *wb) > { > - WARN_ON_ONCE(!writeback_in_progress(bdi)); > - bdi->wb_arg.nr_pages = 0; > - bdi->wb_arg.sb = NULL; > - clear_bit(BDI_pdflush, &bdi->state); > + struct backing_dev_info *bdi = wb->bdi; > + > + wb->nr_pages = 0; > + wb->sb = NULL; > + clear_bit(wb->nr, &bdi->wb_active); > +} > + > +static void wb_start_writeback(struct bdi_writeback *wb, struct super_block *sb, > + long nr_pages) > +{ > + if (!wb_has_dirty_io(wb)) > + return; > + > + if (writeback_acquire(wb)) { > + wb->nr_pages = nr_pages; > + wb->sb = sb; > + > + /* > + * make above store seen before the task is woken > + */ > + smp_mb(); > + wake_up(&wb->wait); > + } > } > > int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, > @@ -81,21 +102,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, > * This only happens the first time someone kicks this bdi, so put > * it out-of-line. > */ > - if (unlikely(!bdi->task)) { > + if (unlikely(!bdi->wb.task)) { > bdi_add_default_flusher_task(bdi); > return 1; > } > > - if (writeback_acquire(bdi)) { > - bdi->wb_arg.nr_pages = nr_pages; > - bdi->wb_arg.sb = sb; > - /* > - * make above store seen before the task is woken > - */ > - smp_mb(); > - wake_up(&bdi->wait); > - } > - > + wb_start_writeback(&bdi->wb, sb, nr_pages); > return 0; > } > > @@ -123,12 +135,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, > * older_than_this takes precedence over nr_to_write. So we'll only write back > * all dirty pages if they are all attached to "old" mappings. > */ > -static void bdi_kupdated(struct backing_dev_info *bdi) > +static void wb_kupdated(struct bdi_writeback *wb) > { > unsigned long oldest_jif; > long nr_to_write; > struct writeback_control wbc = { > - .bdi = bdi, > + .bdi = wb->bdi, > .sync_mode = WB_SYNC_NONE, > .older_than_this = &oldest_jif, > .nr_to_write = 0, > @@ -155,15 +167,19 @@ static void bdi_kupdated(struct backing_dev_info *bdi) > } > } > > -static void bdi_pdflush(struct backing_dev_info *bdi) > +static void generic_sync_wb_inodes(struct bdi_writeback *wb, > + struct super_block *sb, > + struct writeback_control *wbc); > + > +static void wb_writeback(struct bdi_writeback *wb) > { > struct writeback_control wbc = { > - .bdi = bdi, > + .bdi = wb->bdi, > .sync_mode = WB_SYNC_NONE, > .older_than_this = NULL, > .range_cyclic = 1, > }; > - long nr_pages = bdi->wb_arg.nr_pages; > + long nr_pages = wb->nr_pages; > > for (;;) { > unsigned long background_thresh, dirty_thresh; > @@ -177,7 +193,7 @@ static void bdi_pdflush(struct backing_dev_info *bdi) > wbc.encountered_congestion = 0; > wbc.nr_to_write = MAX_WRITEBACK_PAGES; > wbc.pages_skipped = 0; > - generic_sync_bdi_inodes(bdi->wb_arg.sb, &wbc); > + generic_sync_wb_inodes(wb, wb->sb, &wbc); > nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; > /* > * If we ran out of stuff to write, bail unless more_io got set > @@ -194,13 +210,13 @@ static void bdi_pdflush(struct backing_dev_info *bdi) > * Handle writeback of dirty data for the device backed by this bdi. Also > * wakes up periodically and does kupdated style flushing. > */ > -int bdi_writeback_task(struct backing_dev_info *bdi) > +int bdi_writeback_task(struct bdi_writeback *wb) > { > while (!kthread_should_stop()) { > unsigned long wait_jiffies; > DEFINE_WAIT(wait); > > - prepare_to_wait(&bdi->wait, &wait, TASK_INTERRUPTIBLE); > + prepare_to_wait(&wb->wait, &wait, TASK_INTERRUPTIBLE); > wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); > schedule_timeout(wait_jiffies); > try_to_freeze(); > @@ -219,13 +235,13 @@ int bdi_writeback_task(struct backing_dev_info *bdi) > * pdflush style writeout. > * > */ > - if (writeback_acquire(bdi)) > - bdi_kupdated(bdi); > + if (writeback_acquire(wb)) > + wb_kupdated(wb); > else > - bdi_pdflush(bdi); > + wb_writeback(wb); > > - writeback_release(bdi); > - finish_wait(&bdi->wait, &wait); > + writeback_release(wb); > + finish_wait(&wb->wait, &wait); > } > > return 0; > @@ -248,6 +264,14 @@ restart: > rcu_read_unlock(); > } > > +/* > + * We have only a single wb per bdi, so just return that. > + */ > +static inline struct bdi_writeback *inode_get_wb(struct inode *inode) > +{ > + return &inode_to_bdi(inode)->wb; > +} > + > /** > * __mark_inode_dirty - internal function > * @inode: inode to mark > @@ -346,9 +370,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) > * reposition it (that would break b_dirty time-ordering). > */ > if (!was_dirty) { > + struct bdi_writeback *wb = inode_get_wb(inode); > + > inode->dirtied_when = jiffies; > - list_move(&inode->i_list, > - &inode_to_bdi(inode)->b_dirty); > + list_move(&inode->i_list, &wb->b_dirty); > } > } > out: > @@ -375,16 +400,16 @@ static int write_inode(struct inode *inode, int sync) > */ > static void redirty_tail(struct inode *inode) > { > - struct backing_dev_info *bdi = inode_to_bdi(inode); > + struct bdi_writeback *wb = inode_get_wb(inode); > > - if (!list_empty(&bdi->b_dirty)) { > + if (!list_empty(&wb->b_dirty)) { > struct inode *tail; > > - tail = list_entry(bdi->b_dirty.next, struct inode, i_list); > + tail = list_entry(wb->b_dirty.next, struct inode, i_list); > if (time_before(inode->dirtied_when, tail->dirtied_when)) > inode->dirtied_when = jiffies; > } > - list_move(&inode->i_list, &bdi->b_dirty); > + list_move(&inode->i_list, &wb->b_dirty); > } > > /* > @@ -392,7 +417,9 @@ static void redirty_tail(struct inode *inode) > */ > static void requeue_io(struct inode *inode) > { > - list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io); > + struct bdi_writeback *wb = inode_get_wb(inode); > + > + list_move(&inode->i_list, &wb->b_more_io); > } > > static void inode_sync_complete(struct inode *inode) > @@ -439,11 +466,10 @@ static void move_expired_inodes(struct list_head *delaying_queue, > /* > * Queue all expired dirty inodes for io, eldest first. > */ > -static void queue_io(struct backing_dev_info *bdi, > - unsigned long *older_than_this) > +static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) > { > - list_splice_init(&bdi->b_more_io, bdi->b_io.prev); > - move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this); > + list_splice_init(&wb->b_more_io, wb->b_io.prev); > + move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); > } > > /* > @@ -604,20 +630,20 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > return __sync_single_inode(inode, wbc); > } > > -void generic_sync_bdi_inodes(struct super_block *sb, > - struct writeback_control *wbc) > +static void generic_sync_wb_inodes(struct bdi_writeback *wb, > + struct super_block *sb, > + struct writeback_control *wbc) > { > const int is_blkdev_sb = sb_is_blkdev_sb(sb); > - struct backing_dev_info *bdi = wbc->bdi; > const unsigned long start = jiffies; /* livelock avoidance */ > > spin_lock(&inode_lock); > > - if (!wbc->for_kupdate || list_empty(&bdi->b_io)) > - queue_io(bdi, wbc->older_than_this); > + if (!wbc->for_kupdate || list_empty(&wb->b_io)) > + queue_io(wb, wbc->older_than_this); > > - while (!list_empty(&bdi->b_io)) { > - struct inode *inode = list_entry(bdi->b_io.prev, > + while (!list_empty(&wb->b_io)) { > + struct inode *inode = list_entry(wb->b_io.prev, > struct inode, i_list); > long pages_skipped; > > @@ -629,7 +655,7 @@ void generic_sync_bdi_inodes(struct super_block *sb, > continue; > } > > - if (!bdi_cap_writeback_dirty(bdi)) { > + if (!bdi_cap_writeback_dirty(wb->bdi)) { > redirty_tail(inode); > if (is_blkdev_sb) { > /* > @@ -651,7 +677,7 @@ void generic_sync_bdi_inodes(struct super_block *sb, > continue; > } > > - if (wbc->nonblocking && bdi_write_congested(bdi)) { > + if (wbc->nonblocking && bdi_write_congested(wb->bdi)) { > wbc->encountered_congestion = 1; > if (!is_blkdev_sb) > break; /* Skip a congested fs */ > @@ -685,7 +711,7 @@ void generic_sync_bdi_inodes(struct super_block *sb, > wbc->more_io = 1; > break; > } > - if (!list_empty(&bdi->b_more_io)) > + if (!list_empty(&wb->b_more_io)) > wbc->more_io = 1; > } > > @@ -693,6 +719,14 @@ void generic_sync_bdi_inodes(struct super_block *sb, > /* Leave any unwritten inodes on b_io */ > } > > +void generic_sync_bdi_inodes(struct super_block *sb, > + struct writeback_control *wbc) > +{ > + struct backing_dev_info *bdi = wbc->bdi; > + > + generic_sync_wb_inodes(&bdi->wb, sb, wbc); > +} > + > /* > * Write out a superblock's list of dirty inodes. A wait will be performed > * upon no inodes, all inodes or the final one, depending upon sync_mode. > diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h > index a848eea..a0c70f1 100644 > --- a/include/linux/backing-dev.h > +++ b/include/linux/backing-dev.h > @@ -23,8 +23,8 @@ struct dentry; > * Bits in backing_dev_info.state > */ > enum bdi_state { > - BDI_pdflush, /* A pdflush thread is working this device */ > BDI_pending, /* On its way to being activated */ > + BDI_wb_alloc, /* Default embedded wb allocated */ > BDI_async_congested, /* The async (write) queue is getting full */ > BDI_sync_congested, /* The sync queue is getting full */ > BDI_unused, /* Available bits start here */ > @@ -40,15 +40,23 @@ enum bdi_stat_item { > > #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) > > -struct bdi_writeback_arg { > - unsigned long nr_pages; > - struct super_block *sb; > +struct bdi_writeback { > + struct backing_dev_info *bdi; /* our parent bdi */ > + unsigned int nr; > + > + struct task_struct *task; /* writeback task */ > + wait_queue_head_t wait; > + struct list_head b_dirty; /* dirty inodes */ > + struct list_head b_io; /* parked for writeback */ > + struct list_head b_more_io; /* parked for more writeback */ > + > + unsigned long nr_pages; > + struct super_block *sb; > }; > > struct backing_dev_info { > - struct list_head bdi_list; > struct rcu_head rcu_head; > - > + struct list_head bdi_list; > unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ > unsigned long state; /* Always use atomic bitops on this */ > unsigned int capabilities; /* Device capabilities */ > @@ -65,14 +73,11 @@ struct backing_dev_info { > unsigned int min_ratio; > unsigned int max_ratio, max_prop_frac; > > - struct device *dev; > + struct bdi_writeback wb; /* default writeback info for this bdi */ > + unsigned long wb_active; /* bitmap of active tasks */ > + unsigned long wb_mask; /* number of registered tasks */ > > - struct task_struct *task; /* writeback task */ > - wait_queue_head_t wait; > - struct bdi_writeback_arg wb_arg; /* protected by BDI_pdflush */ > - struct list_head b_dirty; /* dirty inodes */ > - struct list_head b_io; /* parked for writeback */ > - struct list_head b_more_io; /* parked for more writeback */ > + struct device *dev; > > #ifdef CONFIG_DEBUG_FS > struct dentry *debug_dir; > @@ -89,18 +94,19 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); > void bdi_unregister(struct backing_dev_info *bdi); > int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, > long nr_pages); > -int bdi_writeback_task(struct backing_dev_info *bdi); > +int bdi_writeback_task(struct bdi_writeback *wb); > void bdi_writeback_all(struct super_block *sb, long nr_pages); > void bdi_add_default_flusher_task(struct backing_dev_info *bdi); > +int bdi_has_dirty_io(struct backing_dev_info *bdi); > > extern spinlock_t bdi_lock; > extern struct list_head bdi_list; > > -static inline int bdi_has_dirty_io(struct backing_dev_info *bdi) > +static inline int wb_has_dirty_io(struct bdi_writeback *wb) > { > - return !list_empty(&bdi->b_dirty) || > - !list_empty(&bdi->b_io) || > - !list_empty(&bdi->b_more_io); > + return !list_empty(&wb->b_dirty) || > + !list_empty(&wb->b_io) || > + !list_empty(&wb->b_more_io); > } > > static inline void __add_bdi_stat(struct backing_dev_info *bdi, > diff --git a/mm/backing-dev.c b/mm/backing-dev.c > index c759449..677a8c6 100644 > --- a/mm/backing-dev.c > +++ b/mm/backing-dev.c > @@ -199,17 +199,59 @@ static int __init default_bdi_init(void) > } > subsys_initcall(default_bdi_init); > > +static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) > +{ > + memset(wb, 0, sizeof(*wb)); > + > + wb->bdi = bdi; > + init_waitqueue_head(&wb->wait); > + INIT_LIST_HEAD(&wb->b_dirty); > + INIT_LIST_HEAD(&wb->b_io); > + INIT_LIST_HEAD(&wb->b_more_io); > +} > + > +static void bdi_flush_io(struct backing_dev_info *bdi) > +{ > + struct writeback_control wbc = { > + .bdi = bdi, > + .sync_mode = WB_SYNC_NONE, > + .older_than_this = NULL, > + .range_cyclic = 1, > + .nr_to_write = 1024, > + }; > + > + generic_sync_bdi_inodes(NULL, &wbc); > +} > + > +static int wb_assign_nr(struct backing_dev_info *bdi, struct bdi_writeback *wb) > +{ > + set_bit(0, &bdi->wb_mask); > + wb->nr = 0; > + return 0; > +} > + > +static void bdi_put_wb(struct backing_dev_info *bdi, struct bdi_writeback *wb) > +{ > + clear_bit(wb->nr, &bdi->wb_mask); > + clear_bit(BDI_wb_alloc, &bdi->state); > +} > + > +static struct bdi_writeback *bdi_new_wb(struct backing_dev_info *bdi) > +{ > + struct bdi_writeback *wb; > + > + set_bit(BDI_wb_alloc, &bdi->state); > + wb = &bdi->wb; > + wb_assign_nr(bdi, wb); > + return wb; > +} > + > static int bdi_start_fn(void *ptr) > { > - struct backing_dev_info *bdi = ptr; > + struct bdi_writeback *wb = ptr; > + struct backing_dev_info *bdi = wb->bdi; > struct task_struct *tsk = current; > - > - /* > - * Add us to the active bdi_list > - */ > - spin_lock_bh(&bdi_lock); > - list_add_rcu(&bdi->bdi_list, &bdi_list); > - spin_unlock_bh(&bdi_lock); > + int ret; > > tsk->flags |= PF_FLUSHER | PF_SWAPWRITE; > set_freezable(); > @@ -225,77 +267,81 @@ static int bdi_start_fn(void *ptr) > clear_bit(BDI_pending, &bdi->state); > wake_up_bit(&bdi->state, BDI_pending); > > - return bdi_writeback_task(bdi); > + ret = bdi_writeback_task(wb); > + > + bdi_put_wb(bdi, wb); > + return ret; > +} > + > +int bdi_has_dirty_io(struct backing_dev_info *bdi) > +{ > + return wb_has_dirty_io(&bdi->wb); > } > > static int bdi_forker_task(void *ptr) > { > - struct backing_dev_info *bdi, *me = ptr; > + struct bdi_writeback *me = ptr; > > for (;;) { > + struct backing_dev_info *bdi; > + struct bdi_writeback *wb; > DEFINE_WAIT(wait); > > /* > * Should never trigger on the default bdi > */ > - WARN_ON(bdi_has_dirty_io(me)); > + if (wb_has_dirty_io(me)) { > + bdi_flush_io(me->bdi); > + WARN_ON(1); > + } > > prepare_to_wait(&me->wait, &wait, TASK_INTERRUPTIBLE); > + > smp_mb(); > if (list_empty(&bdi_pending_list)) > schedule(); > - else { > + > + finish_wait(&me->wait, &wait); > repeat: > - bdi = NULL; > + bdi = NULL; > + spin_lock_bh(&bdi_lock); > + if (!list_empty(&bdi_pending_list)) { > + bdi = list_entry(bdi_pending_list.next, > + struct backing_dev_info, bdi_list); > + list_del_init(&bdi->bdi_list); > + } > + spin_unlock_bh(&bdi_lock); > > - spin_lock_bh(&bdi_lock); > - if (!list_empty(&bdi_pending_list)) { > - bdi = list_entry(bdi_pending_list.next, > - struct backing_dev_info, > - bdi_list); > - list_del_init(&bdi->bdi_list); > - } > - spin_unlock_bh(&bdi_lock); > + if (!bdi) > + continue; > > - /* > - * If no bdi or bdi already got setup, continue > - */ > - if (!bdi || bdi->task) > - continue; > + wb = bdi_new_wb(bdi); > + if (!wb) > + goto readd_flush; > > - bdi->task = kthread_run(bdi_start_fn, bdi, "bdi-%s", > + wb->task = kthread_run(bdi_start_fn, wb, "bdi-%s", > dev_name(bdi->dev)); > + /* > + * If task creation fails, then readd the bdi to > + * the pending list and force writeout of the bdi > + * from this forker thread. That will free some memory > + * and we can try again. > + */ > + if (!wb->task) { > + bdi_put_wb(bdi, wb); > +readd_flush: > /* > - * If task creation fails, then readd the bdi to > - * the pending list and force writeout of the bdi > - * from this forker thread. That will free some memory > - * and we can try again. > + * Add this 'bdi' to the back, so we get > + * a chance to flush other bdi's to free > + * memory. > */ > - if (!bdi->task) { > - struct writeback_control wbc = { > - .bdi = bdi, > - .sync_mode = WB_SYNC_NONE, > - .older_than_this = NULL, > - .range_cyclic = 1, > - }; > - > - /* > - * Add this 'bdi' to the back, so we get > - * a chance to flush other bdi's to free > - * memory. > - */ > - spin_lock_bh(&bdi_lock); > - list_add_tail(&bdi->bdi_list, > - &bdi_pending_list); > - spin_unlock_bh(&bdi_lock); > - > - wbc.nr_to_write = 1024; > - generic_sync_bdi_inodes(NULL, &wbc); > - goto repeat; > - } > - } > + spin_lock_bh(&bdi_lock); > + list_add_tail(&bdi->bdi_list, &bdi_pending_list); > + spin_unlock_bh(&bdi_lock); > > - finish_wait(&me->wait, &wait); > + bdi_flush_io(bdi); > + goto repeat; > + } > } Quite a lot of changes in this function (and creation of bdi_flush_io()) are just cleanups of patch #2 so it would be nice to move them there... > > return 0; > @@ -318,11 +364,21 @@ static void bdi_add_to_pending(struct rcu_head *head) > list_add_tail(&bdi->bdi_list, &bdi_pending_list); > spin_unlock(&bdi_lock); > > - wake_up(&default_backing_dev_info.wait); > + wake_up(&default_backing_dev_info.wb.wait); > } > > +/* > + * Add a new flusher task that gets created for any bdi > + * that has dirty data pending writeout > + */ > void bdi_add_default_flusher_task(struct backing_dev_info *bdi) > { > + if (!bdi_cap_writeback_dirty(bdi)) > + return; > + > + /* > + * Someone already marked this pending for task creation > + */ > if (test_and_set_bit(BDI_pending, &bdi->state)) > return; > > @@ -363,9 +419,18 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, > * on-demand when they need it. > */ > if (bdi_cap_flush_forker(bdi)) { > - bdi->task = kthread_run(bdi_forker_task, bdi, "bdi-%s", > + struct bdi_writeback *wb; > + > + wb = bdi_new_wb(bdi); > + if (!wb) { > + ret = -ENOMEM; > + goto exit; > + } > + > + wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s", > dev_name(dev)); > - if (!bdi->task) { > + if (!wb->task) { > + bdi_put_wb(bdi, wb); > ret = -ENOMEM; > goto exit; > } > @@ -395,34 +460,44 @@ static int sched_wait(void *word) > return 0; > } > > +/* > + * Remove bdi from global list and shutdown any threads we have running > + */ > static void bdi_wb_shutdown(struct backing_dev_info *bdi) > { > + if (!bdi_cap_writeback_dirty(bdi)) > + return; > + > /* > * If setup is pending, wait for that to complete first > + * Make sure nobody finds us on the bdi_list anymore > */ > wait_on_bit(&bdi->state, BDI_pending, sched_wait, TASK_UNINTERRUPTIBLE); > > + /* > + * Make sure nobody finds us on the bdi_list anymore > + */ > spin_lock_bh(&bdi_lock); > list_del_rcu(&bdi->bdi_list); > spin_unlock_bh(&bdi_lock); > > /* > - * In case the bdi is freed right after unregister, we need to > - * make sure any RCU sections have exited > + * Now make sure that anybody who is currently looking at us from > + * the bdi_list iteration have exited. > */ > synchronize_rcu(); > + > + /* > + * Finally, kill the kernel thread > + */ > + kthread_stop(bdi->wb.task); > } > > void bdi_unregister(struct backing_dev_info *bdi) > { > if (bdi->dev) { > - if (!bdi_cap_flush_forker(bdi)) { > + if (!bdi_cap_flush_forker(bdi)) > bdi_wb_shutdown(bdi); > - if (bdi->task) { > - kthread_stop(bdi->task); > - bdi->task = NULL; > - } > - } > bdi_debug_unregister(bdi); > device_unregister(bdi->dev); > bdi->dev = NULL; This whole chunk is just a cleanup of patch #2, isn't it? Maybe move there? > @@ -440,11 +515,10 @@ int bdi_init(struct backing_dev_info *bdi) > bdi->min_ratio = 0; > bdi->max_ratio = 100; > bdi->max_prop_frac = PROP_FRAC_BASE; > - init_waitqueue_head(&bdi->wait); > INIT_LIST_HEAD(&bdi->bdi_list); > - INIT_LIST_HEAD(&bdi->b_io); > - INIT_LIST_HEAD(&bdi->b_dirty); > - INIT_LIST_HEAD(&bdi->b_more_io); > + bdi->wb_mask = bdi->wb_active = 0; > + > + bdi_wb_init(&bdi->wb, bdi); > > for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { > err = percpu_counter_init(&bdi->bdi_stat[i], 0); > @@ -469,9 +543,7 @@ void bdi_destroy(struct backing_dev_info *bdi) > { > int i; > > - WARN_ON(!list_empty(&bdi->b_dirty)); > - WARN_ON(!list_empty(&bdi->b_io)); > - WARN_ON(!list_empty(&bdi->b_more_io)); > + WARN_ON(bdi_has_dirty_io(bdi)); > > bdi_unregister(bdi); > Honza -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html