Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/fs-writeback.c | 52 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6caf55858dcb..f9d8aa7f1ff7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -695,8 +695,7 @@ static bool over_bground_thresh(struct backing_dev_info *bdi) } /* - * Called under wb->list_lock. If there are multiple wb per bdi, - * only the flusher working on the first wb should do it. + * Update writeback bandwidth estimate */ static void update_bandwidth(struct backing_dev_info *bdi, unsigned long start_time) @@ -707,19 +706,22 @@ static void update_bandwidth(struct backing_dev_info *bdi, } /* - * Explicit flushing or periodic writeback of "old" data. + * Handle flushing according to passed work item * - * Define "old": the first time one of an inode's pages is dirtied, we mark the - * dirtying-time in the inode's address_space. So this periodic writeback code - * just walks the superblock inode list, writing back any inodes which are - * older than a specific point in time. + * There are two 'background' types of writeback work - flushing of old data + * (have work->for_kupdate set) and flushing to get below dirty limit (have + * work->for_background set). These types of work get interrupted when someone + * asks for more specific writeback by queueing work item. They get restarted + * when the more specific writeback terminates (if still necessary). * - * Try to run once per dirty_writeback_interval. But if a writeback event - * takes longer than a dirty_writeback_interval interval, then leave a - * one-second gap. + * For writeback which can be easily livelocked (!WB_SYNC_ALL && + * !work->tagged_writepages) we limit how many pages should be written for + * one inode (see writeback_chunk_size()). 
When these pages are written we + * continue with next inode in the dirty list to avoid spending too much time + * on one inode while starving other inodes from writeback. * - * older_than_this takes precedence over nr_to_write. So we'll only write back - * all dirty pages if they are all attached to "old" mappings. + * Note that older_than_this takes precedence over nr_to_write. So we'll only + * write back all dirty pages if they are all attached to "old" enough inodes. */ static long bdi_writeback(struct backing_dev_info *bdi, struct wb_writeback_work *work) @@ -823,6 +825,14 @@ static unsigned long get_nr_dirty_pages(void) get_nr_dirty_inodes(); } +/* + * Handle dirty data flushing when we are over background dirty limits + * + * In this type of writeback we write back all dirty inodes (starting with + * the first dirtied inode) until we get below background dirty limits + * or until someone requests specific type of writeback from the flusher + * (by queueing work item). + */ static long wb_check_background_flush(struct backing_dev_info *bdi) { if (over_bground_thresh(bdi)) { @@ -841,6 +851,17 @@ static long wb_check_background_flush(struct backing_dev_info *bdi) return 0; } +/* + * Handle periodic writeback of "old" data. + * + * Define "old": the first time one of an inode's pages is dirtied, we mark the + * dirtying-time in inode->i_dirtied_when. So this periodic writeback just + * walks the dirty inode list, writing back any inodes which are older than a + * specific point in time. + * + * Note that this type of writeback can be interrupted if anyone requests + * specific writeback by queueing work item. 
+ */ static long wb_check_old_data_flush(struct backing_dev_info *bdi) { unsigned long expired; @@ -901,9 +922,7 @@ restart: kfree(work); } - /* - * Check for periodic writeback, kupdated() style - */ + /* Check for background and kupdate-style writeback */ wrote += wb_check_old_data_flush(bdi); wrote += wb_check_background_flush(bdi); @@ -917,7 +936,8 @@ restart: /* * Handle writeback of dirty data for the device backed by this bdi. Also - * reschedules periodically and does kupdated style flushing. + * reschedules periodically (once per dirty_writeback_interval) and does + * kupdated style flushing. */ void bdi_writeback_workfn(struct work_struct *work) { -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html