On Wed 20-03-24 19:02:17, Kemeng Shi wrote: > /sys/kernel/debug/bdi/xxx/stats is supposed to show writeback information > of whole bdi, but only writeback information of bdi in root cgroup is > collected. So writeback information in non-root cgroup are missing now. > To be more specific, considering following case: > > /* create writeback cgroup */ > cd /sys/fs/cgroup > echo "+memory +io" > cgroup.subtree_control > mkdir group1 > cd group1 > echo $$ > cgroup.procs > /* do writeback in cgroup */ > fio -name test -filename=/dev/vdb ... > /* get writeback info of bdi */ > cat /sys/kernel/debug/bdi/xxx/stats > The cat result unexpectedly implies that there is no writeback on target > bdi. > > Fix this by collecting stats of all wb in bdi instead of only wb in > root cgroup. > > Signed-off-by: Kemeng Shi <shikemeng@xxxxxxxxxxxxxxx> Looks mostly good, one comment below: > --- > mm/backing-dev.c | 93 ++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 70 insertions(+), 23 deletions(-) > > diff --git a/mm/backing-dev.c b/mm/backing-dev.c > index 5f2be8c8df11..788702b6c5dd 100644 > --- a/mm/backing-dev.c > +++ b/mm/backing-dev.c > @@ -39,6 +39,19 @@ struct workqueue_struct *bdi_wq; > #include <linux/debugfs.h> > #include <linux/seq_file.h> > > +struct wb_stats { > + unsigned long nr_dirty; > + unsigned long nr_io; > + unsigned long nr_more_io; > + unsigned long nr_dirty_time; > + unsigned long nr_writeback; > + unsigned long nr_reclaimable; > + unsigned long nr_dirtied; > + unsigned long nr_written; > + unsigned long dirty_thresh; > + unsigned long wb_thresh; > +}; > + > static struct dentry *bdi_debug_root; > > static void bdi_debug_init(void) > @@ -46,31 +59,65 @@ static void bdi_debug_init(void) > bdi_debug_root = debugfs_create_dir("bdi", NULL); > } > > -static int bdi_debug_stats_show(struct seq_file *m, void *v) > +static void collect_wb_stats(struct wb_stats *stats, > + struct bdi_writeback *wb) > { > - struct backing_dev_info *bdi = m->private; > - struct bdi_writeback *wb = &bdi->wb; > - unsigned long background_thresh; > - unsigned long dirty_thresh; > - unsigned long wb_thresh; > - unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time; > struct inode *inode; > > - nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; > spin_lock(&wb->list_lock); > list_for_each_entry(inode, &wb->b_dirty, i_io_list) > - nr_dirty++; > + stats->nr_dirty++; > list_for_each_entry(inode, &wb->b_io, i_io_list) > - nr_io++; > + stats->nr_io++; > list_for_each_entry(inode, &wb->b_more_io, i_io_list) > - nr_more_io++; > + stats->nr_more_io++; > list_for_each_entry(inode, &wb->b_dirty_time, i_io_list) > if (inode->i_state & I_DIRTY_TIME) > - nr_dirty_time++; > + stats->nr_dirty_time++; > spin_unlock(&wb->list_lock); > > + stats->nr_writeback += wb_stat(wb, WB_WRITEBACK); > + stats->nr_reclaimable += wb_stat(wb, WB_RECLAIMABLE); > + stats->nr_dirtied += wb_stat(wb, WB_DIRTIED); > + stats->nr_written += wb_stat(wb, WB_WRITTEN); > + stats->wb_thresh += wb_calc_thresh(wb, stats->dirty_thresh); > +} > + > +#ifdef CONFIG_CGROUP_WRITEBACK > +static void bdi_collect_stats(struct backing_dev_info *bdi, > + struct wb_stats *stats) > +{ > + struct bdi_writeback *wb; > + > + /* protect wb from release */ > + mutex_lock(&bdi->cgwb_release_mutex); > + list_for_each_entry(wb, &bdi->wb_list, bdi_node) > + collect_wb_stats(stats, wb); > + mutex_unlock(&bdi->cgwb_release_mutex); > +} So AFAICT this function can race against bdi_unregister() -> wb_shutdown(&bdi->wb) because that doesn't take the cgwb_release_mutex. So we either need the RCU protection as Brian suggested or cgwb_lock or something. But given collect_wb_stats() can take a significant amount of time (traversing all the lists etc.) I think we'll need something more clever. Honza -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR