On Fri, 2024-07-19 at 07:31 -1000, tj@xxxxxxxxxx wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > Hello, Boy. > > On Fri, Jul 19, 2024 at 01:47:35AM +0000, Boy Wu (吳勃誼) wrote: > ... > > If it is for readability, I think keeping using u64 sync instead of > > replacing it with spinlock is better, because what u64 sync > protects is > > 64-bit data for 32-bit systems, while spinlock can be used for many > > other reasons. The root cause of this issue is just the incorrect > use > > Yeah, but it can't be used when there are multiple updaters. > > > of u64 sync. Adding back the missing spinlock for the correct usage > of > > u64 sync is simpler. Is there any benefit to replacing u64 sync > with > > spinlock? > > It doesn't make sense to protect u64_sync with a spinlock. Both are > only > needed on 32bit. What's the point of having both? Also, note that > iostat_cpu > is also updated from two paths - bio issue and stat reset. If you > want to > keep that u64_sync, the only way to avoid possible deadlocks is > adding > spinlock in the bio issue path too, which will be pretty expensive. > The use of a spinlock with u64 sync is suggested in include/linux/u64_stats_sync.h:33. * Usage : * * Stats producer (writer) should use following template granted it already got * an exclusive access to counters (a lock is already taken, or per cpu * data is used [in a non preemptable context]) * * spin_lock_bh(...) or other synchronization to get exclusive access * ... * u64_stats_update_begin(&stats->syncp); * u64_stats_add(&stats->bytes64, len); // non atomic operation * u64_stats_inc(&stats->packets64); // non atomic operation * u64_stats_update_end(&stats->syncp); Is this an incorrect statement? > > > Also, for blkg_clear_stat(), we're running a slight chance of > > > clearing of > > > iostat_cpu racing against state updates from the hot path. 
Given > the > > > circumstances - stat reset is an cgroup1-only feature which isn't > > > used > > > widely and a problematic interface to begin with, I believe this > is a > > > valid > > > choice but it needs to be noted. > > > > I don't get this part, but if this is another issue, maybe another > > patch would be better? > > It's the same issue. Reset is another writer and whenever you have > more than > one writers w/ u64_sync, there's a chance of deadlocks. So, > iostat_cpu also > has two writers - bio issue and stat reset. If you want to keep using > u64_sync in both places, the only way to do it is adding spinlock > protection > to both paths, which is an expensive thing to do for the bio issue > path. So, > here, we'd rather just give up and let stat resetting be racy on > 32bit. > > Thanks. > > -- > tejun There are three places that update iostat and two places that update iostat_cpu in blk-cgroup.c in version 6.10.1. I assume the suggestion in u64_stats_sync.h is correct. For readability, how about the following change? 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 85b3b9051455..7472cfa9a506 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -62,6 +62,10 @@ static struct workqueue_struct *blkcg_punt_bio_wq; static DEFINE_RAW_SPINLOCK(blkg_stat_lock); +#if BITS_PER_LONG == 32 +static DEFINE_RAW_SPINLOCK(blkg_stat_cpu_lock); +#endif + #define BLKG_DESTROY_BATCH_SIZE 64 /* @@ -535,5 +539,55 @@ static void blkg_destroy_all(struct gendisk *disk) spin_unlock_irq(&q->queue_lock); } +static inline unsigned long blkcg_iostats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags; + +#if BITS_PER_LONG == 64 + flags = u64_stats_update_begin_irqsave(syncp); +#else /* 64 bit */ + raw_spin_lock_irqsave(&blkg_stat_lock, flags); + u64_stats_update_begin(syncp); +#endif + + return flags; +} + +static inline void blkcg_iostats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ +#if BITS_PER_LONG == 64 + u64_stats_update_end_irqrestore(syncp, flags); +#else /* 64 bit */ + u64_stats_update_end(syncp); + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); +#endif +} + +static unsigned long blkcg_iostats_cpu_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags; + +#if BITS_PER_LONG == 64 + flags = u64_stats_update_begin_irqsave(syncp); +#else /* 64 bit */ + raw_spin_lock_irqsave(&blkg_stat_cpu_lock, flags); + u64_stats_update_begin(syncp); +#endif + + return flags; +} + +static inline void blkcg_iostats_cpu_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ +#if BITS_PER_LONG == 64 + u64_stats_update_end_irqrestore(syncp, flags); +#else /* 64 bit */ + u64_stats_update_end(syncp); + raw_spin_unlock_irqrestore(&blkg_stat_cpu_lock, flags); +#endif +} + static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) { int i; @@ -632,24 +686,26 @@ static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) static void __blkg_clear_stat(struct 
blkg_iostat_set *bis) { struct blkg_iostat cur = {0}; - unsigned long flags; - flags = u64_stats_update_begin_irqsave(&bis->sync); blkg_iostat_set(&bis->cur, &cur); blkg_iostat_set(&bis->last, &cur); - u64_stats_update_end_irqrestore(&bis->sync, flags); } static void blkg_clear_stat(struct blkcg_gq *blkg) { int cpu; + unsigned long flags; for_each_possible_cpu(cpu) { struct blkg_iostat_set *s = per_cpu_ptr(blkg- >iostat_cpu, cpu); + flags = blkcg_iostats_cpu_update_begin_irqsave(&s- >sync); __blkg_clear_stat(s); + blkcg_iostats_cpu_update_end_irqrestore(&s->sync, flags); } + flags = blkcg_iostats_update_begin_irqsave(&blkg->iostat.sync); __blkg_clear_stat(&blkg->iostat); + blkcg_iostats_update_end_irqrestore(&blkg->iostat.sync, flags); } static int blkcg_reset_stats(struct cgroup_subsys_state *css, @@ -1134,9 +1190,9 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_DISCARD] << 9; } - flags = u64_stats_update_begin_irqsave(&blkg- >iostat.sync); + flags = blkcg_iostats_update_begin_irqsave(&blkg- >iostat.sync); blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + blkcg_iostats_update_end_irqrestore(&blkg->iostat.sync, flags); } } @@ -2152,7 +2208,7 @@ void blk_cgroup_bio_start(struct bio *bio) cpu = get_cpu(); bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); - flags = u64_stats_update_begin_irqsave(&bis->sync); + flags = blkcg_iostats_cpu_update_begin_irqsave(&bis->sync); /* * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split @@ -2175,7 +2231,7 @@ void blk_cgroup_bio_start(struct bio *bio) WRITE_ONCE(bis->lqueued, true); } - u64_stats_update_end_irqrestore(&bis->sync, flags); + blkcg_iostats_cpu_update_end_irqrestore(&bis->sync, flags); cgroup_rstat_updated(blkcg->css.cgroup, cpu); put_cpu(); } -- boy.wu