diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ce64dd73cfe..90c2efc3767f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -160,13 +160,39 @@ static void blkg_free(struct blkcg_gq *blkg)
schedule_work(&blkg->free_work);
}
+static void __blkcg_rstat_flush(struct llist_node *lnode);
+
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ struct blkcg *blkcg = blkg->blkcg;
+ int cpu;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
#endif
+ /*
+ * Flush all the non-empty percpu lockless lists before releasing
+ * us, given these stats belong to us.
+ *
+ * Hold the cgroup_rstat_lock before calling __blkcg_rstat_flush()
+ * to block concurrent cgroup_rstat_flush*() calls.
+ */
+ for_each_possible_cpu(cpu) {
+ struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
+ struct llist_node *lnode;
+
+ if (llist_empty(lhead))
+ continue;
+
+ lnode = llist_del_all(lhead);
+ if (!lnode)
+ continue;
+
+ cgroup_rstat_flush_acquire();
+ __blkcg_rstat_flush(lnode);
+ cgroup_rstat_flush_release();
+ }
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -951,23 +977,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct llist_node *lnode)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
- struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
- /* Root-level stats are sourced from system-wide IO stats */
- if (!cgroup_parent(css->cgroup))
- return;
-
rcu_read_lock();
- lnode = llist_del_all(lhead);
- if (!lnode)
- goto out;
-
/*
* Iterate only the iostat_cpu's queued in the lockless list.
*/
@@ -991,13 +1006,26 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
- percpu_ref_put(&blkg->refcnt);
}
-out:
rcu_read_unlock();
}
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
+	struct llist_node *lnode;
+
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (!cgroup_parent(css->cgroup))
+ return;
+
+ lnode = llist_del_all(lhead);
+ if (lnode)
+ __blkcg_rstat_flush(lnode);
+}
+
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
@@ -2075,7 +2103,6 @@ void blk_cgroup_bio_start(struct bio *bio)
llist_add(&bis->lnode, lhead);
WRITE_ONCE(bis->lqueued, true);
- percpu_ref_get(&bis->blkg->refcnt);
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 885f5395fcd0..88e6647f49df 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -694,6 +694,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
void cgroup_rstat_flush_atomic(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
+void cgroup_rstat_flush_acquire(void);
void cgroup_rstat_flush_release(void);
/*
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 9c4c55228567..b0fd4e27f466 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -273,7 +273,20 @@ void cgroup_rstat_flush_hold(struct cgroup *cgrp)
}
/**
- * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
+ * cgroup_rstat_flush_acquire - acquire cgroup_rstat_lock
+ *
+ * Callers can acquire the internal cgroup_rstat_lock to prevent concurrent
+ * execution of cgroup_rstat_flush*() and the controller callbacks.
+ */
+void cgroup_rstat_flush_acquire(void)
+ __acquires(&cgroup_rstat_lock)
+{
+ spin_lock_irq(&cgroup_rstat_lock);
+}
+
+/**
+ * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() or
+ * cgroup_rstat_flush_acquire()
*/
void cgroup_rstat_flush_release(void)
__releases(&cgroup_rstat_lock)