在 2022/01/21 18:56, Jan Kara 写道:
Hello,
here is the fifth version of my patches to fix use-after-free issues
in BFQ
when processes with merged queues get moved to different cgroups. The
patches
have survived some beating in my test VM, but so far I fail to
reproduce the
original KASAN reports so testing from people who can reproduce them
is most
welcome. Kuai, can you please give these patches a run in your setup?
Thanks
a lot for your help with fixing this!
Changes since v4:
* Even more aggressive splitting of merged bfq queues to avoid
problems with
long merge chains.
Changes since v3:
* Changed handling of bfq group move to handle the case when target of
the
merge has moved.
Changes since v2:
* Improved handling of bfq queue splitting on move between cgroups
* Removed broken change to bfq_put_cooperator()
Changes since v1:
* Added fix for bfq_put_cooperator()
* Added fix to handle move between cgroups in bfq_merge_bio()
Honza
Previous versions:
Link: http://lore.kernel.org/r/20211223171425.3551-1-jack@xxxxxxx # v1
Link: http://lore.kernel.org/r/20220105143037.20542-1-jack@xxxxxxx # v2
Link: http://lore.kernel.org/r/20220112113529.6355-1-jack@xxxxxxx # v3
Link: http://lore.kernel.org/r/20220114164215.28972-1-jack@xxxxxxx # v4
.
Hi, Jan
I add a new BUG_ON() in bfq_setup_merge() while iterating new_bfqq, and
this time this BUG_ON() is triggered:
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 00184530c644..4b17eb4a29bc 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -731,8 +731,12 @@ static struct bfq_group
*__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (sync_bfqq) {
if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
/* We are the only user of this bfqq, just move
it */
- if (sync_bfqq->entity.sched_data !=
&bfqg->sched_data)
+ if (sync_bfqq->entity.sched_data !=
&bfqg->sched_data) {
+ printk("%s: bfqq %px move from %px to
%px\n",
+ __func__, sync_bfqq,
+ bfqq_group(sync_bfqq), bfqg);
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ }
} else {
struct bfq_queue *bfqq;
@@ -741,10 +745,13 @@ static struct bfq_group
*__bfq_bic_change_cgroup(struct bfq_data *bfqd,
* that the merge chain still belongs to the same
* cgroup.
*/
- for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ for (bfqq = sync_bfqq; bfqq; bfqq =
bfqq->new_bfqq) {
+ printk("%s: bfqq %px(%px) bfqg %px\n",
__func__,
+ bfqq, bfqq_group(bfqq), bfqg);
if (bfqq->entity.sched_data !=
&bfqg->sched_data)
break;
+ }
if (bfqq) {
/*
* Some queue changed cgroup so the
merge is
@@ -878,6 +885,8 @@ static void bfq_reparent_leaf_entity(struct bfq_data
*bfqd,
}
bfqq = bfq_entity_to_bfqq(child_entity);
+ printk("%s: bfqq %px move from %px to %px\n", __func__, bfqq,
+ bfqq_group(bfqq), bfqd->root_group);
bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 07be51bc229b..6d4e243c9a1e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2753,6 +2753,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct
bfq_queue *new_bfqq)
while ((__bfqq = new_bfqq->new_bfqq)) {
if (__bfqq == bfqq)
return NULL;
+ if (new_bfqq->entity.parent != __bfqq->entity.parent &&
+ bfqq_group(__bfqq) != __bfqq->bfqd->root_group) {
+ printk("%s: bfqq %px(%px) new_bfqq %px(%px)\n",
__func__,
+ new_bfqq, bfqq_group(new_bfqq), __bfqq,
+ bfqq_group(__bfqq));
+ BUG_ON(1);
+ }
+
new_bfqq = __bfqq;
}
@@ -2797,6 +2805,8 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct
bfq_queue *new_bfqq)
* are likely to increase the throughput.
*/
bfqq->new_bfqq = new_bfqq;
+ printk("%s: set bfqq %px(%px) new_bfqq %px(%px)\n", __func__,
bfqq,
+ bfqq_group(bfqq), new_bfqq, bfqq_group(new_bfqq));
new_bfqq->ref += process_refs;
return new_bfqq;
}
@@ -2963,8 +2973,16 @@ bfq_setup_cooperator(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
if (bfq_too_late_for_merging(bfqq))
return NULL;
- if (bfqq->new_bfqq)
+ if (bfqq->new_bfqq) {
+ if (bfqq->entity.parent != bfqq->new_bfqq->entity.parent &&
+ bfqq_group(bfqq->new_bfqq) != bfqd->root_group) {
+ printk("%s: bfqq %px(%px) new_bfqq %px(%px)\n",
__func__,
+ bfqq, bfqq_group(bfqq), bfqq->new_bfqq,
+ bfqq_group(bfqq->new_bfqq));
+ BUG_ON(1);
+ }
return bfqq->new_bfqq;
+ }
if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
return NULL;
@@ -6928,6 +6946,9 @@ static void __bfq_put_async_bfqq(struct bfq_data
*bfqd,
bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
if (bfqq) {
+ printk("%s: bfqq %px move from %px to %px\n", __func__,
bfqq,
+ bfqq_group(bfqq), bfqd->root_group);
+
bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",