> Il giorno 4 apr 2019, alle ore 21:22, Dmitrii Tcvetkov <demfloro@xxxxxxxxxxx> ha scritto: > > On Mon, 1 Apr 2019 12:35:11 +0200 > Paolo Valente <paolo.valente@xxxxxxxxxx> wrote: > >> >> >>> Il giorno 1 apr 2019, alle ore 11:22, Dmitrii Tcvetkov >>> <demfloro@xxxxxxxxxxx> ha scritto: >>> >>> On Mon, 1 Apr 2019 11:01:27 +0200 >>> Paolo Valente <paolo.valente@xxxxxxxxxx> wrote: >>>> Ok, thank you. Could you please do a >>>> >>>> list *(bfq_bfqq_expire+0x1f3) >>>> >>>> for me? >>>> >>>> Thanks, >>>> Paolo >>>> >>>>> >>>>> <gpf.txt><gpf-w-bfq-group-iosched.txt><config.txt> >>> >>> Reading symbols from vmlinux...done. >>> (gdb) list *(bfq_bfqq_expire+0x1f3) >>> 0xffffffff813d02c3 is in bfq_bfqq_expire (block/bfq-iosched.c:3390). >>> 3385 * even in case bfqq and thus parent entities go on >>> receiving 3386 * service with the same budget. >>> 3387 */ >>> 3388 entity = entity->parent; >>> 3389 for_each_entity(entity) >>> 3390 entity->service = 0; >>> 3391 } >>> 3392 >>> 3393 /* >>> 3394 * Budget timeout is not implemented through a dedicated >>> timer, but >> >> Thank you very much. Unfortunately this doesn't ring any bell. I'm >> trying to reproduce the failure. It will probably take a little >> time. If I don't make it, I'll ask you to kindly retry after applying >> some instrumentation patch. >> > > I looked at what git is doing just before panic and it's doing a lot of > lstat() syscalls on working tree. > > I've attached a python script which reproduces the crash in about > 10 seconds after it prepares testdir, git checkout origin/linux-5.0.y > reproduces it in about 2 seconds. I have to use multiprocessing Pool as > I couldn't reproduce the crash using ThreadPool, probably due to Python > GIL. > Unfortunately this failure doesn't reproduce on my systems. But I have a suspicion. Could you please test this patch? 
(also attached as a compressed file): diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index fac188dd78fa..0a435bcfed20 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2822,7 +2822,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) bfq_remove_request(q, rq); } -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* * If this bfqq is shared between multiple processes, check @@ -2857,7 +2857,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) * or requeued before executing the next function, which * resets all in-service entites as no more in service. */ - __bfq_bfqd_reset_in_service(bfqd); + return __bfq_bfqd_reset_in_service(bfqd); } /** @@ -3262,7 +3262,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bool slow; unsigned long delta = 0; struct bfq_entity *entity = &bfqq->entity; - int ref; /* * Check whether the process is slow (see bfq_bfqq_is_slow). @@ -3347,10 +3346,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * reason. 
*/ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - ref = bfqq->ref; - __bfq_bfqq_expire(bfqd, bfqq); - - if (ref == 1) /* bfqq is gone, no more actions on it */ + if (__bfq_bfqq_expire(bfqd, bfqq)) + /* bfqq is gone, no more actions on it */ return; bfqq->injected_service = 0; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 062e1c4787f4..86394e503ca9 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -995,7 +995,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree); bool next_queue_may_preempt(struct bfq_data *bfqd); struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd); -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration); void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index a11bef75483d..a0c60c47ed1c 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1605,7 +1605,7 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) return bfqq; } -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) { struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; @@ -1629,8 +1629,18 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). */ - if (!in_serv_entity->on_st) + if (!in_serv_entity->on_st) { + /* + * bfqq may be freed here, if bfq_exit_bfqq(bfqq) has + * already been executed + */ + int ref = in_serv_bfqq->ref; bfq_put_queue(in_serv_bfqq); + if (ref == 1) + return true; + } + + return false; } void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
Attachment:
0001-block-bfq-tentative-fix-of-use-after-free-in-bfq_bfq.patch.gz
Description: GNU Zip compressed data
> <crash.py>