Re: Bisected GFP in bfq_bfqq_expire on v5.1-rc1

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> Il giorno 4 apr 2019, alle ore 21:22, Dmitrii Tcvetkov <demfloro@xxxxxxxxxxx> ha scritto:
> 
> On Mon, 1 Apr 2019 12:35:11 +0200
> Paolo Valente <paolo.valente@xxxxxxxxxx> wrote:
> 
>> 
>> 
>>> Il giorno 1 apr 2019, alle ore 11:22, Dmitrii Tcvetkov
>>> <demfloro@xxxxxxxxxxx> ha scritto:
>>> 
>>> On Mon, 1 Apr 2019 11:01:27 +0200
>>> Paolo Valente <paolo.valente@xxxxxxxxxx> wrote:
>>>> Ok, thank you. Could you please do a
>>>> 
>>>> list *(bfq_bfqq_expire+0x1f3)
>>>> 
>>>> for me?
>>>> 
>>>> Thanks,
>>>> Paolo
>>>> 
>>>>> 
>>>>> <gpf.txt><gpf-w-bfq-group-iosched.txt><config.txt>
>>> 
>>> Reading symbols from vmlinux...done.
>>> (gdb) list *(bfq_bfqq_expire+0x1f3)
>>> 0xffffffff813d02c3 is in bfq_bfqq_expire (block/bfq-iosched.c:3390).
>>> 3385             * even in case bfqq and thus parent entities go on receiving
>>> 3386             * service with the same budget.
>>> 3387             */
>>> 3388            entity = entity->parent;
>>> 3389            for_each_entity(entity)
>>> 3390                    entity->service = 0;
>>> 3391    }
>>> 3392
>>> 3393    /*
>>> 3394     * Budget timeout is not implemented through a dedicated timer, but
>> 
>> Thank you very much.  Unfortunately this doesn't ring any bell.  I'm
>> trying to reproduce the failure.  It will probably take a little
>> time.  If I don't make it, I'll ask you to kindly retry after applying
>> some instrumentation patch.
>> 
> 
> I looked at what git is doing just before panic and it's doing a lot of
> lstat() syscalls on working tree.
> 
> I've attached a python script which reproduces the crash in about
> 10 seconds after it prepares testdir, git checkout origin/linux-5.0.y
> reproduces it in about 2 seconds. I have to use multiprocessing Pool as
> I couldn't reproduce the crash using ThreadPool, probably due to Python
> GIL.
> 

Unfortunately, this failure doesn't reproduce on my systems.  But I
have a suspicion about the cause.  Could you please test this patch
(also attached as a compressed file)?

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fac188dd78fa..0a435bcfed20 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2822,7 +2822,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
 	bfq_remove_request(q, rq);
 }
 
-static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	/*
 	 * If this bfqq is shared between multiple processes, check
@@ -2857,7 +2857,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	 * or requeued before executing the next function, which
 	 * resets all in-service entites as no more in service.
 	 */
-	__bfq_bfqd_reset_in_service(bfqd);
+	return __bfq_bfqd_reset_in_service(bfqd);
 }
 
 /**
@@ -3262,7 +3262,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	bool slow;
 	unsigned long delta = 0;
 	struct bfq_entity *entity = &bfqq->entity;
-	int ref;
 
 	/*
 	 * Check whether the process is slow (see bfq_bfqq_is_slow).
@@ -3347,10 +3346,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	 * reason.
 	 */
 	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
-	ref = bfqq->ref;
-	__bfq_bfqq_expire(bfqd, bfqq);
-
-	if (ref == 1) /* bfqq is gone, no more actions on it */
+	if (__bfq_bfqq_expire(bfqd, bfqq))
+		/* bfqq is gone, no more actions on it */
 		return;
 
 	bfqq->injected_service = 0;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 062e1c4787f4..86394e503ca9 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -995,7 +995,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity,
 			     bool ins_into_idle_tree);
 bool next_queue_may_preempt(struct bfq_data *bfqd);
 struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			 bool ins_into_idle_tree, bool expiration);
 void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index a11bef75483d..a0c60c47ed1c 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1605,7 +1605,7 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	return bfqq;
 }
 
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
 	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
 	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
@@ -1629,8 +1629,18 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 	 * service tree either, then release the service reference to
 	 * the queue it represents (taken with bfq_get_entity).
 	 */
-	if (!in_serv_entity->on_st)
+	if (!in_serv_entity->on_st) {
+		/*
+		 * bfqq may be freed here, if bfq_exit_bfqq(bfqq) has
+		 * already been executed
+		 */
+		int ref = in_serv_bfqq->ref;
 		bfq_put_queue(in_serv_bfqq);
+		if (ref == 1)
+			return true;
+	}
+
+	return false;
 }
 
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,

Attachment: 0001-block-bfq-tentative-fix-of-use-after-free-in-bfq_bfq.patch.gz
Description: GNU Zip compressed data



> <crash.py>


[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux