Hi Kiyoshi,
Kiyoshi Ueda wrote:
Hi Hannes,
On 2009/01/30 17:05 +0900, Kiyoshi Ueda wrote:
o kernel panic occurs by frequent table swapping during heavy I/Os.
That's probably fixed by this patch:
--- linux-2.6.27/drivers/md/dm.c.orig 2009-01-23 15:59:22.741461315 +0100
+++ linux-2.6.27/drivers/md/dm.c 2009-01-26 09:03:02.787605723 +0100
@@ -714,13 +714,14 @@ static void free_bio_clone(struct reques
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct bio *bio;
- struct dm_clone_bio_info *info;
while ((bio = clone->bio) != NULL) {
clone->bio = bio->bi_next;
- info = bio->bi_private;
- free_bio_info(md, info);
+ if (bio->bi_private) {
+ struct dm_clone_bio_info *info = bio->bi_private;
+ free_bio_info(md, info);
+ }
bio->bi_private = md->bs;
bio_put(bio);
The info field is not necessarily filled here, so we have to check for it
explicitly.
With these two patches request-based multipathing has survived all stress-tests
so far. Except on mainframe (zfcp), but that's more a driver-related thing.
My problem was different from that one, and I have fixed my problem.
What was this? Was it something specific to your setup or something within the
request-based multipathing code?
If the latter, I'd be _very_ much interested in seeing the patch. Naturally.
Do you hit some problem without the patch above?
If so, that should be a programming bug and we need to fix it. Otherwise,
we would be leaking memory (since all cloned bios should always have
the dm_clone_bio_info structure in ->bi_private).
Yes, I've found that one later on.
The real problem was in clone_request_bios(), which might end up dereferencing an
invalid end_io_data pointer. Patch is attached.
Cheers,
Hannes
--
Dr. Hannes Reinecke zSeries & Storage
hare@xxxxxxx +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)
From: Hannes Reinecke <hare@xxxxxxx>
Subject: Kernel oops in free_bio_clone()
References: bnc#472360
Bug is here:
static int setup_clone(struct request *clone, struct request *rq,
struct dm_rq_target_io *tio)
{
int r;
blk_rq_init(NULL, clone);
r = clone_request_bios(clone, rq, tio->md);
if (r)
return r;
copy_request_info(clone, rq);
clone->start_time = jiffies;
clone->end_io = end_clone_request;
clone->end_io_data = tio;
return 0;
}
clone_request_bios() might end up calling free_bio_clone(), which references:
static void free_bio_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
...
but end_io_data will be set only _after_ the call to clone_request_bios().
So we should be passing the 'md' argument directly here to avoid this
bug and several pointless dereferences.
Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
--- linux-2.6.27-SLE11_BRANCH/drivers/md/dm.c.orig 2009-02-04 10:33:22.656627650 +0100
+++ linux-2.6.27-SLE11_BRANCH/drivers/md/dm.c 2009-02-05 11:03:35.843251773 +0100
@@ -709,10 +709,8 @@ static void end_clone_bio(struct bio *cl
blk_update_request(tio->orig, 0, nr_bytes);
}
-static void free_bio_clone(struct request *clone)
+static void free_bio_clone(struct request *clone, struct mapped_device *md)
{
- struct dm_rq_target_io *tio = clone->end_io_data;
- struct mapped_device *md = tio->md;
struct bio *bio;
while ((bio = clone->bio) != NULL) {
@@ -743,7 +741,7 @@ static void dm_unprep_request(struct req
rq->special = NULL;
rq->cmd_flags &= ~REQ_DONTPREP;
- free_bio_clone(clone);
+ free_bio_clone(clone, tio->md);
dec_rq_pending(tio);
free_rq_tio(tio->md, tio);
}
@@ -820,7 +818,7 @@ static void dm_end_request(struct reques
rq->sense_len = clone->sense_len;
}
- free_bio_clone(clone);
+ free_bio_clone(clone, tio->md);
dec_rq_pending(tio);
free_rq_tio(tio->md, tio);
@@ -1406,7 +1404,7 @@ static int clone_request_bios(struct req
return 0;
free_and_out:
- free_bio_clone(clone);
+ free_bio_clone(clone, md);
return -ENOMEM;
}
--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel