On 11/18/12 8:19 PM, "Rusty Russell" <rusty@xxxxxxxxxxxxxxx> wrote: >Here's a patch to try which should tell us what species of corruption >it is: > >diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c >index 303779c..3e3081f 100644 >--- a/drivers/block/virtio_blk.c >+++ b/drivers/block/virtio_blk.c >@@ -55,6 +55,7 @@ struct virtio_blk > > struct virtblk_req > { >+ u32 magic; > struct list_head list; > struct request *req; > struct virtio_blk_outhdr out_hdr; >@@ -73,6 +74,11 @@ static void blk_done(struct virtqueue *vq) > while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { > int error; > >+ if (unlikely(vbr->magic != 0x87654321)) { >+ printk("vbr bad magic: 0x%08x\n", vbr->magic); >+ continue; /* And pray... */ >+ } >+ > switch (vbr->status) { > case VIRTIO_BLK_S_OK: > error = 0; >@@ -100,6 +106,7 @@ static void blk_done(struct virtqueue *vq) > > __blk_end_request_all(vbr->req, error); > list_del(&vbr->list); >+ vbr->magic = 0xfee1dead; > mempool_free(vbr, vblk->pool); > } > /* In case queue is stopped waiting for more buffers. */ >@@ -117,6 +124,7 @@ static bool do_req(struct request_queue *q, struct >virtio_blk *vblk, > if (!vbr) > /* When another request finishes we'll try again. */ > return false; >+ vbr->magic = 0x11111111; > > vbr->req = req; > >@@ -179,7 +187,9 @@ static bool do_req(struct request_queue *q, struct >virtio_blk *vblk, > } > } > >+ vbr->magic = 0x87654321; > if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { >+ vbr->magic = 0xc0ffee; > mempool_free(vbr, vblk->pool); > return false; > } I applied this patch to our setup, but we don't seem to have gotten any messages involving "vbr bad magic". I wonder whether we are seeing a different oops than the one Nicholas investigated, or whether the difference is due to the kernel versions involved.
The debug output we did get is here: ftp://ftp.whamcloud.com/uploads/lio-debug-5.txt.bz2 And here is a copy of the complete patch we're applying to the 3.6.3.fc17 kernel, just as confirmation: diff -uNrp kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/block/virtio_blk.c kernel-3.6.fc17.new/drivers/block/virtio_blk.c --- kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/block/virtio_blk.c 2012-09-30 19:47:46.000000000 -0400 +++ kernel-3.6.fc17.new/drivers/block/virtio_blk.c 2012-11-19 22:12:24.810619849 -0500 @@ -50,6 +50,7 @@ struct virtio_blk struct virtblk_req { + u32 magic; struct request *req; struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; @@ -67,6 +68,11 @@ static void blk_done(struct virtqueue *v while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { int error; + if (unlikely(vbr->magic != 0x87654321)) { + printk("vbr bad magic: 0x%08x\n", vbr->magic); + continue; /* And pray... */ + } + switch (vbr->status) { case VIRTIO_BLK_S_OK: error = 0; @@ -93,6 +99,7 @@ static void blk_done(struct virtqueue *v } __blk_end_request_all(vbr->req, error); + vbr->magic = 0xfee1dead; mempool_free(vbr, vblk->pool); } /* In case queue is stopped waiting for more buffers. */ @@ -110,6 +117,7 @@ static bool do_req(struct request_queue if (!vbr) /* When another request finishes we'll try again. 
*/ return false; + vbr->magic = 0x11111111; vbr->req = req; @@ -172,7 +180,9 @@ static bool do_req(struct request_queue } } + vbr->magic = 0x87654321; if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { + vbr->magic = 0xc0ffee; mempool_free(vbr, vblk->pool); return false; } diff -uNrp kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_device.c kernel-3.6.fc17.new/drivers/target/target_core_device.c --- kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_device.c 2012-09-30 19:47:46.000000000 -0400 +++ kernel-3.6.fc17.new/drivers/target/target_core_device.c 2012-11-08 13:47:46.949025221 -0500 @@ -850,20 +850,20 @@ int se_dev_check_shutdown(struct se_devi static u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) { - u32 tmp, aligned_max_sectors; + u32 aligned_max_sectors; + u32 alignment; /* * Limit max_sectors to a PAGE_SIZE aligned value for modern * transport_allocate_data_tasks() operation. */ - tmp = rounddown((max_sectors * block_size), PAGE_SIZE); - aligned_max_sectors = (tmp / block_size); - if (max_sectors != aligned_max_sectors) { - printk(KERN_INFO "Rounding down aligned max_sectors from %u" - " to %u\n", max_sectors, aligned_max_sectors); - return aligned_max_sectors; - } + alignment = max(1ul, PAGE_SIZE / block_size); + aligned_max_sectors = rounddown(max_sectors, alignment); - return max_sectors; + if (max_sectors != aligned_max_sectors) + pr_info("Rounding down aligned max_sectors from %u to %u\n", + max_sectors, aligned_max_sectors); + + return aligned_max_sectors; } void se_dev_set_default_attribs( diff -uNrp kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_iblock.c kernel-3.6.fc17.new/drivers/target/target_core_iblock.c --- kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_iblock.c 2012-09-30 19:47:46.000000000 -0400 +++ kernel-3.6.fc17.new/drivers/target/target_core_iblock.c 2012-11-13 13:41:53.240368020 -0500 @@ -47,7 +47,6 @@ #include 
"target_core_iblock.h" -#define IBLOCK_MAX_BIO_PER_TASK 32 /* max # of bios to submit at a time */ #define IBLOCK_BIO_POOL_SIZE 128 static struct se_subsystem_api iblock_template; @@ -559,8 +558,15 @@ static void iblock_complete_cmd(struct s static void iblock_bio_destructor(struct bio *bio) { struct se_cmd *cmd = bio->bi_private; - struct iblock_dev *ib_dev = cmd->se_dev->dev_ptr; + struct iblock_dev *ib_dev; + + printk("IBLOCK: bio_destructor: bio: %p, bi_sector: %llu, bi_size: %u\n", + bio, bio->bi_sector, bio->bi_size); + BUG_ON(!cmd); + BUG_ON(!cmd->se_dev); + BUG_ON(!cmd->se_dev->dev_ptr); + ib_dev = cmd->se_dev->dev_ptr; bio_free(bio, ib_dev->ibd_bio_set); } @@ -614,7 +620,6 @@ static int iblock_execute_rw(struct se_c struct scatterlist *sg; u32 sg_num = sgl_nents; sector_t block_lba; - unsigned bio_cnt; int rw; int i; @@ -664,10 +669,13 @@ static int iblock_execute_rw(struct se_c bio_list_init(&list); bio_list_add(&list, bio); - atomic_set(&ibr->pending, 2); - bio_cnt = 1; + atomic_set(&ibr->pending, 1); + printk("IBLOCK: block_lba: %llu sgl_nents: %d\n", block_lba, sgl_nents); for_each_sg(sgl, sg, sgl_nents, i) { + printk("IBLOCK: sg[%d]: %p addr: %p length: %u offset: %u\n", + i, sg, sg_page(sg), sg->length, sg->offset); + /* * XXX: if the length the device accepts is shorter than the * length of the S/G list entry this will cause and @@ -675,10 +683,6 @@ static int iblock_execute_rw(struct se_c */ while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { - if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) { - iblock_submit_bios(&list, rw); - bio_cnt = 0; - } bio = iblock_get_bio(cmd, block_lba, sg_num); if (!bio) @@ -686,16 +690,15 @@ static int iblock_execute_rw(struct se_c atomic_inc(&ibr->pending); bio_list_add(&list, bio); - bio_cnt++; } - + printk("IBLOCK: bio: %p bi_sector: %llu bi_size: %u\n", + bio, bio->bi_sector, bio->bi_size); /* Always in 512 byte units for Linux/Block */ block_lba += sg->length >> IBLOCK_LBA_SHIFT; sg_num--; } 
iblock_submit_bios(&list, rw); - iblock_complete_cmd(cmd); return 0; fail_put_bios: @@ -739,8 +742,13 @@ static void iblock_bio_done(struct bio * err = -EIO; if (err != 0) { - pr_err("test_bit(BIO_UPTODATE) failed for bio: %p," - " err: %d\n", bio, err); + printk("test_bit(BIO_UPTODATE) failed for bio: %p sector: %llu, bi_flags: 0x%16x" + " bi_phys_segments: %u bi_vcnt: %hu bi_idx: %hu bi_max_vecs: %u" + " err: %d\n", bio, bio->bi_sector, bio->bi_flags, bio->bi_phys_segments, + bio->bi_vcnt, bio->bi_idx, bio->bi_max_vecs, err); + printk("cmd: %p, CDB: 0x%02x data_length: %u t_data_sg: %p t_data_nents: %u\n", + cmd, cmd->t_task_cdb[0], cmd->data_length, cmd->t_data_sg, cmd->t_data_nents); + dump_stack(); /* * Bump the ib_bio_err_cnt and release bio. */ diff -uNrp kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_spc.c kernel-3.6.fc17.new/drivers/target/target_core_spc.c --- kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/drivers/target/target_core_spc.c 2012-11-01 17:43:08.915000002 -0400 +++ kernel-3.6.fc17.new/drivers/target/target_core_spc.c 2012-11-01 18:19:34.530994988 -0400 @@ -605,6 +605,8 @@ static int spc_emulate_inquiry(struct se unsigned char buf[SE_INQUIRY_BUF]; int p, ret; + memset(buf, 0, SE_INQUIRY_BUF); + if (dev == tpg->tpg_virt_lun0.lun_se_dev) buf[0] = 0x3f; /* Not connected */ else diff -uNrp kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/include/target/target_core_base.h kernel-3.6.fc17.new/include/target/target_core_base.h --- kernel-3.6.fc17/linux-3.6.3-1.fc17.x86_64/include/target/target_core_base.h 2012-09-30 19:47:46.000000000 -0400 +++ kernel-3.6.fc17.new/include/target/target_core_base.h 2012-11-06 21:41:24.998633100 -0500 @@ -86,7 +86,7 @@ /* Default unmap_granularity_alignment */ #define DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT 0 /* Default max transfer length */ -#define DA_FABRIC_MAX_SECTORS 8192 +#define DA_FABRIC_MAX_SECTORS 16384 /* Emulation for Direct Page Out */ #define DA_EMULATE_DPO 0 /* Emulation for Forced Unit Access 
WRITEs */ @@ -113,7 +113,7 @@ /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ #define DA_ENFORCE_PR_ISIDS 1 #define DA_STATUS_MAX_SECTORS_MIN 16 -#define DA_STATUS_MAX_SECTORS_MAX 8192 +#define DA_STATUS_MAX_SECTORS_MAX 32768 /* By default don't report non-rotating (solid state) medium */ #define DA_IS_NONROT 0 /* Queue Algorithm Modifier default for restricted reordering in control mode page */ -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html