On Thu, Apr 13, 2017 at 01:53:28PM +0200, Johannes Thumshirn wrote:
> On Thu, Apr 13, 2017 at 06:02:21PM +0800, Ming Lei wrote:
> > On Thu, Apr 13, 2017 at 10:06:29AM +0200, Johannes Thumshirn wrote:
> > > Doing a mkfs.btrfs on a (qemu emulated) PCIe NVMe causes a kernel panic
> > > in nvme_setup_prps() because the dma_len will drop below zero but the
> > > length will not.
> >
> > Looks like I can't reproduce the issue in QEMU (32G nvme, either partitioned
> > or not, just used 'mkfs.btrfs /dev/nvme0n1p1'), could you share the exact
> > mkfs command line and the size of your emulated NVMe?
>
> The exact cmdline is mkfs.btrfs -f /dev/nvme0n1p1 (-f because there was an
> existing btrfs on the image). The image is 17179869184 (a.k.a. 16G) bytes.
>
> [...]
>
> > Could you try the following patch to see if it fixes your issue?
>
> It's back to the old, erratic behaviour, see log below.

Ok, could you apply the attached debug patch and collect the ftrace log?
(ftrace_dump_on_oops needs to be passed on the kernel cmd line.)

Thanks,
Ming
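For reference, a rough sketch of how the trace from the debug patch below
could be collected (assuming debugfs is mounted at the usual
/sys/kernel/debug; paths may differ on your setup):

  # Either boot with ftrace_dump_on_oops on the kernel command line, or
  # flip it at runtime:
  echo 1 > /proc/sys/kernel/ftrace_dump_on_oops

  # Reproduce the report:
  mkfs.btrfs -f /dev/nvme0n1p1

  # The trace_printk() output from the patch goes into the ftrace ring
  # buffer; when the BUG triggers it is dumped to the console, otherwise
  # it can be read with:
  cat /sys/kernel/debug/tracing/trace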
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 26a5fd05fe88..a813a36d48d9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -491,6 +491,8 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 			break;
 		if (dma_len > 0)
 			continue;
+		if (dma_len < 0)
+			blk_dump_rq(req, "nvme dma sg gap");
 		BUG_ON(dma_len < 0);
 		sg = sg_next(sg);
 		dma_addr = sg_dma_address(sg);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8e521194f6fc..f3b001e401d2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -811,5 +811,29 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static inline void blk_dump_bio(struct bio *bio, const char *msg)
+{
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	int i = 0;
+	unsigned sectors = 0;
+
+	trace_printk("%s-%p: %hx/%hx %u %llu %u\n",
+			msg, bio,
+			bio->bi_flags, bio->bi_opf,
+			bio->bi_phys_segments,
+			(unsigned long long)bio->bi_iter.bi_sector,
+			bio->bi_iter.bi_size);
+	bio_for_each_segment(bvec, bio, iter) {
+		sectors += bvec.bv_len >> 9;
+		trace_printk("\t %d: %lu %u %u(%u)\n", i++,
+				(unsigned long)page_to_pfn(bvec.bv_page),
+				bvec.bv_offset,
+				bvec.bv_len, bvec.bv_len >> 12);
+	}
+	trace_printk("\t total sectors %u\n", sectors);
+}
+
+
 #endif /* CONFIG_BLOCK */
 #endif /* __LINUX_BIO_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7548f332121a..b75d6fe5a1b9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1698,6 +1698,22 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 	return bio_will_gap(req->q, bio, req->bio);
 }
 
+static inline void blk_dump_rq(const struct request *req, const char *msg)
+{
+	struct bio *bio;
+	int i = 0;
+
+	trace_printk("%s: dump bvec for %p(f:%x, seg: %d)\n",
+			msg, req, req->cmd_flags,
+			req->nr_phys_segments);
+
+	__rq_for_each_bio(bio, req) {
+		char num[16];
+		snprintf(num, 16, "%d", i++);
+		blk_dump_bio(bio, num);
+	}
+}
+
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);