On 2020/12/13 19:31, Damien Le Moal wrote: > On Sat, 2020-12-12 at 21:50 -0800, Chaitanya Kulkarni wrote: > [...] >> +static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z, >> + unsigned int idx, void *data) >> +{ >> + struct blk_zone *zone = data; >> + >> + memcpy(zone, z, sizeof(struct blk_zone)); > > See below. This is not necessary. > >> + >> + return 0; >> +} >> + >> +static bool nvmet_bdev_has_conv_zones(struct block_device *bdev) >> +{ >> + struct blk_zone zone; >> + int reported_zones; >> + unsigned int zno; >> + >> + if (bdev->bd_disk->queue->conv_zones_bitmap) >> + return false; > > Bug. > >> + >> + for (zno = 0; zno < blk_queue_nr_zones(bdev->bd_disk->queue); zno++) { > > Large capacity SMR drives have over 75,000 zones these days. Doing a report > zones one zone at a time will take forever. This needs to be optimized: see > below. > >> + reported_zones = blkdev_report_zones(bdev, >> + zno * bdev_zone_sectors(bdev), 1, >> + nvmet_bdev_validate_zns_zones_cb, >> + &zone); >> + >> + if (reported_zones != 1) >> + return true; >> + >> + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) >> + return true; > > This test should be in the nvmet_bdev_validate_zns_zones_cb() callback. That > callback can return an error if it finds a conventional zone. That will stop > blkdev_report_zones(). > > >> + } >> + >> + return false; >> +} > > What about this: > > static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z, > unsigned int idx, void *data) > { > if (z->type == BLK_ZONE_TYPE_CONVENTIONAL) > return -ENOTSUPP; > return 0; > } > > static bool nvmet_bdev_has_conv_zones(struct block_device *bdev) > { > int ret; > > if (bdev->bd_disk->queue->conv_zones_bitmap) > return true; > > ret = blkdev_report_zones(bdev, > get_capacity(bdev->bd_disk), blkdev_nr_zones(bdev), > nvmet_bdev_validate_zns_zones_cb, NULL); Oops. This is wrong. This should be: ret = blkdev_report_zones(bdev, 0, blkdev_nr_zones(bdev->bd_disk), nvmet_bdev_validate_zns_zones_cb, NULL); > if (ret < 1) > return true; > > return false; > } > > All zones are checked using the callback with the loop in > blkdev_report_zones(). > > [...] >> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req) >> +{ >> + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); >> + struct request_queue *q = req->ns->bdev->bd_disk->queue; >> + unsigned int max_sects = queue_max_zone_append_sectors(q); >> + u16 status = NVME_SC_SUCCESS; >> + unsigned int total_len = 0; >> + struct scatterlist *sg; >> + int ret = 0, sg_cnt; >> + struct bio *bio; >> + >> + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) >> + return; >> + >> + if (!req->sg_cnt) { >> + nvmet_req_complete(req, 0); >> + return; >> + } >> + >> + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { >> + bio = &req->b.inline_bio; >> + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); >> + } else { >> + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); >> + } >> + >> + bio_set_dev(bio, req->ns->bdev); >> + bio->bi_iter.bi_sector = sect; >> + bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; >> + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) >> + bio->bi_opf |= REQ_FUA; >> + >> + for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) { >> + struct page *p = sg_page(sg); >> + unsigned int l = sg->length; >> + unsigned int o = sg->offset; >> + bool same_page = false; >> + >> + ret = bio_add_hw_page(q, bio, p, l, o, max_sects, &same_page); >> + if (ret != sg->length) { >> + status = NVME_SC_INTERNAL; >> + goto out_bio_put; >> + } >> + if (same_page) >> + put_page(p); >> + >> + total_len += sg->length; >> + } >> + >> + if (total_len != nvmet_rw_data_len(req)) { >> + status = NVME_SC_INTERNAL | NVME_SC_DNR; >> + goto out_bio_put; >> + } >> + >> + ret = submit_bio_wait(bio); >> + status = ret < 0 ? NVME_SC_INTERNAL : status; >> + >> + req->cqe->result.u64 = nvmet_sect_to_lba(req->ns, >> + bio->bi_iter.bi_sector); > > Why set this if the BIO failed ? There may be no problems doing so, but I do > not see the point either. > >> + >> +out_bio_put: >> + if (bio != &req->b.inline_bio) >> + bio_put(bio); >> + nvmet_req_complete(req, status); >> +} > -- Damien Le Moal Western Digital Research