On 12/12/18 8:33 AM, Jens Axboe wrote: > On 12/12/18 8:32 AM, Christoph Hellwig wrote: >> On Wed, Dec 12, 2018 at 08:22:37AM -0700, Jens Axboe wrote: >>> That one is a little worse, since we only need a full page if we >>> use all 256 segments. I don't want to make the fast case of >>> 16 bytes single segment allocs get a full page, so we have to >>> track if we used kmalloc() or mempool_alloc() for this particular >>> range. >>> >>> I guess I could abuse ->end_io for that, set it if we end up >>> punting to mempool. I'll do that. >> >> How about a full emergency page hanging off struct nvme_ctrl, >> and then in the completion path we can do: >> >> if (req->special_vec.bv_page == ctrl->discard_emergency_page) >> // clear some bit in ctrl->flags >> else >> kfree(page_address(req->special_vec.bv_page) + > > That's not a bad idea, then we don't have to track it. Something like this, will test it. diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c71e879821ad..7ca988e58790 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -564,9 +564,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_dsm_range *range; struct bio *bio; - range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); - if (!range) - return BLK_STS_RESOURCE; + range = kmalloc_array(segments, sizeof(*range), + GFP_ATOMIC | __GFP_NOWARN); + if (!range) { + if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy)) + return BLK_STS_RESOURCE; + + range = page_address(ns->ctrl->discard_page); + } __rq_for_each_bio(bio, req) { u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); @@ -581,7 +586,10 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } if (WARN_ON_ONCE(n != segments)) { - kfree(range); + if (virt_to_page(range) == ns->ctrl->discard_page) + clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + else + kfree(range); return BLK_STS_IOERR; } @@ -664,8 +672,13 @@ void nvme_cleanup_cmd(struct request *req) blk_rq_bytes(req) >> ns->lba_shift); } if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - kfree(page_address(req->special_vec.bv_page) + - req->special_vec.bv_offset); + struct nvme_ns *ns = req->rq_disk->private_data; + struct page *page = req->special_vec.bv_page; + + if (page == ns->ctrl->discard_page) + clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + else + kfree(page_address(page) + req->special_vec.bv_offset); } } EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); @@ -3578,6 +3591,7 @@ static void nvme_free_ctrl(struct device *dev) ida_simple_remove(&nvme_instance_ida, ctrl->instance); kfree(ctrl->effects); nvme_mpath_uninit(ctrl); + kfree(ctrl->discard_page); if (subsys) { mutex_lock(&subsys->lock); @@ -3618,6 +3632,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; + ctrl->discard_page = alloc_page(GFP_KERNEL); + if (!ctrl->discard_page) { + ret = -ENOMEM; + goto out; + } + ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); if (ret < 0) goto out; @@ -3655,6 +3675,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: + if (ctrl->discard_page) + __free_page(ctrl->discard_page); return ret; } EXPORT_SYMBOL_GPL(nvme_init_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e20e737ac10c..f1fe88598a04 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -241,6 +241,9 @@ struct nvme_ctrl { u16 maxcmd; int nr_reconnects; struct nvmf_ctrl_options *opts; + + struct page *discard_page; + unsigned long discard_page_busy; }; struct nvme_subsystem { -- Jens Axboe