Re: [PATCH 11/11] nvme: add support for streams and directives

On Jun 13, 2017, at 11:15 AM, Jens Axboe <axboe@xxxxxxxxx> wrote:
> 
> This adds support for Directives in NVMe, in particular for the Streams
> directive. We default to allocating 4 streams per namespace, but
> it is configurable with the 'streams_per_ns' module option.
> 
> If a write stream is set in a write, flag it as such before
> sending it to the device.
> 
> Some debug output in this patch, dumping the stream ID params when
> we load nvme.
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
> drivers/nvme/host/core.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++
> drivers/nvme/host/nvme.h |   1 +
> include/linux/nvme.h     |  48 ++++++++++++++++++
> 3 files changed, 173 insertions(+)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 903d5813023a..81225e7d4176 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -65,6 +65,10 @@ static bool force_apst;
> module_param(force_apst, bool, 0644);
> MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
> 
> +static char streams_per_ns = 4;
> +module_param(streams_per_ns, byte, 0644);
> +MODULE_PARM_DESC(streams_per_ns, "if available, allocate this many streams per NS");

Are there any limits here?  For example, does this have to be a power-of-two
value, or is there a maximum number per namespace, per device, etc.?
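
For instance, invalid values could be clamped when streams are set up (just a
sketch, not part of the patch; NVME_MAX_STREAMS is a hypothetical cap):

	/* Hypothetical validation before nvme_streams_allocate(): round
	 * down to a power-of-two value and clamp to an assumed upper
	 * bound, so later code can mask instead of using a modulus. */
	if (streams_per_ns < 1)
		streams_per_ns = 1;
	if (!is_power_of_2(streams_per_ns))
		streams_per_ns = rounddown_pow_of_two(streams_per_ns);
	streams_per_ns = min_t(u8, streams_per_ns, NVME_MAX_STREAMS);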

> static LIST_HEAD(nvme_ctrl_list);
> static DEFINE_SPINLOCK(dev_list_lock);
> 
> @@ -351,6 +355,15 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
> 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
> 	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
> 
> +	if (req_op(req) == REQ_OP_WRITE) {
> +		if (bio_stream_valid(req->bio) && ns->streams) {
> +			unsigned stream = bio_stream(req->bio) & 0xffff;
> +
> +			control |= NVME_RW_DTYPE_STREAMS;
> +			dsmgmt |= (stream << 16);

It isn't really clear how this is converted from the 2^16 stream IDs masked
out of the 2^32 possible streams in the bio into streams_per_ns = 4 streams
per NVMe device namespace.  This appears to depend implicitly on the upper
layers submitting only 4 distinct WRITE_FILE_* stream IDs, but IMHO that
should be handled transparently at this level.  There isn't really any value
in masking the returned bio_stream with 0xffff, since either it will
internally be truncated by streams_per_ns (in which case we don't need to do
anything), or it should be explicitly folded into the accepted range, like:

			dsmgmt |= (bio_stream(req->bio) % streams_per_ns) << 16;

or if we want to avoid a 32-bit modulus here, we could pre-compute a mask from
streams_per_ns and enforce that this be a power-of-two value?
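
Something along those lines might look like (again just a sketch, assuming
streams_per_ns has already been validated as a power of two; the mask could
also be cached in a hypothetical ns->stream_mask field at allocation time):

	/* a power-of-two streams_per_ns makes the fold a cheap AND
	 * instead of a 32-bit modulus */
	unsigned int stream = bio_stream(req->bio) & (streams_per_ns - 1);

	control |= NVME_RW_DTYPE_STREAMS;
	dsmgmt |= stream << 16;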

Cheers, Andreas

> +		}
> +	}
> +
> 	if (ns->ms) {
> 		switch (ns->pi_type) {
> 		case NVME_NS_DPS_PI_TYPE3:
> @@ -1073,6 +1086,109 @@ static int nvme_revalidate_disk(struct gendisk *disk)
> 	return 0;
> }
> 
> +static int nvme_enable_streams(struct nvme_ns *ns)
> +{
> +	struct nvme_command c;
> +
> +	memset(&c, 0, sizeof(c));
> +
> +	c.directive.opcode = nvme_admin_directive_send;
> +	c.directive.nsid = cpu_to_le32(ns->ns_id);
> +	c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
> +	c.directive.dtype = NVME_DIR_IDENTIFY;
> +	c.directive.tdtype = NVME_DIR_STREAMS;
> +	c.directive.endir = NVME_DIR_ENDIR;
> +
> +	return nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
> +}
> +
> +static int nvme_streams_params(struct nvme_ns *ns)
> +{
> +	struct nvme_ctrl *ctrl = ns->ctrl;
> +	struct streams_directive_params s;
> +	struct nvme_command c;
> +	int ret;
> +
> +	memset(&c, 0, sizeof(c));
> +	memset(&s, 0, sizeof(s));
> +
> +	c.directive.opcode = nvme_admin_directive_recv;
> +	c.directive.nsid = cpu_to_le32(ns->ns_id);
> +	c.directive.numd = sizeof(s);
> +	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
> +	c.directive.dtype = NVME_DIR_STREAMS;
> +
> +	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, &s, sizeof(s));
> +	if (ret)
> +		return ret;
> +
> +	s.msl = le16_to_cpu(s.msl);
> +	s.nssa = le16_to_cpu(s.nssa);
> +	s.nsso = le16_to_cpu(s.nsso);
> +	s.sws = le32_to_cpu(s.sws);
> +	s.sgs = le16_to_cpu(s.sgs);
> +	s.nsa = le16_to_cpu(s.nsa);
> +	s.nso = le16_to_cpu(s.nso);
> +
> +	dev_info(ctrl->device, "streams: msl=%u, nssa=%u, nsso=%u, sws=%u "
> +				"sgs=%u, nsa=%u, nso=%u\n", s.msl, s.nssa,
> +				s.nsso, s.sws, s.sgs, s.nsa, s.nso);
> +	return 0;
> +}
> +
> +static int nvme_streams_allocate(struct nvme_ns *ns, unsigned int streams)
> +{
> +	struct nvme_command c;
> +
> +	memset(&c, 0, sizeof(c));
> +
> +	c.directive.opcode = nvme_admin_directive_recv;
> +	c.directive.nsid = cpu_to_le32(ns->ns_id);
> +	c.directive.doper = NVME_DIR_RCV_ST_OP_RESOURCE;
> +	c.directive.dtype = NVME_DIR_STREAMS;
> +	c.directive.endir = streams;
> +
> +	return nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
> +}
> +
> +static int nvme_streams_deallocate(struct nvme_ns *ns)
> +{
> +	struct nvme_command c;
> +
> +	memset(&c, 0, sizeof(c));
> +
> +	c.directive.opcode = nvme_admin_directive_send;
> +	c.directive.nsid = cpu_to_le32(ns->ns_id);
> +	c.directive.doper = NVME_DIR_SND_ST_OP_REL_RSC;
> +	c.directive.dtype = NVME_DIR_STREAMS;
> +
> +	return nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
> +}
> +
> +static void nvme_config_streams(struct nvme_ns *ns)
> +{
> +	int ret;
> +
> +	ret = nvme_enable_streams(ns);
> +	if (ret)
> +		return;
> +
> +	ret = nvme_streams_params(ns);
> +	if (ret)
> +		return;
> +
> +	ret = nvme_streams_allocate(ns, streams_per_ns);
> +	if (ret)
> +		return;
> +
> +	ret = nvme_streams_params(ns);
> +	if (ret)
> +		return;
> +
> +	ns->streams = true;
> +	dev_info(ns->ctrl->device, "successfully enabled streams\n");
> +}
> +
> static char nvme_pr_type(enum pr_type type)
> {
> 	switch (type) {
> @@ -1606,6 +1722,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
> 	ctrl->sgls = le32_to_cpu(id->sgls);
> 	ctrl->kas = le16_to_cpu(id->kas);
> 
> +	if (ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)
> +		dev_info(ctrl->dev, "supports directives\n");
> +
> 	ctrl->npss = id->npss;
> 	prev_apsta = ctrl->apsta;
> 	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
> @@ -2060,6 +2179,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
> 		goto out_free_id;
> 	}
> 
> +	if (ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)
> +		nvme_config_streams(ns);
> +
> 	disk = alloc_disk_node(0, node);
> 	if (!disk)
> 		goto out_free_id;
> @@ -2112,6 +2234,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
> 					&nvme_ns_attr_group);
> 		if (ns->ndev)
> 			nvme_nvm_unregister_sysfs(ns);
> +		if (ns->streams)
> +			nvme_streams_deallocate(ns);
> 		del_gendisk(ns->disk);
> 		blk_cleanup_queue(ns->queue);
> 	}
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9d6a070d4391..c2d8d23c90de 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -195,6 +195,7 @@ struct nvme_ns {
> 	int lba_shift;
> 	u16 ms;
> 	bool ext;
> +	bool streams;
> 	u8 pi_type;
> 	unsigned long flags;
> 
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index b625bacf37ef..8b2f5b140134 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -245,6 +245,7 @@ enum {
> 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
> 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
> 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
> +	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
> 	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 7,
> };
> 
> @@ -295,6 +296,19 @@ enum {
> };
> 
> enum {
> +	NVME_DIR_IDENTIFY		= 0x00,
> +	NVME_DIR_STREAMS		= 0x01,
> +	NVME_DIR_SND_ID_OP_ENABLE	= 0x01,
> +	NVME_DIR_SND_ST_OP_REL_ID	= 0x01,
> +	NVME_DIR_SND_ST_OP_REL_RSC	= 0x02,
> +	NVME_DIR_RCV_ID_OP_PARAM	= 0x01,
> +	NVME_DIR_RCV_ST_OP_PARAM	= 0x01,
> +	NVME_DIR_RCV_ST_OP_STATUS	= 0x02,
> +	NVME_DIR_RCV_ST_OP_RESOURCE	= 0x03,
> +	NVME_DIR_ENDIR			= 0x01,
> +};
> +
> +enum {
> 	NVME_NS_FEAT_THIN	= 1 << 0,
> 	NVME_NS_FLBAS_LBA_MASK	= 0xf,
> 	NVME_NS_FLBAS_META_EXT	= 0x10,
> @@ -535,6 +549,7 @@ enum {
> 	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
> 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
> 	NVME_RW_PRINFO_PRACT		= 1 << 13,
> +	NVME_RW_DTYPE_STREAMS		= 1 << 4,
> };
> 
> struct nvme_dsm_cmd {
> @@ -604,6 +619,8 @@ enum nvme_admin_opcode {
> 	nvme_admin_download_fw		= 0x11,
> 	nvme_admin_ns_attach		= 0x15,
> 	nvme_admin_keep_alive		= 0x18,
> +	nvme_admin_directive_send	= 0x19,
> +	nvme_admin_directive_recv	= 0x1a,
> 	nvme_admin_dbbuf		= 0x7C,
> 	nvme_admin_format_nvm		= 0x80,
> 	nvme_admin_security_send	= 0x81,
> @@ -756,6 +773,24 @@ struct nvme_get_log_page_command {
> 	__u32			rsvd14[2];
> };
> 
> +struct nvme_directive_cmd {
> +	__u8			opcode;
> +	__u8			flags;
> +	__u16			command_id;
> +	__le32			nsid;
> +	__u64			rsvd2[2];
> +	union nvme_data_ptr	dptr;
> +	__le32			numd;
> +	__u8			doper;
> +	__u8			dtype;
> +	__le16			dspec;
> +	__u8			endir;
> +	__u8			tdtype;
> +	__u16			rsvd15;
> +
> +	__u32			rsvd16[3];
> +};
> +
> /*
>  * Fabrics subcommands.
>  */
> @@ -886,6 +921,18 @@ struct nvme_dbbuf {
> 	__u32			rsvd12[6];
> };
> 
> +struct streams_directive_params {
> +	__u16	msl;
> +	__u16	nssa;
> +	__u16	nsso;
> +	__u8	rsvd[10];
> +	__u32	sws;
> +	__u16	sgs;
> +	__u16	nsa;
> +	__u16	nso;
> +	__u8	rsvd2[6];
> +};
> +
> struct nvme_command {
> 	union {
> 		struct nvme_common_command common;
> @@ -906,6 +953,7 @@ struct nvme_command {
> 		struct nvmf_property_set_command prop_set;
> 		struct nvmf_property_get_command prop_get;
> 		struct nvme_dbbuf dbbuf;
> +		struct nvme_directive_cmd directive;
> 	};
> };
> 
> --
> 2.7.4
> 

