Signed-off-by: Hans Holmberg <hans.holmberg@xxxxxxx>
Signed-off-by: Dmitry Fomichev <dmitry.fomichev@xxxxxxx>
Signed-off-by: Ajay Joshi <ajay.joshi@xxxxxxx>
Signed-off-by: Aravind Ramesh <aravind.ramesh@xxxxxxx>
Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx>
Signed-off-by: Matias Bjørling <matias.bjorling@xxxxxxx>
Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Keith Busch <keith.busch@xxxxxxx>
---
drivers/nvme/host/Makefile |   1 +
drivers/nvme/host/core.c   |  91 ++++++++++++--
drivers/nvme/host/nvme.h   |  39 ++++++
drivers/nvme/host/zns.c    | 238 +++++++++++++++++++++++++++++++++++++
include/linux/nvme.h       | 111 +++++++++++++++++
5 files changed, 468 insertions(+), 12 deletions(-)
create mode 100644 drivers/nvme/host/zns.c
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index fc7b26be692d..d7f6a87687b8 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -13,6 +13,7 @@ nvme-core-y := core.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
+nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 58f137b9f2c5..e961910da4ac 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -89,7 +89,7 @@ static dev_t nvme_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
-static int nvme_revalidate_disk(struct gendisk *disk);
+static int _nvme_revalidate_disk(struct gendisk *disk);
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
@@ -287,6 +287,10 @@ void nvme_complete_rq(struct request *req)
nvme_retry_req(req);
return;
}
+ } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ req_op(req) == REQ_OP_ZONE_APPEND) {
+ req->__sector = nvme_lba_to_sect(req->q->queuedata,
+ le64_to_cpu(nvme_req(req)->result.u64));
}
nvme_trace_bio_complete(req, status);
@@ -673,7 +677,8 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
}
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
- struct request *req, struct nvme_command *cmnd)
+ struct request *req, struct nvme_command *cmnd,
+ enum nvme_opcode op)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
@@ -687,7 +692,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
- cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+ cmnd->rw.opcode = op;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
@@ -716,6 +721,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
+ if (op == nvme_cmd_zone_append)
+ control |= NVME_RW_APPEND_PIREMAP;
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
}
@@ -756,6 +763,19 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
break;
+ case REQ_OP_ZONE_RESET_ALL:
+ case REQ_OP_ZONE_RESET:
+ ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET);
+ break;
+ case REQ_OP_ZONE_OPEN:
+ ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN);
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE);
+ break;
+ case REQ_OP_ZONE_FINISH:
+ ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
+ break;
case REQ_OP_WRITE_ZEROES:
ret = nvme_setup_write_zeroes(ns, req, cmd);
break;
@@ -763,8 +783,13 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
ret = nvme_setup_discard(ns, req, cmd);
break;
case REQ_OP_READ:
+ ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
+ break;
case REQ_OP_WRITE:
- ret = nvme_setup_rw(ns, req, cmd);
+ ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+ break;
+ case REQ_OP_ZONE_APPEND:
+ ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
break;
default:
WARN_ON_ONCE(1);
@@ -1392,14 +1417,23 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return effects;
}
-static void nvme_update_formats(struct nvme_ctrl *ctrl)
+static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
{
struct nvme_ns *ns;
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
- if (ns->disk && nvme_revalidate_disk(ns->disk))
+ if (ns->disk && _nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
+ else if (blk_queue_is_zoned(ns->disk->queue)) {
+ /*
+ * IO commands are required to fully revalidate a zoned
+ * device. Force the command effects to trigger rescan
+ * work so report zones can run in a context with
+ * unfrozen IO queues.
+ */
+ *effects |= NVME_CMD_EFFECTS_NCC;
+ }
up_read(&ctrl->namespaces_rwsem);
}
@@ -1411,7 +1445,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
* this command.
*/
if (effects & NVME_CMD_EFFECTS_LBCC)
- nvme_update_formats(ctrl);
+ nvme_update_formats(ctrl, &effects);
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
@@ -1526,7 +1560,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
* Issue ioctl requests on the first available path. Note that unlike normal
* block layer requests we will not retry failed request on another controller.
*/
-static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
+struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx)
{
#ifdef CONFIG_NVME_MULTIPATH
@@ -1546,7 +1580,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
return disk->private_data;
}
-static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
+void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
{
if (head)
srcu_read_unlock(&head->srcu, idx);
@@ -1939,21 +1973,28 @@ static void nvme_update_disk_info(struct gendisk *disk,
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
+ unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
+ int ret;
u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
* block layer can use before failing read/write for 0 capacity.
*/
- ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
+ ns->lba_shift = id->lbaf[lbaf].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
switch (ns->head->ids.csi) {
case NVME_CSI_NVM:
break;
+ case NVME_CSI_ZNS:
+ ret = nvme_update_zone_info(disk, ns, lbaf);
+ if (ret)
+ return ret;
+ break;
default:
dev_warn(ctrl->device, "unknown csi:%d ns:%d\n",
ns->head->ids.csi, ns->head->ns_id);
@@ -1967,7 +2008,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
ns->features = 0;
- ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@ -2010,7 +2051,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return 0;
}
-static int nvme_revalidate_disk(struct gendisk *disk)
+static int _nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -2058,6 +2099,28 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return ret;
}
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+ int ret;
+
+ ret = _nvme_revalidate_disk(disk);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (blk_queue_is_zoned(disk->queue)) {
+ struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ ret = blk_revalidate_disk_zones(disk, NULL);
+ if (!ret)
+ blk_queue_max_zone_append_sectors(disk->queue,
+ ctrl->max_zone_append);
+ }
+#endif
+ return ret;
+}
+
static char nvme_pr_type(enum pr_type type)
{
switch (type) {
@@ -2188,6 +2251,7 @@ static const struct block_device_operations nvme_fops = {
.release = nvme_release,
.getgeo = nvme_getgeo,
.revalidate_disk= nvme_revalidate_disk,
+ .report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -2213,6 +2277,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.getgeo = nvme_getgeo,
+ .report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
#endif /* CONFIG_NVME_MULTIPATH */
@@ -4439,6 +4504,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 58428e3a590e..662f95fbd909 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -239,6 +239,9 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
+#ifdef CONFIG_BLK_DEV_ZONED
+ u32 max_zone_append;
+#endif
u16 crdt[3];
u16 oncs;
u16 oacs;
@@ -403,6 +406,9 @@ struct nvme_ns {
u16 sgs;
u32 sws;
u8 pi_type;
+#ifdef CONFIG_BLK_DEV_ZONED
+ u64 zsze;
+#endif
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
@@ -568,6 +574,9 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset);
+struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
+ struct nvme_ns_head **head, int *srcu_idx);
+void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;
@@ -689,6 +698,36 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
}
#endif /* CONFIG_NVME_MULTIPATH */
+#ifdef CONFIG_BLK_DEV_ZONED
+int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
+ unsigned lbaf);
+
+int nvme_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
+
+blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
+ struct nvme_command *cmnd,
+ enum nvme_zone_mgmt_action action);
+#else
+#define nvme_report_zones NULL
+
+static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd,
+ enum nvme_zone_mgmt_action action)
+{
+ return BLK_STS_NOTSUPP;
+}
+
+static inline int nvme_update_zone_info(struct gendisk *disk,
+ struct nvme_ns *ns,
+ unsigned lbaf)
+{
+ dev_warn(ns->ctrl->device,
+ "Please enable CONFIG_BLK_DEV_ZONED to support ZNS
devices\n");
+ return -EPROTONOSUPPORT;
+}
+#endif
+
#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
new file mode 100644
index 000000000000..c08f6281b614
--- /dev/null
+++ b/drivers/nvme/host/zns.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include "nvme.h"
+
+static int nvme_set_max_append(struct nvme_ctrl *ctrl)
+{
+ struct nvme_command c = { };
+ struct nvme_id_ctrl_zns *id;
+ int status;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return -ENOMEM;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = NVME_ID_CNS_CS_CTRL;
+ c.identify.csi = NVME_CSI_ZNS;
+
+ status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ if (status) {
+ kfree(id);
+ return status;
+ }
+
+ ctrl->max_zone_append = 1 << (id->zamds + 3);
+ kfree(id);
+ return 0;
+}
+
+int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
+ unsigned lbaf)
+{
+ struct nvme_effects_log *log = ns->head->effects;
+ struct request_queue *q = disk->queue;
+ struct nvme_command c = { };
+ struct nvme_id_ns_zns *id;
+ int status;
+
+ /* Driver requires zone append support */
+ if (!(log->iocs[nvme_cmd_zone_append] & NVME_CMD_EFFECTS_CSUPP))
+ return -ENODEV;