This moves the block_device_operations over to common code mostly as-is. The only change is that the ns and ctrl refcounting got some small refcounting to have wrappers around the kref_put operations. A new free_ctrl operation is added to allow the PCI driver to free it's ressources on the final drop. Signed-off-by: Christoph Hellwig <hch@xxxxxx> [Moved the integrity and pr changes due to merge conflict] Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx> --- drivers/nvme/host/core.c | 413 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 14 ++ drivers/nvme/host/pci.c | 412 ++-------------------------------------------- 3 files changed, 439 insertions(+), 400 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3ed9c90..b858df6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -15,12 +15,55 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/errno.h> +#include <linux/hdreg.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/types.h> +#include <linux/pr.h> +#include <linux/ptrace.h> +#include <linux/nvme_ioctl.h> +#include <linux/t10-pi.h> +#include <scsi/sg.h> +#include <asm/unaligned.h> #include "nvme.h" +DEFINE_SPINLOCK(dev_list_lock); + +static void nvme_free_ns(struct kref *kref) +{ + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); + + if (ns->type == NVME_NS_LIGHTNVM) + nvme_nvm_unregister(ns->queue, ns->disk->disk_name); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + nvme_put_ctrl(ns->ctrl); + put_disk(ns->disk); + kfree(ns); +} + +void nvme_put_ns(struct nvme_ns *ns) +{ + kref_put(&ns->kref, nvme_free_ns); +} + +static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) +{ + struct nvme_ns *ns; + + spin_lock(&dev_list_lock); + ns = disk->private_data; + if (ns && !kref_get_unless_zero(&ns->kref)) + ns = NULL; + spin_unlock(&dev_list_lock); + + return ns; +} + static struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd) { @@ -269,3 +312,373 @@ int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) kfree(*log); return error; } + +static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) +{ + struct nvme_user_io io; + struct nvme_command c; + unsigned length, meta_len; + void __user *metadata; + + if (copy_from_user(&io, uio, sizeof(io))) + return -EFAULT; + + switch (io.opcode) { + case nvme_cmd_write: + case nvme_cmd_read: + case nvme_cmd_compare: + break; + default: + return -EINVAL; + } + + length = (io.nblocks + 1) << ns->lba_shift; + meta_len = (io.nblocks + 1) * ns->ms; + metadata = (void __user *)(uintptr_t)io.metadata; + + if (ns->ext) { + length += meta_len; + meta_len = 0; + } else if (meta_len) { + if ((io.metadata & 3) || !io.metadata) + return -EINVAL; + } + + memset(&c, 0, sizeof(c)); + c.rw.opcode = io.opcode; + c.rw.flags = io.flags; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.slba = cpu_to_le64(io.slba); + c.rw.length = cpu_to_le16(io.nblocks); + c.rw.control = cpu_to_le16(io.control); + c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); + c.rw.reftag = cpu_to_le32(io.reftag); + c.rw.apptag = cpu_to_le16(io.apptag); + c.rw.appmask = cpu_to_le16(io.appmask); + + return __nvme_submit_user_cmd(ns->queue, &c, + (void __user *)(uintptr_t)io.addr, length, + metadata, meta_len, io.slba, NULL, 0); +} + +int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd __user *ucmd) +{ + struct nvme_passthru_cmd cmd; + struct nvme_command c; + unsigned timeout = 0; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); + c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); + c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); + c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); + c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); + c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, + (void __user *)cmd.addr, cmd.data_len, + &cmd.result, timeout); + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + + switch (cmd) { + case NVME_IOCTL_ID: + force_successful_syscall_return(); + return ns->ns_id; + case NVME_IOCTL_ADMIN_CMD: + return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); + case NVME_IOCTL_IO_CMD: + return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); + case NVME_IOCTL_SUBMIT_IO: + return nvme_submit_io(ns, (void __user *)arg); + case SG_GET_VERSION_NUM: + return nvme_sg_get_version_num((void __user *)arg); + case SG_IO: + return nvme_sg_io(ns, (void __user *)arg); + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SG_IO: + return -ENOIOCTLCMD; + } + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif + +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; +} + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + nvme_put_ns(disk->private_data); +} + +static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bdev->bd_disk) >> 11; + return 0; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvme_init_integrity(struct nvme_ns *ns) +{ + struct blk_integrity integrity; + + switch (ns->pi_type) { + case NVME_NS_DPS_PI_TYPE3: + integrity.profile = &t10_pi_type3_crc; + break; + case NVME_NS_DPS_PI_TYPE1: + case NVME_NS_DPS_PI_TYPE2: + integrity.profile = &t10_pi_type1_crc; + break; + default: + integrity.profile = NULL; + break; + } + integrity.tuple_size = ns->ms; + blk_integrity_register(ns->disk, &integrity); + blk_queue_max_integrity_segments(ns->queue, 1); +} +#else +static void nvme_init_integrity(struct nvme_ns *ns) +{ +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + +static void nvme_config_discard(struct nvme_ns *ns) +{ + u32 logical_block_size = queue_logical_block_size(ns->queue); + ns->queue->limits.discard_zeroes_data = 0; + ns->queue->limits.discard_alignment = logical_block_size; + ns->queue->limits.discard_granularity = logical_block_size; + blk_queue_max_discard_sectors(ns->queue, 0xffffffff); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); +} + +int nvme_revalidate_disk(struct gendisk *disk) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_id_ns *id; + u8 lbaf, pi_type; + u16 old_ms; + unsigned short bs; + + if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { + dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", + __func__, ns->ctrl->instance, ns->ns_id); + return -ENODEV; + } + if (id->ncap == 0) { + kfree(id); + return -ENODEV; + } + + if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { + if (nvme_nvm_register(ns->queue, disk->disk_name)) { + dev_warn(ns->ctrl->dev, + "%s: LightNVM init failure\n", __func__); + kfree(id); + return -ENODEV; + } + ns->type = NVME_NS_LIGHTNVM; + } + + old_ms = ns->ms; + lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; + ns->lba_shift = id->lbaf[lbaf].ds; + ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + + /* + * If identify namespace failed, use default 512 byte block size so + * block layer can use before failing read/write for 0 capacity. + */ + if (ns->lba_shift == 0) + ns->lba_shift = 9; + bs = 1 << ns->lba_shift; + + /* XXX: PI implementation requires metadata equal t10 pi tuple size */ + pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? + id->dps & NVME_NS_DPS_PI_MASK : 0; + + blk_mq_freeze_queue(disk->queue); + if (blk_get_integrity(disk) && (ns->pi_type != pi_type || + ns->ms != old_ms || + bs != queue_logical_block_size(disk->queue) || + (ns->ms && ns->ext))) + blk_integrity_unregister(disk); + + ns->pi_type = pi_type; + blk_queue_logical_block_size(ns->queue, bs); + + if (ns->ms && !ns->ext) + nvme_init_integrity(ns); + + if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) + set_capacity(disk, 0); + else + set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); + + if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) + nvme_config_discard(ns); + blk_mq_unfreeze_queue(disk->queue); + + kfree(id); + return 0; +} + +static char nvme_pr_type(enum pr_type type) +{ + switch (type) { + case PR_WRITE_EXCLUSIVE: + return 1; + case PR_EXCLUSIVE_ACCESS: + return 2; + case PR_WRITE_EXCLUSIVE_REG_ONLY: + return 3; + case PR_EXCLUSIVE_ACCESS_REG_ONLY: + return 4; + case PR_WRITE_EXCLUSIVE_ALL_REGS: + return 5; + case PR_EXCLUSIVE_ACCESS_ALL_REGS: + return 6; + default: + return 0; + } +}; + +static int nvme_pr_command(struct block_device *bdev, u32 cdw10, + u64 key, u64 sa_key, u8 op) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_command c; + u8 data[16] = { 0, }; + + put_unaligned_le64(key, &data[0]); + put_unaligned_le64(sa_key, &data[8]); + + memset(&c, 0, sizeof(c)); + c.common.opcode = op; + c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.cdw10[0] = cpu_to_le32(cdw10); + + return nvme_submit_sync_cmd(ns->queue, &c, data, 16); +} + +static int nvme_pr_register(struct block_device *bdev, u64 old, + u64 new, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = old ? 2 : 0; + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; + cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); +} + +static int nvme_pr_reserve(struct block_device *bdev, u64 key, + enum pr_type type, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = nvme_pr_type(type) << 8; + cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); +} + +static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, + enum pr_type type, bool abort) +{ + u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); +} + +static int nvme_pr_clear(struct block_device *bdev, u64 key) +{ + u32 cdw10 = 1 | key ? 1 << 3 : 0; + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); +} + +static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) +{ + u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); +} + +static const struct pr_ops nvme_pr_ops = { + .pr_register = nvme_pr_register, + .pr_reserve = nvme_pr_reserve, + .pr_release = nvme_pr_release, + .pr_preempt = nvme_pr_preempt, + .pr_clear = nvme_pr_clear, +}; + +const struct block_device_operations nvme_fops = { + .owner = THIS_MODULE, + .ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, + .open = nvme_open, + .release = nvme_release, + .getgeo = nvme_getgeo, + .revalidate_disk= nvme_revalidate_disk, + .pr_ops = &nvme_pr_ops, +}; + +static void nvme_free_ctrl(struct kref *kref) +{ + struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); + + ctrl->ops->free_ctrl(ctrl); +} + +void nvme_put_ctrl(struct nvme_ctrl *ctrl) +{ + kref_put(&ctrl->kref, nvme_free_ctrl); +} + diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 15bf2b8..0756633 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -19,6 +19,8 @@ #include <linux/kref.h> #include <linux/blk-mq.h> +struct nvme_passthru_cmd; + extern unsigned char nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -34,6 +36,7 @@ struct nvme_ctrl { const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct device *dev; + struct kref kref; int instance; char name[12]; @@ -70,6 +73,7 @@ struct nvme_ns { struct nvme_ctrl_ops { int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); + void (*free_ctrl)(struct nvme_ctrl *ctrl); }; static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) @@ -149,6 +153,9 @@ static inline int nvme_error_status(u16 status) } } +void nvme_put_ctrl(struct nvme_ctrl *ctrl); +void nvme_put_ns(struct nvme_ns *ns); + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, @@ -169,6 +176,13 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, dma_addr_t dma_addr, u32 *result); +extern const struct block_device_operations nvme_fops; +extern spinlock_t dev_list_lock; + +int nvme_revalidate_disk(struct gendisk *disk); +int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd __user *ucmd); + struct sg_io_hdr; int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 44b3b56..424ccea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -79,7 +79,6 @@ static bool use_cmb_sqes = true; module_param(use_cmb_sqes, bool, 0644); MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); -static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; @@ -125,7 +124,6 @@ struct nvme_dev { struct msix_entry *entry; void __iomem *bar; struct list_head namespaces; - struct kref kref; struct device *device; struct work_struct reset_work; struct work_struct probe_work; @@ -599,27 +597,6 @@ static void nvme_dif_remap(struct request *req, } kunmap_atomic(pmap); } - -static void nvme_init_integrity(struct nvme_ns *ns) -{ - struct blk_integrity integrity; - - switch (ns->pi_type) { - case NVME_NS_DPS_PI_TYPE3: - integrity.profile = &t10_pi_type3_crc; - break; - case NVME_NS_DPS_PI_TYPE1: - case NVME_NS_DPS_PI_TYPE2: - integrity.profile = &t10_pi_type1_crc; - break; - default: - integrity.profile = NULL; - break; - } - integrity.tuple_size = ns->ms; - blk_integrity_register(ns->disk, &integrity); - blk_queue_max_integrity_segments(ns->queue, 1); -} #else /* CONFIG_BLK_DEV_INTEGRITY */ static void nvme_dif_remap(struct request *req, void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) @@ -631,9 +608,6 @@ static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) { } -static void nvme_init_integrity(struct nvme_ns *ns) -{ -} #endif static void req_completion(struct nvme_queue *nvmeq, void *ctx, @@ -1628,94 +1602,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) -{ - struct nvme_user_io io; - struct nvme_command c; - unsigned length, meta_len; - void __user *metadata; - - if (copy_from_user(&io, uio, sizeof(io))) - return -EFAULT; - - switch (io.opcode) { - case nvme_cmd_write: - case nvme_cmd_read: - case nvme_cmd_compare: - break; - default: - return -EINVAL; - } - - length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - metadata = (void __user *)(uintptr_t)io.metadata; - - if (ns->ext) { - length += meta_len; - meta_len = 0; - } else if (meta_len) { - if ((io.metadata & 3) || !io.metadata) - return -EINVAL; - } - - memset(&c, 0, sizeof(c)); - c.rw.opcode = io.opcode; - c.rw.flags = io.flags; - c.rw.nsid = cpu_to_le32(ns->ns_id); - c.rw.slba = cpu_to_le64(io.slba); - c.rw.length = cpu_to_le16(io.nblocks); - c.rw.control = cpu_to_le16(io.control); - c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); - c.rw.reftag = cpu_to_le32(io.reftag); - c.rw.apptag = cpu_to_le16(io.apptag); - c.rw.appmask = cpu_to_le16(io.appmask); - - return __nvme_submit_user_cmd(ns->queue, &c, - (void __user *)(uintptr_t)io.addr, length, - metadata, meta_len, io.slba, NULL, 0); -} - -static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd) -{ - struct nvme_passthru_cmd cmd; - struct nvme_command c; - unsigned timeout = 0; - int status; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) - return -EFAULT; - - memset(&c, 0, sizeof(c)); - c.common.opcode = cmd.opcode; - c.common.flags = cmd.flags; - c.common.nsid = cpu_to_le32(cmd.nsid); - c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); - c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); - c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); - c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); - c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); - c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); - c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); - c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); - - if (cmd.timeout_ms) - timeout = msecs_to_jiffies(cmd.timeout_ms); - - status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - &cmd.result, timeout); - if (status >= 0) { - if (put_user(cmd.result, &ucmd->result)) - return -EFAULT; - } - - return status; -} - static int nvme_subsys_reset(struct nvme_dev *dev) { if (!dev->subsystem) @@ -1725,281 +1611,6 @@ static int nvme_subsys_reset(struct nvme_dev *dev) return 0; } -static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, - unsigned long arg) -{ - struct nvme_ns *ns = bdev->bd_disk->private_data; - - switch (cmd) { - case NVME_IOCTL_ID: - force_successful_syscall_return(); - return ns->ns_id; - case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); - case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); - case NVME_IOCTL_SUBMIT_IO: - return nvme_submit_io(ns, (void __user *)arg); - case SG_GET_VERSION_NUM: - return nvme_sg_get_version_num((void __user *)arg); - case SG_IO: - return nvme_sg_io(ns, (void __user *)arg); - default: - return -ENOTTY; - } -} - -#ifdef CONFIG_COMPAT -static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case SG_IO: - return -ENOIOCTLCMD; - } - return nvme_ioctl(bdev, mode, cmd, arg); -} -#else -#define nvme_compat_ioctl NULL -#endif - -static void nvme_free_dev(struct kref *kref); -static void nvme_free_ns(struct kref *kref) -{ - struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - struct nvme_dev *dev = to_nvme_dev(ns->ctrl); - - if (ns->type == NVME_NS_LIGHTNVM) - nvme_nvm_unregister(ns->queue, ns->disk->disk_name); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - kref_put(&dev->kref, nvme_free_dev); - put_disk(ns->disk); - kfree(ns); -} - -static int nvme_open(struct block_device *bdev, fmode_t mode) -{ - int ret = 0; - struct nvme_ns *ns; - - spin_lock(&dev_list_lock); - ns = bdev->bd_disk->private_data; - if (!ns) - ret = -ENXIO; - else if (!kref_get_unless_zero(&ns->kref)) - ret = -ENXIO; - spin_unlock(&dev_list_lock); - - return ret; -} - -static void nvme_release(struct gendisk *disk, fmode_t mode) -{ - struct nvme_ns *ns = disk->private_data; - kref_put(&ns->kref, nvme_free_ns); -} - -static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) -{ - /* some standard values */ - geo->heads = 1 << 6; - geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; - return 0; -} - -static void nvme_config_discard(struct nvme_ns *ns) -{ - u32 logical_block_size = queue_logical_block_size(ns->queue); - ns->queue->limits.discard_zeroes_data = 0; - ns->queue->limits.discard_alignment = logical_block_size; - ns->queue->limits.discard_granularity = logical_block_size; - blk_queue_max_discard_sectors(ns->queue, 0xffffffff); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); -} - -static int nvme_revalidate_disk(struct gendisk *disk) -{ - struct nvme_ns *ns = disk->private_data; - struct nvme_dev *dev = to_nvme_dev(ns->ctrl); - struct nvme_id_ns *id; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - if (nvme_identify_ns(&dev->ctrl, ns->ns_id, &id)) { - dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, - dev->ctrl.instance, ns->ns_id); - return -ENODEV; - } - if (id->ncap == 0) { - kfree(id); - return -ENODEV; - } - - if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { - if (nvme_nvm_register(ns->queue, disk->disk_name)) { - dev_warn(dev->dev, - "%s: LightNVM init failure\n", __func__); - kfree(id); - return -ENODEV; - } - ns->type = NVME_NS_LIGHTNVM; - } - - old_ms = ns->ms; - lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - ns->lba_shift = id->lbaf[lbaf].ds; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); - - /* - * If identify namespace failed, use default 512 byte block size so - * block layer can use before failing read/write for 0 capacity. - */ - if (ns->lba_shift == 0) - ns->lba_shift = 9; - bs = 1 << ns->lba_shift; - - /* XXX: PI implementation requires metadata equal t10 pi tuple size */ - pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? - id->dps & NVME_NS_DPS_PI_MASK : 0; - - blk_mq_freeze_queue(disk->queue); - if (blk_get_integrity(disk) && (ns->pi_type != pi_type || - ns->ms != old_ms || - bs != queue_logical_block_size(disk->queue) || - (ns->ms && ns->ext))) - blk_integrity_unregister(disk); - - ns->pi_type = pi_type; - blk_queue_logical_block_size(ns->queue, bs); - - if (ns->ms && !ns->ext) - nvme_init_integrity(ns); - - if ((ns->ms && !(ns->ms == 8 && ns->pi_type) && - !blk_get_integrity(disk)) || - ns->type == NVME_NS_LIGHTNVM) - set_capacity(disk, 0); - else - set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); - - if (dev->ctrl.oncs & NVME_CTRL_ONCS_DSM) - nvme_config_discard(ns); - blk_mq_unfreeze_queue(disk->queue); - - kfree(id); - return 0; -} - -static char nvme_pr_type(enum pr_type type) -{ - switch (type) { - case PR_WRITE_EXCLUSIVE: - return 1; - case PR_EXCLUSIVE_ACCESS: - return 2; - case PR_WRITE_EXCLUSIVE_REG_ONLY: - return 3; - case PR_EXCLUSIVE_ACCESS_REG_ONLY: - return 4; - case PR_WRITE_EXCLUSIVE_ALL_REGS: - return 5; - case PR_EXCLUSIVE_ACCESS_ALL_REGS: - return 6; - default: - return 0; - } -}; - -static int nvme_pr_command(struct block_device *bdev, u32 cdw10, - u64 key, u64 sa_key, u8 op) -{ - struct nvme_ns *ns = bdev->bd_disk->private_data; - struct nvme_command c; - u8 data[16] = { 0, }; - - put_unaligned_le64(key, &data[0]); - put_unaligned_le64(sa_key, &data[8]); - - memset(&c, 0, sizeof(c)); - c.common.opcode = op; - c.common.nsid = cpu_to_le32(ns->ns_id); - c.common.cdw10[0] = cpu_to_le32(cdw10); - - return nvme_submit_sync_cmd(ns->queue, &c, data, 16); -} - -static int nvme_pr_register(struct block_device *bdev, u64 old, - u64 new, unsigned flags) -{ - u32 cdw10; - - if (flags & ~PR_FL_IGNORE_KEY) - return -EOPNOTSUPP; - - cdw10 = old ? 2 : 0; - cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; - cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); -} - -static int nvme_pr_reserve(struct block_device *bdev, u64 key, - enum pr_type type, unsigned flags) -{ - u32 cdw10; - - if (flags & ~PR_FL_IGNORE_KEY) - return -EOPNOTSUPP; - - cdw10 = nvme_pr_type(type) << 8; - cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); -} - -static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, - enum pr_type type, bool abort) -{ - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); -} - -static int nvme_pr_clear(struct block_device *bdev, u64 key) -{ - u32 cdw10 = 1 | (key ? 1 << 3 : 0); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); -} - -static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) -{ - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); -} - -static const struct pr_ops nvme_pr_ops = { - .pr_register = nvme_pr_register, - .pr_reserve = nvme_pr_reserve, - .pr_release = nvme_pr_release, - .pr_preempt = nvme_pr_preempt, - .pr_clear = nvme_pr_clear, -}; - -static const struct block_device_operations nvme_fops = { - .owner = THIS_MODULE, - .ioctl = nvme_ioctl, - .compat_ioctl = nvme_compat_ioctl, - .open = nvme_open, - .release = nvme_release, - .getgeo = nvme_getgeo, - .revalidate_disk= nvme_revalidate_disk, - .pr_ops = &nvme_pr_ops, -}; - static int nvme_kthread(void *data) { struct nvme_dev *dev, *next; @@ -2100,7 +1711,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (nvme_revalidate_disk(ns->disk)) goto out_free_disk; - kref_get(&dev->kref); + kref_get(&dev->ctrl.kref); if (ns->type != NVME_NS_LIGHTNVM) { add_disk(ns->disk); if (ns->ms) { @@ -2349,7 +1960,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) blk_cleanup_queue(ns->queue); } list_del_init(&ns->list); - kref_put(&ns->kref, nvme_free_ns); + nvme_put_ns(ns); } static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) @@ -2819,9 +2430,9 @@ static void nvme_release_instance(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } -static void nvme_free_dev(struct kref *kref) +static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { - struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); + struct nvme_dev *dev = to_nvme_dev(ctrl); put_device(dev->dev); put_device(dev->device); @@ -2848,7 +2459,7 @@ static int nvme_dev_open(struct inode *inode, struct file *f) ret = -EWOULDBLOCK; break; } - if (!kref_get_unless_zero(&dev->kref)) + if (!kref_get_unless_zero(&dev->ctrl.kref)) break; f->private_data = dev; ret = 0; @@ -2863,7 +2474,7 @@ static int nvme_dev_open(struct inode *inode, struct file *f) static int nvme_dev_release(struct inode *inode, struct file *f) { struct nvme_dev *dev = f->private_data; - kref_put(&dev->kref, nvme_free_dev); + nvme_put_ctrl(&dev->ctrl); return 0; } @@ -2978,19 +2589,19 @@ static int nvme_remove_dead_ctrl(void *arg) if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); - kref_put(&dev->kref, nvme_free_dev); + nvme_put_ctrl(&dev->ctrl); return 0; } static void nvme_dead_ctrl(struct nvme_dev *dev) { dev_warn(dev->dev, "Device failed to resume\n"); - kref_get(&dev->kref); + kref_get(&dev->ctrl.kref); if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", dev->ctrl.instance))) { dev_err(dev->dev, "Failed to start controller remove task\n"); - kref_put(&dev->kref, nvme_free_dev); + nvme_put_ctrl(&dev->ctrl); } } @@ -3068,6 +2679,7 @@ static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read32 = nvme_pci_reg_read32, + .free_ctrl = nvme_pci_free_ctrl, }; static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -3108,7 +2720,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release; - kref_init(&dev->kref); + kref_init(&dev->ctrl.kref); dev->device = device_create(nvme_class, &pdev->dev, MKDEV(nvme_char_major, dev->ctrl.instance), dev, "nvme%d", dev->ctrl.instance); @@ -3181,7 +2793,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_queues(dev, 0); nvme_release_cmb(dev); nvme_release_prp_pools(dev); - kref_put(&dev->kref, nvme_free_dev); + nvme_put_ctrl(&dev->ctrl); } /* These functions are yet to be implemented */ -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html