On 11/1/22 5:15 AM, Christoph Hellwig wrote: > On Sun, Oct 30, 2022 at 06:05:35PM -0500, Mike Christie wrote: >> The problem I hit is that in the ioctl code I then have to do: >> >> @@ -269,7 +270,14 @@ static int blkdev_pr_register(struct block_device *bdev, >> >> if (reg.flags & ~PR_FL_IGNORE_KEY) >> return -EOPNOTSUPP; >> - return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); >> + ret = ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); >> + if (ret == -EBADE) { >> + if (bdev_is_nvme(bdev)) >> + ret = NVME_SC_RESERVATION_CONFLICT; >> + else if (bdev_is_scsi(bdev) >> + ret = SAM_STAT_RESERVATION_CONFLICT; >> + } >> + return ret; > > Eww. We should have never leaked protocol specific values out to > userspace. This is an original bug I introduceѕ, so all blame is on me. > > I suspect the right way to fix is is to keep the numeric value of > SAM_STAT_RESERVATION_CONFLICT and give it a new constant exposed in > the uapi header, assuming that SCSI is the thing people actually > used the PR API for, and nvme was just an nice little add-on. > Do you mean just doing this: diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 727123c611e6..6ac70514170d 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -72,12 +72,17 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c, static int nvme_send_pr_command(struct block_device *bdev, struct nvme_command *c, void *data, unsigned int data_len) { + int ret; + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && bdev->bd_disk->fops == &nvme_ns_head_ops) - return nvme_send_ns_head_pr_command(bdev, c, data, data_len); + ret = nvme_send_ns_head_pr_command(bdev, c, data, data_len); else - return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, - data, data_len); + ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, + data, data_len); + if (ret == NVME_SC_RESERVATION_CONFLICT) + ret = PR_STS_RESERVATION_CONFLICT; + return ret; } static int nvme_pr_command(struct block_device *bdev, u32 cdw10, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c0a614efcfce..c7621d8ffa51 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1840,7 +1840,8 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); - } + } else if (__get_status_byte(result) == SAM_STAT_RESERVATION_CONFLICT) + result = PR_STS_RESERVATION_CONFLICT; return result; } diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index ccc78cbf1221..7a637f9e5b49 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -13,6 +13,15 @@ enum pr_type { PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, }; +enum pr_status { + PR_STS_SUCCESS = 0x0, + /* + * The error codes are based on SCSI, because it was the primary user + * and had userspace users. + */ + PR_STS_RESERVATION_CONFLICT = 0x18, +}; + struct pr_reservation { __u64 key; __u32 type; --------------------------------------------------------------------------- Or we could add a new flag and make it nicer for the user in the future, but also uglier for the drivers but a little less uglier than adding the blk_sts field to the calls: diff --git a/block/ioctl.c b/block/ioctl.c index 60121e89052b..cae58f57ea13 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -269,7 +269,8 @@ static int blkdev_pr_register(struct block_device *bdev, if (reg.flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; - return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); + return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags, + reg.flags & PR_FL_PR_STAT); } static int blkdev_pr_reserve(struct block_device *bdev, @@ -287,7 +288,8 @@ static int blkdev_pr_reserve(struct block_device *bdev, if (rsv.flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; - return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); + return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags, + rsv.flags & PR_FL_PR_STAT) } static int blkdev_pr_release(struct block_device *bdev, @@ -305,7 +307,8 @@ static int blkdev_pr_release(struct block_device *bdev, if (rsv.flags) return -EOPNOTSUPP; - return ops->pr_release(bdev, rsv.key, rsv.type); + return ops->pr_release(bdev, rsv.key, rsv.type, + rsv.flags & PR_FL_PR_STAT); } static int blkdev_pr_preempt(struct block_device *bdev, @@ -323,7 +326,8 @@ static int blkdev_pr_preempt(struct block_device *bdev, if (p.flags) return -EOPNOTSUPP; - return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); + return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort, + p.flags & PR_FL_PR_STAT) } static int blkdev_pr_clear(struct block_device *bdev, @@ -341,7 +345,7 @@ static int blkdev_pr_clear(struct block_device *bdev, if (c.flags) return -EOPNOTSUPP; - return ops->pr_clear(bdev, c.key); + return ops->pr_clear(bdev, c.key, c.flags & PR_FL_PR_STAT); } static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 727123c611e6..7c317b646cde 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -70,18 +70,44 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c, } static int nvme_send_pr_command(struct block_device *bdev, - struct nvme_command *c, void *data, unsigned int data_len) + struct nvme_command *c, void *data, unsigned int data_len, + bool use_pr_sts) { + int ret; + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && bdev->bd_disk->fops == &nvme_ns_head_ops) - return nvme_send_ns_head_pr_command(bdev, c, data, data_len); + ret = nvme_send_ns_head_pr_command(bdev, c, data, data_len); else - return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, - data, data_len); + ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, + data, data_len); + if (!ret || ret < 0 || (ret > 0 && !use_pr_sts)) + return ret; + + switch (ret) { + case NVME_SC_RESERVATION_CONFLICT: + ret = PR_STS_RESERVATION_CONFLICT; + break; + case NVME_SC_HOST_PATH_ERROR: + ret = PR_STS_PATH_FAILURE; + break + case NVME_SC_BAD_ATTRIBUTES: + case NVME_SC_ONCS_NOT_SUPPORTED: + case NVME_SC_INVALID_OPCODE: + case NVME_SC_INVALID_FIELD: + case NVME_SC_INVALID_NS: + ret = PR_STS_INVALID_OP; + break; + default: + ret = PR_STS_IOERR; + break; + } + + return ret; } static int nvme_pr_command(struct block_device *bdev, u32 cdw10, - u64 key, u64 sa_key, u8 op) + u64 key, u64 sa_key, u8 op, bool use_pr_sts) { struct nvme_command c = { }; u8 data[16] = { 0, }; @@ -92,11 +118,11 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, c.common.opcode = op; c.common.cdw10 = cpu_to_le32(cdw10); - return nvme_send_pr_command(bdev, &c, data, sizeof(data)); + return nvme_send_pr_command(bdev, &c, data, sizeof(data), use_pr_sts); } static int nvme_pr_register(struct block_device *bdev, u64 old, - u64 new, unsigned flags) + u64 new, unsigned flags, bool use_pr_sts) { u32 cdw10; @@ -106,11 +132,12 @@ static int nvme_pr_register(struct block_device *bdev, u64 old, cdw10 = old ? 2 : 0; cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register, + use_pr_sts); } static int nvme_pr_reserve(struct block_device *bdev, u64 key, - enum pr_type type, unsigned flags) + enum pr_type type, unsigned flags, bool use_pr_sts) { u32 cdw10; @@ -119,29 +146,34 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, cdw10 = nvme_pr_type_from_blk(type) << 8; cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire, + use_pr_sts); } static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, - enum pr_type type, bool abort) + enum pr_type type, bool abort, bool use_pr_sts) { u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1); - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire, + use_pr_sts); } -static int nvme_pr_clear(struct block_device *bdev, u64 key) +static int nvme_pr_clear(struct block_device *bdev, u64 key, bool use_pr_sts) { u32 cdw10 = 1 | (key ? 0 : 1 << 3); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release, + use_pr_sts); } -static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) +static int nvme_pr_release(struct block_device *bdev, u64 key, + enum pr_type type, bool use_pr_sts) { u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release, + use_pr_sts); } static int nvme_pr_resv_report(struct block_device *bdev, void *data, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c0a614efcfce..46393bdd7427 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1797,7 +1797,8 @@ static int sd_pr_read_reservation(struct block_device *bdev, } static int sd_pr_out_command(struct block_device *bdev, u8 sa, - u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags) + u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags, + bool use_pr_sts) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; @@ -1842,44 +1843,74 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, scsi_print_sense_hdr(sdev, NULL, &sshdr); } + if (!result || result < 0 || (result > 0 && !use_pr_sts)) + return result; + + result = PR_STS_IOERR; + + switch host_byte(result) { + case DID_TRANSPORT_FAILFAST: + case DID_TRANSPORT_MARGINAL: + case DID_TRANSPORT_DISRUPTED: + result = PR_STS_PATH_FAILURE; + goto done; + } + + switch (__get_status_byte(result)) { + case SAM_STAT_RESERVATION_CONFLICT: + result = PR_STS_RESERVATION_CONFLICT; + goto done; + case SAM_STAT_CHECK_CONDITION: + if (!scsi_sense_valid(&sshdr)) + goto done; + + if (sshdr.sense_key == ILLEGAL_REQUEST && + (sshdr.asc == 0x26 || sshdr.asc == 0x24)) { + result = PR_STS_INVALID_OP; + goto done; + } + } + +done: return result; } static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, - u32 flags) + u32 flags, bool use_pr_sts) { if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00, - old_key, new_key, 0, - (1 << 0) /* APTPL */); + old_key, new_key, 0, (1 << 0) /* APTPL */, + use_pr_sts); } static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, - u32 flags) + u32 flags, bool use_pr_sts) { if (flags) return -EOPNOTSUPP; return sd_pr_out_command(bdev, 0x01, key, 0, - block_pr_type_to_scsi(type), 0); + block_pr_type_to_scsi(type), 0, use_pr_sts) } -static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type) +static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type, + bool use_pr_sts) { return sd_pr_out_command(bdev, 0x02, key, 0, - block_pr_type_to_scsi(type), 0); + block_pr_type_to_scsi(type), 0, use_pr_sts); } static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, - enum pr_type type, bool abort) + enum pr_type type, bool abort, bool use_pr_sts) { return sd_pr_out_command(bdev, abort ? 0x05 : 0x04, old_key, new_key, - block_pr_type_to_scsi(type), 0); + block_pr_type_to_scsi(type), 0, use_pr_sts); } -static int sd_pr_clear(struct block_device *bdev, u64 key) +static int sd_pr_clear(struct block_device *bdev, u64 key, bool use_pr_sts) { - return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0); + return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0, use_pr_sts); } static const struct pr_ops sd_pr_ops = { diff --git a/include/linux/pr.h b/include/linux/pr.h index 3003daec28a5..51e03e73a87f 100644 --- a/include/linux/pr.h +++ b/include/linux/pr.h @@ -18,14 +18,14 @@ struct pr_held_reservation { struct pr_ops { int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, - u32 flags); + u32 flags, bool use_pr_sts); int (*pr_reserve)(struct block_device *bdev, u64 key, - enum pr_type type, u32 flags); + enum pr_type type, u32 flags, bool use_pr_sts); int (*pr_release)(struct block_device *bdev, u64 key, - enum pr_type type); + enum pr_type type, bool use_pr_sts); int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, - enum pr_type type, bool abort); - int (*pr_clear)(struct block_device *bdev, u64 key); + enum pr_type type, bool abort, bool use_pr_sts); + int (*pr_clear)(struct block_device *bdev, u64 key, bool use_pr_sts); /* * pr_read_keys - Read the registered keys and return them in the * pr_keys->keys array. The keys array will have been allocated at the diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index ccc78cbf1221..ac8f7fc404ff 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -13,6 +13,14 @@ enum pr_type { PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, }; +enum pr_status { + PR_STS_SUCCESS, + PR_STS_IOERR, + PR_STS_RESERVATION_CONFLICT, + PR_STS_PATH_FAILURE, + PR_STS_INVALID_OP, +}; + struct pr_reservation { __u64 key; __u32 type; @@ -40,6 +48,7 @@ struct pr_clear { }; #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ +#define PR_FL_PR_STAT (1 << 1) /* Convert device errors to pr_status */ #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) #define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation) Bart, if this is what you were suggesting I misunderstood you. I thought you wanted to only pass the new code in the kernel so that's why I was saying we still have the problem of converting the error back when passing it back to userspace.