The new SG_CTL_FLAGM_EXCL_WAITQ boolean flag can be set on a sg file descriptor so that subsequent wait_event_interruptible() calls can be changed to their "_exclusive()" variants. This is to address the potential "thundering herd" problem with the wait_queue Signed-off-by: Douglas Gilbert <dgilbert@xxxxxxxxxxxx> --- drivers/scsi/sg.c | 83 ++++++++++++++++++++++++++++-------------- include/uapi/scsi/sg.h | 29 ++++++++++----- 2 files changed, 74 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4ccb7ab469f1..02435d2ef555 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -139,6 +139,7 @@ enum sg_shr_var { #define SG_FRQ_FOR_MMAP 7 /* request needs PAGE_SIZE elements */ #define SG_FRQ_COUNT_ACTIVE 8 /* sfp->submitted + waiting active */ #define SG_FRQ_ISSUED 9 /* blk_execute_rq_nowait() finished */ +#define SG_FRQ_POLL_SLEPT 10 /* stop re-entry of hybrid_sleep() */ /* Bit positions (flags) for sg_fd::ffd_bm bitmask follow */ #define SG_FFD_FORCE_PACKID 0 /* receive only given pack_id/tag */ @@ -153,6 +154,7 @@ enum sg_shr_var { #define SG_FFD_NO_DURATION 9 /* don't do command duration calc */ #define SG_FFD_MORE_ASYNC 10 /* yield EBUSY more often */ #define SG_FFD_MRQ_ABORT 11 /* SG_IOABORT + FLAG_MULTIPLE_REQS */ +#define SG_FFD_EXCL_WAITQ 12 /* append _exclusive to wait_event */ /* Bit positions (flags) for sg_device::fdev_bm bitmask follow */ #define SG_FDEV_EXCLUDE 0 /* have fd open with O_EXCL */ @@ -962,6 +964,17 @@ sg_mrq_1complet(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds, return 0; } +static int +sg_wait_mrq_event(struct sg_fd *sfp, struct sg_request **srpp) +{ + if (test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm)) + return __wait_event_interruptible_exclusive + (sfp->cmpl_wait, + sg_mrq_get_ready_srp(sfp, srpp)); + return __wait_event_interruptible(sfp->cmpl_wait, + sg_mrq_get_ready_srp(sfp, srpp)); +} + /* * This is a fair-ish algorithm for an interruptible wait on two file * descriptors. It favours the main fd over the secondary fd (sec_sfp). @@ -1002,9 +1015,7 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds, return res; } } else if (mreqs > 0) { - res = wait_event_interruptible - (sfp->cmpl_wait, - sg_mrq_get_ready_srp(sfp, &srp)); + res = sg_wait_mrq_event(sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) { @@ -1017,9 +1028,7 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds, return res; } } else if (sec_reqs > 0) { - res = wait_event_interruptible - (sec_sfp->cmpl_wait, - sg_mrq_get_ready_srp(sec_sfp, &srp)); + res = sg_wait_mrq_event(sec_sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) { @@ -1082,6 +1091,7 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop, rip, k, "no IMMED with COMPLETE_B4"); return -ERANGE; } + /* N.B. SGV4_FLAG_SIG_ON_OTHER is allowed */ } if (!sg_fd_is_shared(sfp)) { if (unlikely(flags & SGV4_FLAG_SHARE)) { @@ -1113,8 +1123,9 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop, /* * Implements the multiple request functionality. When 'blocking' is true * invocation was via ioctl(SG_IO), otherwise it was via ioctl(SG_IOSUBMIT). - * Only fully non-blocking if IMMED flag given or when ioctl(SG_IOSUBMIT) - * is used with O_NONBLOCK set on its file descriptor. + * Submit non-blocking if IMMED flag given or when ioctl(SG_IOSUBMIT) + * is used with O_NONBLOCK set on its file descriptor. Hipri non-blocking + * is when the HIPRI flag is given. */ static int sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) @@ -1174,8 +1185,7 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) immed = true; SG_LOG(3, fp, "%s: %s, tot_reqs=%u, id_of_mrq=%d\n", __func__, (immed ? "IMMED" : (blocking ? "ordered blocking" : - "variable blocking")), - tot_reqs, id_of_mrq); + "variable blocking")), tot_reqs, id_of_mrq); sg_sgv4_out_zero(cop); if (unlikely(tot_reqs > U16_MAX)) { @@ -2018,9 +2028,7 @@ sg_mrq_iorec_complets(struct sg_fd *sfp, bool non_block, int max_mrqs, return k; for ( ; k < max_mrqs; ++k) { - res = wait_event_interruptible - (sfp->cmpl_wait, - sg_mrq_get_ready_srp(sfp, &srp)); + res = sg_wait_mrq_event(sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) @@ -2083,6 +2091,19 @@ sg_mrq_ioreceive(struct sg_fd *sfp, struct sg_io_v4 *cop, void __user *p, return res; } +static int +sg_wait_id_event(struct sg_fd *sfp, struct sg_request **srpp, int id, + bool is_tag) +{ + if (test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm)) + return __wait_event_interruptible_exclusive + (sfp->cmpl_wait, + sg_get_ready_srp(sfp, srpp, id, is_tag)); + return __wait_event_interruptible + (sfp->cmpl_wait, + sg_get_ready_srp(sfp, srpp, id, is_tag)); +} + /* * Called when ioctl(SG_IORECEIVE) received. Expects a v4 interface object. * Checks if O_NONBLOCK file flag given, if not checks given 'flags' field @@ -2134,9 +2155,7 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p) return -ENODEV; if (non_block) return -EAGAIN; - res = wait_event_interruptible - (sfp->cmpl_wait, - sg_get_ready_srp(sfp, &srp, id, use_tag)); + res = sg_wait_id_event(sfp, &srp, id, use_tag); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) @@ -2191,9 +2210,7 @@ sg_ctl_ioreceive_v3(struct sg_fd *sfp, void __user *p) return -ENODEV; if (non_block) return -EAGAIN; - res = wait_event_interruptible - (sfp->cmpl_wait, - sg_get_ready_srp(sfp, &srp, pack_id, false)); + res = sg_wait_id_event(sfp, &srp, pack_id, false); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) @@ -2351,7 +2368,7 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos) int flgs; ret = get_user(flgs, &h3_up->flags); - if (ret) + if (unlikely(ret)) return ret; if (flgs & SGV4_FLAG_IMMED) non_block = true; @@ -2374,9 +2391,7 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos) return -ENODEV; if (non_block) /* O_NONBLOCK or v3::flags & SGV4_FLAG_IMMED */ return -EAGAIN; - ret = wait_event_interruptible - (sfp->cmpl_wait, - sg_get_ready_srp(sfp, &srp, want_id, false)); + ret = sg_wait_id_event(sfp, &srp, want_id, false); if (unlikely(ret)) /* -ERESTARTSYS as signal hit process */ return ret; if (IS_ERR(srp)) @@ -2846,9 +2861,9 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, goto skip_wait; } SG_LOG(3, sfp, "%s: about to wait_event...()\n", __func__); - /* usually will be woken up by sg_rq_end_io() callback */ - res = wait_event_interruptible(sfp->cmpl_wait, - sg_rq_landed(sdp, srp)); + /* N.B. The SG_FFD_EXCL_WAITQ flag is ignored here. */ + res = __wait_event_interruptible(sfp->cmpl_wait, + sg_rq_landed(sdp, srp)); if (unlikely(res)) { /* -ERESTARTSYS because signal hit thread */ set_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm); /* orphans harvested when sfp->keep_orphan is false */ @@ -3316,7 +3331,7 @@ sg_find_sfp_by_fd(const struct file *search_for, int search_fd, ++num_d; for (k = 0; k < num_d; ++k) { sdp = idr_find(&sg_index_idr, k); - if (unlikely(!sdp || SG_IS_DETACHING(sdp))) + if (unlikely(!sdp) || SG_IS_DETACHING(sdp)) continue; xa_for_each_marked(&sdp->sfp_arr, idx, sfp, SG_XA_FD_UNSHARED) { @@ -3354,7 +3369,7 @@ sg_find_sfp_by_fd(const struct file *search_for, int search_fd, ++num_d; for (k = 0; k < num_d; ++k) { sdp = idr_find(&sg_index_idr, k); - if (unlikely(!sdp || SG_IS_DETACHING(sdp))) + if (unlikely(!sdp) || SG_IS_DETACHING(sdp)) continue; xa_for_each(&sdp->sfp_arr, idx, sfp) { if (!sg_fd_is_shared(sfp)) @@ -3781,6 +3796,18 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip) else c_flgs_val_out &= ~SG_CTL_FLAGM_MORE_ASYNC; } + /* EXCL_WAITQ boolean, [rbw] */ + if (c_flgs_rm & SG_CTL_FLAGM_EXCL_WAITQ) + flg = test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm); + if (c_flgs_wm & SG_CTL_FLAGM_EXCL_WAITQ) + assign_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm, + !!(c_flgs_val_in & SG_CTL_FLAGM_EXCL_WAITQ)); + if (c_flgs_rm & SG_CTL_FLAGM_EXCL_WAITQ) { + if (flg) + c_flgs_val_out |= SG_CTL_FLAGM_EXCL_WAITQ; + else + c_flgs_val_out &= ~SG_CTL_FLAGM_EXCL_WAITQ; + } if (c_flgs_val_in != c_flgs_val_out) seip->ctl_flags = c_flgs_val_out; diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h index e1919eadf036..8b3fe773dfd5 100644 --- a/include/uapi/scsi/sg.h +++ b/include/uapi/scsi/sg.h @@ -114,16 +114,16 @@ typedef struct sg_io_hdr { #define SGV4_FLAG_YIELD_TAG 0x8 /* sg_io_v4::generated_tag set after SG_IOS */ #define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL #define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD -#define SGV4_FLAG_COMPLETE_B4 0x100 -#define SGV4_FLAG_SIGNAL 0x200 /* v3: ignored; v4 signal on completion */ -#define SGV4_FLAG_IMMED 0x400 /* for polling with SG_IOR, ignored in SG_IOS */ +#define SGV4_FLAG_COMPLETE_B4 0x100 /* mrq: complete this rq before next */ +#define SGV4_FLAG_SIGNAL 0x200 /* v3: ignored; v4 signal on completion */ +#define SGV4_FLAG_IMMED 0x400 /* issue request and return immediately ... */ #define SGV4_FLAG_HIPRI 0x800 /* request will use blk_poll to complete */ #define SGV4_FLAG_STOP_IF 0x1000 /* Stops sync mrq if error or warning */ #define SGV4_FLAG_DEV_SCOPE 0x2000 /* permit SG_IOABORT to have wider scope */ #define SGV4_FLAG_SHARE 0x4000 /* share IO buffer; needs SG_SEIM_SHARE_FD */ #define SGV4_FLAG_DO_ON_OTHER 0x8000 /* available on either of shared pair */ #define SGV4_FLAG_NO_DXFER SG_FLAG_NO_DXFER /* but keep dev<-->kernel xfr */ -#define SGV4_FLAG_MULTIPLE_REQS 0x20000 /* n sg_io_v4s in data-in */ +#define SGV4_FLAG_MULTIPLE_REQS 0x20000 /* 1 or more sg_io_v4-s in data-in */ /* Output (potentially OR-ed together) in v3::info or v4::info field */ #define SG_INFO_OK_MASK 0x1 @@ -151,7 +151,7 @@ typedef struct sg_scsi_id { short h_cmd_per_lun;/* host (adapter) maximum commands per lun */ short d_queue_depth;/* device (or adapter) maximum queue length */ union { - int unused[2]; /* as per version 3 driver */ + int unused[2]; /* as per version 3 driver */ __u8 scsi_lun[8]; /* full 8 byte SCSI LUN [in v4 driver] */ }; } sg_scsi_id_t; @@ -163,8 +163,14 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ /* sg_io_owned set imples synchronous, clear implies asynchronous */ char sg_io_owned;/* 0 -> complete with read(), 1 -> owned by SG_IO */ char problem; /* 0 -> no problem detected, 1 -> error to report */ + /* If SG_CTL_FLAGM_TAG_FOR_PACK_ID set on fd then next field is tag */ int pack_id; /* pack_id, in v4 driver may be tag instead */ void __user *usr_ptr; /* user provided pointer in v3+v4 interface */ + /* + * millisecs elapsed since the command started (req_state==1) or + * command duration (req_state==2). Will be in nanoseconds after + * the SG_SET_GET_EXTENDED{TIME_IN_NS} ioctl. + */ unsigned int duration; int unused; } sg_req_info_t; @@ -199,12 +205,13 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ #define SG_CTL_FLAGM_IS_SHARE 0x20 /* rd: fd is read-side or write-side share */ #define SG_CTL_FLAGM_IS_READ_SIDE 0x40 /* rd: this fd is read-side share */ #define SG_CTL_FLAGM_UNSHARE 0x80 /* undo share after inflight cmd */ -/* rd> 1: read-side finished 0: not; wr> 1: finish share post read-side */ +/* rd> 1: read-side finished, 0: not; wr> 1: finish share post read-side */ #define SG_CTL_FLAGM_READ_SIDE_FINI 0x100 /* wr> 0: setup for repeat write-side req */ #define SG_CTL_FLAGM_READ_SIDE_ERR 0x200 /* rd: sharing, read-side got error */ #define SG_CTL_FLAGM_NO_DURATION 0x400 /* don't calc command duration */ #define SG_CTL_FLAGM_MORE_ASYNC 0x800 /* yield EAGAIN in more cases */ -#define SG_CTL_FLAGM_ALL_BITS 0xfff /* should be OR of previous items */ +#define SG_CTL_FLAGM_EXCL_WAITQ 0x1000 /* only 1 wake up per response */ +#define SG_CTL_FLAGM_ALL_BITS 0x1fff /* should be OR of previous items */ /* Write one of the following values to sg_extended_info::read_value, get... */ #define SG_SEIRV_INT_MASK 0x0 /* get SG_SEIM_ALL_BITS */ @@ -437,9 +444,11 @@ struct sg_header { /* * New ioctls to replace async (non-blocking) write()/read() interface. * Present in version 4 and later of the sg driver [>20190427]. The - * SG_IOSUBMIT and SG_IORECEIVE ioctls accept the sg_v4 interface based on - * struct sg_io_v4 found in <include/uapi/linux/bsg.h>. These objects are - * passed by a pointer in the third argument of the ioctl. + * SG_IOSUBMIT_V3 and SG_IORECEIVE_V3 ioctls accept the sg_v3 interface + * based on struct sg_io_hdr shown above. The SG_IOSUBMIT and SG_IORECEIVE + * ioctls accept the sg_v4 interface based on struct sg_io_v4 found in + * <include/uapi/linux/bsg.h>. These objects are passed by a pointer in + * the third argument of the ioctl. * * Data may be transferred both from the user space to the driver by these * ioctls. Hence the _IOWR macro is used here to generate the ioctl number -- 2.25.1