Experimental version. Add support for user space to pass a file
descriptor generated by the eventfd(2) system call to this driver via
ioctl(2), thereby associating the eventfd with a sg file descriptor.
Also add support for removing that association so another eventfd can
be attached to the same sg file descriptor. If an eventfd is active on
a sg fd and a request has the SGV4_FLAG_EVENTFD flag set, then on
completion of that request the driver "signals" that eventfd by adding
1 to its internal count.

Signed-off-by: Douglas Gilbert <dgilbert@xxxxxxxxxxxx>
---
 drivers/scsi/sg.c      | 157 +++++++++++++++++++++++++++++++----------
 include/uapi/scsi/sg.h |   9 ++-
 2 files changed, 124 insertions(+), 42 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 48bf5ccca5b5..d030f7c43bf0 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -46,6 +46,7 @@ static char *sg_version_date = "20210421";
 #include <linux/timekeeping.h>
 #include <linux/proc_fs.h>	/* used if CONFIG_SCSI_PROC_FS */
 #include <linux/xarray.h>
+#include <linux/eventfd.h>
 #include <linux/debugfs.h>
 
 #include <scsi/scsi.h>
@@ -293,6 +294,7 @@ struct sg_fd {	/* holds the state of a file descriptor */
	struct file *filp;	/* my identity when sharing */
	struct sg_fd __rcu *share_sfp;/* fd share cross-references, else NULL */
	struct fasync_struct *async_qp;	/* used by asynchronous notification */
+	struct eventfd_ctx *efd_ctxp;	/* eventfd context or NULL */
	struct xarray srp_arr;	/* xarray of sg_request object pointers */
	struct sg_request *rsv_arr[SG_MAX_RSV_REQS];
	struct kref f_ref;
@@ -412,6 +414,7 @@ static void sg_take_snap(struct sg_fd *sfp, bool clear_first);
 #define SG_HAVE_EXCLUDE(sdp) test_bit(SG_FDEV_EXCLUDE, (sdp)->fdev_bm)
 #define SG_IS_O_NONBLOCK(sfp) (!!((sfp)->filp->f_flags & O_NONBLOCK))
 #define SG_RQ_ACTIVE(srp) (atomic_read(&(srp)->rq_st) != SG_RQ_INACTIVE)
+#define SG_IS_V4I(srp) test_bit(SG_FRQ_IS_V4I, (srp)->frq_bm)
 
 /*
  * Kernel needs to be built with CONFIG_SCSI_LOGGING to see log messages.
@@ -1098,7 +1101,7 @@ sg_mrq_arr_flush(struct sg_mrq_hold *mhp)
 }
 
 static int
-sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
+sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp,
		struct sg_request *srp)
 {
	int s_res, indx;
@@ -1109,30 +1112,37 @@ sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
	if (unlikely(!srp))
		return -EPROTO;
	indx = srp->s_hdr4.mrq_ind;
-	if (unlikely(srp->parentfp != do_on_sfp)) {
-		SG_LOG(1, do_on_sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
+	if (unlikely(srp->parentfp != sfp)) {
+		SG_LOG(1, sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
		       __func__, indx);
		return -EPROTO;
	}
-	SG_LOG(3, do_on_sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
+	SG_LOG(3, sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
	       srp->pack_id);
	if (unlikely(indx < 0 || indx >= tot_reqs))
		return -EPROTO;
	hp = a_hds + indx;
-	s_res = sg_receive_v4(do_on_sfp, srp, NULL, hp);
+	s_res = sg_receive_v4(sfp, srp, NULL, hp);
	if (unlikely(s_res == -EFAULT))
		return s_res;
	hp->info |= SG_INFO_MRQ_FINI;
	if (mhp->co_mmap) {
		sg_sgat_cp_into(mhp->co_mmap_sgatp, indx * SZ_SG_IO_V4,
				(const u8 *)hp, SZ_SG_IO_V4);
-		if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
-			kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
-	} else if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
+		if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
+			kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+		if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+			u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+			if (n != 1)
+				pr_info("%s: srp=%pK eventfd_signal problem\n",
+					__func__, srp);
+		}
+	} else if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
		s_res = sg_mrq_arr_flush(mhp);
		if (unlikely(s_res))	/* can only be -EFAULT */
			return s_res;
-		kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
+		kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
	}
	return 0;
 }
@@ -1474,6 +1484,14 @@ sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp,
			if (rq_sfp->async_qp &&
			    (hp->flags & SGV4_FLAG_SIGNAL))
				kill_fasync(&rq_sfp->async_qp, SIGPOLL, POLL_IN);
+			if (rq_sfp->efd_ctxp &&
+			    (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+				u64 n = eventfd_signal(rq_sfp->efd_ctxp, 1);
+
+				if (n != 1)
+					pr_info("%s: eventfd_signal prob\n",
+						__func__);
+			}
		} else if (rq_sfp->async_qp &&
			   (hp->flags & SGV4_FLAG_SIGNAL)) {
			res = sg_mrq_arr_flush(mhp);
@@ -2677,6 +2695,34 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
	return err;
 }
 
+static void
+sg_complete_shr_rs(struct sg_fd *sfp, struct sg_request *srp, bool other_err,
+		   enum sg_rq_state sr_st)
+{
+	int poll_type = POLL_OUT;
+	struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
+
+	if (unlikely(!sg_result_is_good(srp->rq_result) || other_err)) {
+		set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
+		sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+		poll_type = POLL_HUP;	/* "Hang-UP flag */
+	} else if (sr_st != SG_RQ_SHR_SWAP) {
+		sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
+	}
+	if (ws_sfp && !srp->sh_srp) {
+		if (ws_sfp->async_qp &&
+		    (!SG_IS_V4I(srp) || (srp->rq_flags & SGV4_FLAG_SIGNAL)))
+			kill_fasync(&ws_sfp->async_qp, SIGPOLL, poll_type);
+		if (ws_sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+			u64 n = eventfd_signal(ws_sfp->efd_ctxp, 1);
+
+			if (n != 1)
+				pr_info("%s: srp=%pK eventfd prob\n",
+					__func__, srp);
+		}
+	}
+}
+
 static void
 sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
 {
@@ -2687,25 +2733,7 @@ sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
		 sg_shr_str(srp->sh_var, true));
	switch (srp->sh_var) {
	case SG_SHR_RS_RQ:
-		{
-			int poll_type = POLL_OUT;
-			struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
-
-			if (unlikely(!sg_result_is_good(srp->rq_result) ||
-				     other_err)) {
-				set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
-				if (sr_st != SG_RQ_BUSY)
-					sg_rq_chg_state_force(srp, SG_RQ_BUSY);
-				poll_type = POLL_HUP;	/* "Hang-UP flag */
-			} else if (sr_st != SG_RQ_SHR_SWAP) {
-				sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
-			}
-			if (ws_sfp && ws_sfp->async_qp && !srp->sh_srp &&
-			    (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
-			     (srp->rq_flags & SGV4_FLAG_SIGNAL)))
-				kill_fasync(&ws_sfp->async_qp, SIGPOLL,
-					    poll_type);
-		}
+		sg_complete_shr_rs(sfp, srp, other_err, sr_st);
		break;
	case SG_SHR_WS_RQ:	/* cleanup both on write-side completion */
		if (likely(sg_fd_is_shared(sfp))) {
@@ -3655,8 +3683,8 @@ sg_fill_request_element(struct sg_fd *sfp, struct sg_request *srp,
	rip->problem = !sg_result_is_good(srp->rq_result);
	rip->pack_id = test_bit(SG_FFD_PREFER_TAG, sfp->ffd_bm) ?
				srp->tag : srp->pack_id;
-	rip->usr_ptr = test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ?
-			uptr64(srp->s_hdr4.usr_ptr) : srp->s_hdr3.usr_ptr;
+	rip->usr_ptr = SG_IS_V4I(srp) ? uptr64(srp->s_hdr4.usr_ptr)
+				      : srp->s_hdr3.usr_ptr;
	xa_unlock_irqrestore(&sfp->srp_arr, iflags);
 }
 
@@ -3713,7 +3741,7 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
 #endif
		return res;
	}
-	if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm))
+	if (SG_IS_V4I(srp))
		res = sg_receive_v4(sfp, srp, p, h4p);
	else
		res = sg_receive_v3(sfp, srp, p);
@@ -4237,6 +4265,23 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
	return found ? 0 : -ENOTSOCK; /* ENOTSOCK for fd exists but not sg */
 }
 
+static int
+sg_eventfd_new(struct sg_fd *rs_sfp, int eventfd)
+		__must_hold(&rs_sfp->f_mutex)
+{
+	int ret = 0;
+
+	if (rs_sfp->efd_ctxp)
+		return -EBUSY;
+	rs_sfp->efd_ctxp = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(rs_sfp->efd_ctxp)) {
+		ret = PTR_ERR(rs_sfp->efd_ctxp);
+		rs_sfp->efd_ctxp = NULL;
+		return ret;
+	}
+	return ret;
+}
+
 /*
  * First normalize want_rsv_sz to be >= sfp->sgat_elem_sz and
  * <= max_segment_size.  Exit if that is the same as old size; otherwise
@@ -4465,7 +4510,6 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
	const u32 c_flgs_rm = seip->ctl_flags_rd_mask;
	const u32 c_flgs_val_in = seip->ctl_flags;
	u32 c_flgs_val_out = c_flgs_val_in;
-	struct sg_fd *rs_sfp;
	struct sg_device *sdp = sfp->parentdp;
 
	/* TIME_IN_NS boolean, [raw] time in nanoseconds (def: millisecs) */
@@ -4545,7 +4589,8 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
	 * when written: 1 --> write-side doesn't want to continue
	 */
	if ((c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) && sg_fd_is_shared(sfp)) {
-		rs_sfp = sg_fd_share_ptr(sfp);
+		struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
		if (rs_sfp && !IS_ERR_OR_NULL(rs_sfp->rsv_arr[0])) {
			struct sg_request *res_srp = rs_sfp->rsv_arr[0];
 
@@ -4562,7 +4607,8 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
			res = sg_finish_rs_rq(sfp);
	/* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */
	if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) {
-		rs_sfp = sg_fd_share_ptr(sfp);
+		struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
		if (rs_sfp && test_bit(SG_FFD_READ_SIDE_ERR, rs_sfp->ffd_bm))
			c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_ERR;
		else
@@ -4618,6 +4664,21 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
		else
			c_flgs_val_out &= ~SG_CTL_FLAGM_SNAP_DEV;
	}
+	/* RM_EVENTFD boolean, [rbw] */
+	if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD)
+		flg = !!sfp->efd_ctxp;
+	if ((c_flgs_wm & SG_CTL_FLAGM_RM_EVENTFD) && (c_flgs_val_in & SG_CTL_FLAGM_RM_EVENTFD)) {
+		if (sfp->efd_ctxp && atomic_read(&sfp->submitted) < 1) {
+			eventfd_ctx_put(sfp->efd_ctxp);
+			sfp->efd_ctxp = NULL;
+		}
+	}
+	if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD) {
+		if (flg)
+			c_flgs_val_out |= SG_CTL_FLAGM_RM_EVENTFD;
+		else
+			c_flgs_val_out &= ~SG_CTL_FLAGM_RM_EVENTFD;
+	}
 
	if (c_flgs_val_in != c_flgs_val_out)
		seip->ctl_flags = c_flgs_val_out;
@@ -4773,6 +4834,15 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
		}
		mutex_unlock(&sfp->f_mutex);
	}
+	if (or_masks & SG_SEIM_EVENTFD) {
+		mutex_lock(&sfp->f_mutex);
+		if (s_wr_mask & SG_SEIM_EVENTFD) {
+			result = sg_eventfd_new(sfp, (int)seip->share_fd);
+			if (ret == 0 && unlikely(result))
+				ret = result;
+		}
+		mutex_unlock(&sfp->f_mutex);
+	}
	/* call blk_poll() on this fd's HIPRI requests [raw] */
	if (or_masks & SG_SEIM_BLK_POLL) {
		n = 0;
@@ -5514,7 +5584,7 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
		a_resid = scsi_rp->resid_len;
 
		if (unlikely(a_resid)) {
-			if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+			if (SG_IS_V4I(srp)) {
				if (rq_data_dir(rqq) == READ)
					srp->in_resid = a_resid;
				else
@@ -5603,9 +5673,16 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
	}
	if (!(srp->rq_flags & SGV4_FLAG_HIPRI))
		wake_up_interruptible(&sfp->cmpl_wait);
-	if (sfp->async_qp && (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
+	if (sfp->async_qp && (!SG_IS_V4I(srp) ||
			      (srp->rq_flags & SGV4_FLAG_SIGNAL)))
		kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+	if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+		u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+		if (n != 1)
+			pr_info("%s: srp=%pK eventfd_signal problem\n",
+				__func__, srp);
+	}
	kref_put(&sfp->f_ref, sg_remove_sfp);	/* get in: sg_execute_cmd() */
 }
 
@@ -5943,7 +6020,7 @@ sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *
	if (rw_ind == WRITE)
		op_flags = REQ_SYNC | REQ_IDLE;
	k = 0;		/* N.B. following condition may increase k */
-	if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+	if (SG_IS_V4I(srp)) {
		struct sg_slice_hdr4 *slh4p = &srp->s_hdr4;
 
		if (slh4p->dir == SG_DXFER_TO_DEV) {
@@ -6028,7 +6105,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
		}
		SG_LOG(5, sfp, "%s: long_cmdp=0x%pK ++\n", __func__, long_cmdp);
	}
-	if (likely(test_bit(SG_FRQ_IS_V4I, srp->frq_bm))) {
+	if (SG_IS_V4I(srp)) {
		struct sg_io_v4 *h4p = cwrp->h4p;
 
		if (dxfer_dir == SG_DXFER_TO_DEV) {
@@ -7225,6 +7302,8 @@ sg_uc_remove_sfp(struct work_struct *work)
	if (subm != 0)
		SG_LOG(1, sfp, "%s: expected submitted=0 got %d\n",
		       __func__, subm);
+	if (sfp->efd_ctxp)
+		eventfd_ctx_put(sfp->efd_ctxp);
	xa_destroy(xafp);
	xadp = &sdp->sfp_arr;
	xa_lock_irqsave(xadp, iflags);
@@ -7553,7 +7632,7 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
 
	if (unlikely(len < 1))
		return 0;
-	v4 = test_bit(SG_FRQ_IS_V4I, srp->frq_bm);
+	v4 = SG_IS_V4I(srp);
	is_v3v4 = v4 ? true : (srp->s_hdr3.interface_id != '\0');
	sg_get_rsv_str(srp, " ", "", sizeof(b), b);
	if (strlen(b) > 5)
diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h
index 52eccedf2f33..148a5f2786ee 100644
--- a/include/uapi/scsi/sg.h
+++ b/include/uapi/scsi/sg.h
@@ -115,6 +115,7 @@ typedef struct sg_io_hdr {
 #define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL
 #define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
 #define SGV4_FLAG_DOUT_OFFSET 0x40	/* dout byte offset in v4::spare_in */
+#define SGV4_FLAG_EVENTFD 0x80	/* signal completion on ... */
 #define SGV4_FLAG_COMPLETE_B4 0x100	/* mrq: complete this rq before next */
 #define SGV4_FLAG_SIGNAL 0x200	/* v3: ignored; v4 signal on completion */
 #define SGV4_FLAG_IMMED 0x400	/* issue request and return immediately ... */
@@ -196,7 +197,8 @@ typedef struct sg_req_info {	/* used by SG_GET_REQUEST_TABLE ioctl() */
 #define SG_SEIM_CHG_SHARE_FD	0x40	/* read-side given new write-side fd */
 #define SG_SEIM_SGAT_ELEM_SZ	0x80	/* sgat element size (>= PAGE_SIZE) */
 #define SG_SEIM_BLK_POLL	0x100	/* call blk_poll, uses 'num' field */
-#define SG_SEIM_ALL_BITS	0x1ff	/* should be OR of previous items */
+#define SG_SEIM_EVENTFD		0x200	/* pass eventfd to driver */
+#define SG_SEIM_ALL_BITS	0x3ff	/* should be OR of previous items */
 
 /* flag and mask values for boolean fields follow */
 #define SG_CTL_FLAGM_TIME_IN_NS	0x1	/* time: nanosecs (def: millisecs) */
@@ -214,7 +216,8 @@ typedef struct sg_req_info {	/* used by SG_GET_REQUEST_TABLE ioctl() */
 #define SG_CTL_FLAGM_MORE_ASYNC	0x800	/* yield EAGAIN in more cases */
 #define SG_CTL_FLAGM_EXCL_WAITQ	0x1000	/* only 1 wake up per response */
 #define SG_CTL_FLAGM_SNAP_DEV	0x2000	/* output to debugfs::snapped */
-#define SG_CTL_FLAGM_ALL_BITS	0x3fff	/* should be OR of previous items */
+#define SG_CTL_FLAGM_RM_EVENTFD	0x4000	/* only if new eventfd wanted */
+#define SG_CTL_FLAGM_ALL_BITS	0x7fff	/* should be OR of previous items */
 
 /* Write one of the following values to sg_extended_info::read_value, get... */
 #define SG_SEIRV_INT_MASK	0x0	/* get SG_SEIM_ALL_BITS */
@@ -253,7 +256,7 @@ struct sg_extended_info {
	__u32 reserved_sz;	/* data/sgl size of pre-allocated request */
	__u32 tot_fd_thresh;	/* total data/sgat for this fd, 0: no limit */
	__u32 minor_index;	/* rd: kernel's sg device minor number */
-	__u32 share_fd;		/* SHARE_FD and CHG_SHARE_FD use this */
+	__u32 share_fd;		/* for SHARE_FD, CHG_SHARE_FD or EVENTFD */
	__u32 sgat_elem_sz;	/* sgat element size (must be power of 2) */
	__s32 num;		/* blk_poll: loop_count (-1 -> spin)) */
	__u8 pad_to_96[48];	/* pad so struct is 96 bytes long */
-- 
2.25.1
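
A minimal user-space sketch of the intended flow (not part of the
patch). It assumes a /dev/sg0 device, the uapi <scsi/sg.h> as patched
above, and the SG_SET_GET_EXTENDED, SG_IOSUBMIT and SG_IORECEIVE
ioctls from earlier patches in this series; names like tur_cdb are
illustrative and error handling is pared to the bone.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/bsg.h>		/* struct sg_io_v4 */
#include <scsi/sg.h>		/* as patched by this series */

int main(void)
{
	unsigned char tur_cdb[6] = { 0, 0, 0, 0, 0, 0 }; /* TEST UNIT READY */
	unsigned char sense[32];
	struct sg_extended_info sei;
	struct sg_io_v4 h4;
	uint64_t ev_cnt;
	int sg_fd, ev_fd;

	sg_fd = open("/dev/sg0", O_RDWR);
	ev_fd = eventfd(0, 0);
	if (sg_fd < 0 || ev_fd < 0)
		return 1;

	/* associate the eventfd with this sg fd; SG_SEIM_EVENTFD re-uses
	 * the sg_extended_info::share_fd field */
	memset(&sei, 0, sizeof(sei));
	sei.valid_wr_mask = SG_SEIM_EVENTFD;
	sei.share_fd = (uint32_t)ev_fd;
	if (ioctl(sg_fd, SG_SET_GET_EXTENDED, &sei) < 0)
		return 1;

	/* async v4 submission that asks for an eventfd "signal" */
	memset(&h4, 0, sizeof(h4));
	h4.guard = 'Q';
	h4.request = (uintptr_t)tur_cdb;
	h4.request_len = sizeof(tur_cdb);
	h4.response = (uintptr_t)sense;
	h4.max_response_len = sizeof(sense);
	h4.flags = SGV4_FLAG_EVENTFD;
	if (ioctl(sg_fd, SG_IOSUBMIT, &h4) < 0)
		return 1;

	/* blocks until sg_rq_end_io() adds 1 to the eventfd's count */
	if (read(ev_fd, &ev_cnt, sizeof(ev_cnt)) != sizeof(ev_cnt))
		return 1;
	if (ioctl(sg_fd, SG_IORECEIVE, &h4) < 0)	/* fetch response */
		return 1;
	printf("done: device_status=0x%x\n", h4.device_status);
	close(ev_fd);
	close(sg_fd);
	return 0;
}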
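
Detaching goes through the SG_CTL_FLAGM_RM_EVENTFD boolean handled in
sg_extended_bool_flags() above. A small helper sketch with the same
assumptions and includes as the previous example; sg_detach_eventfd is
a hypothetical name. Note that the driver only drops the eventfd
context when the fd has no submitted requests outstanding, and the
read-back bit reports whether an eventfd was attached when the ioctl
ran.

/* returns 1 if an eventfd was attached at the time of the call,
 * 0 if not, -1 on ioctl error */
static int sg_detach_eventfd(int sg_fd)
{
	struct sg_extended_info sei;

	memset(&sei, 0, sizeof(sei));
	sei.ctl_flags_wr_mask = SG_CTL_FLAGM_RM_EVENTFD;
	sei.ctl_flags_rd_mask = SG_CTL_FLAGM_RM_EVENTFD;
	sei.ctl_flags = SG_CTL_FLAGM_RM_EVENTFD;	/* 1 --> remove */
	if (ioctl(sg_fd, SG_SET_GET_EXTENDED, &sei) < 0)
		return -1;
	return !!(sei.ctl_flags & SG_CTL_FLAGM_RM_EVENTFD);
}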