On Tue, 2015-02-03 at 12:14 +0200, Michael S. Tsirkin wrote: > On Tue, Feb 03, 2015 at 06:29:59AM +0000, Nicholas A. Bellinger wrote: > > From: Nicholas Bellinger <nab@xxxxxxxxxxxxxxx> > > > > This patch adds ANY_LAYOUT support with a new vqs[].vq.handle_kick() > > callback in vhost_scsi_handle_vqal(). > > > > It calculates data_direction + exp_data_len for the new tcm_vhost_cmd > > descriptor by walking both outgoing + incoming iovecs using iov_iter, > > assuming the layout of outgoing request header + T10_PI + Data payload > > comes first. > > > > It also uses copy_from_iter() to copy leading virtio-scsi request header > > that may or may not include SCSI CDB, that returns a re-calculated iovec > > to start of T10_PI or Data SGL memory. > > > > v2 changes: > > - Fix up vhost_scsi_handle_vqal comments > > - Minor vhost_scsi_handle_vqal simplifications > > - Add missing minimum virtio-scsi response buffer size check > > - Fix pi_bytes* error message typo > > - Convert to use vhost_skip_iovec_bytes() common code > > - Add max_niov sanity checks vs. out + in offset into vq > > > > v3 changes: > > - Convert to copy_from_iter usage > > - Update vhost_scsi_handle_vqal comments for iov_iter usage > > - Convert prot_bytes offset to use iov_iter_advance > > - Drop max_niov usage in vhost_scsi_handle_vqal > > - Drop vhost_skip_iovec_bytes in favour of iov_iter > > > > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > > Signed-off-by: Nicholas Bellinger <nab@xxxxxxxxxxxxxxx> > > --- > > drivers/vhost/scsi.c | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 260 insertions(+) > > > > diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c > > index 7dfff15..e1fe003 100644 > > --- a/drivers/vhost/scsi.c > > +++ b/drivers/vhost/scsi.c > > @@ -1062,6 +1062,266 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs, > > } > > > > static void > > +vhost_scsi_handle_vqal(struct vhost_scsi *vs, struct vhost_virtqueue *vq) > > +{ > > + struct tcm_vhost_tpg **vs_tpg, *tpg; > > + struct virtio_scsi_cmd_req v_req; > > + struct virtio_scsi_cmd_req_pi v_req_pi; > > + struct tcm_vhost_cmd *cmd; > > + struct iov_iter out_iter, in_iter, prot_iter, data_iter; > > + u64 tag; > > + u32 exp_data_len, data_direction; > > + unsigned out, in, i; > > + int head, ret, prot_bytes; > > + size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); > > + size_t out_size, in_size; > > + u16 lun; > > + u8 *target, *lunp, task_attr; > > + bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); > > + void *req, *cdb; > > + > > + mutex_lock(&vq->mutex); > > + /* > > + * We can handle the vq only after the endpoint is setup by calling the > > + * VHOST_SCSI_SET_ENDPOINT ioctl. > > + */ > > + vs_tpg = vq->private_data; > > + if (!vs_tpg) > > + goto out; > > + > > + vhost_disable_notify(&vs->dev, vq); > > + > > + for (;;) { > > + head = vhost_get_vq_desc(vq, vq->iov, > > + ARRAY_SIZE(vq->iov), &out, &in, > > + NULL, NULL); > > + pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", > > + head, out, in); > > + /* On error, stop handling until the next kick. */ > > + if (unlikely(head < 0)) > > + break; > > + /* Nothing new? Wait for eventfd to tell us they refilled. */ > > + if (head == vq->num) { > > + if (unlikely(vhost_enable_notify(&vs->dev, vq))) { > > + vhost_disable_notify(&vs->dev, vq); > > + continue; > > + } > > + break; > > + } > > + /* > > + * Check for a sane response buffer so we can report early > > + * errors back to the guest. > > + */ > > + if (unlikely(vq->iov[out].iov_len < rsp_size)) { > > + vq_err(vq, "Expecting at least virtio_scsi_cmd_resp" > > + " size, got %zu bytes\n", vq->iov[out].iov_len); > > + break; > > + } > > + /* > > + * Setup pointers and values based upon different virtio-scsi > > + * request header if T10_PI is enabled in KVM guest. > > + */ > > + if (t10_pi) { > > + req = &v_req_pi; > > + req_size = sizeof(v_req_pi); > > + lunp = &v_req_pi.lun[0]; > > + target = &v_req_pi.lun[1]; > > + } else { > > + req = &v_req; > > + req_size = sizeof(v_req); > > + lunp = &v_req.lun[0]; > > + target = &v_req.lun[1]; > > + } > > + /* > > + * Determine data_direction for ANY_LAYOUT > > It's not just for ANY_LAYOUT though, is it? > After patchset is fully applied this will run for > all guests? Dropping ANY_LAYOUT specific wording here.. > > > by calculating the > > + * total outgoing iovec sizes / incoming iovec sizes vs. > > + * virtio-scsi request / response headers respectively. > > + * > > + * FIXME: Not correct for BIDI operation > > + */ > > + out_size = in_size = 0; > > + for (i = 0; i < out; i++) > > + out_size += vq->iov[i].iov_len; > > + for (i = out; i < out + in; i++) > > + in_size += vq->iov[i].iov_len; > > Why not use iov_length? <nod>, converted to use iov_length. > > > + /* > > + * Any associated T10_PI bytes for the outgoing / incoming > > + * payloads are included in calculation of exp_data_len here. > > + */ > > + if (out_size > req_size) { > > + data_direction = DMA_TO_DEVICE; > > + exp_data_len = out_size - req_size; > > + } else if (in_size > rsp_size) { > > + data_direction = DMA_FROM_DEVICE; > > + exp_data_len = in_size - rsp_size; > > + } else { > > + data_direction = DMA_NONE; > > + exp_data_len = 0; > > + } > > We must validate this doesn't cause exp_data_len to be negative. > AFAICT, exp_data_len is always >= 0 here. > > + /* > > + * Copy over the virtio-scsi request header, which when > > + * ANY_LAYOUT is enabled may span multiple iovecs, or a > > + * single iovec may contain both the header + outgoing > > + * WRITE payloads. > > + * > > + * copy_from_iter() is modifying the iovecs as copies over > > + * req_size bytes into req, so the returned out_iter.iov[0] > > + * will contain the correct start + offset of the outgoing > > + * WRITE payload, if DMA_TO_DEVICE is set. > > + */ > > + iov_iter_init(&out_iter, READ, &vq->iov[0], out, > > I'd prefer just using vq->iov here, then we know that > any iov[number] user is left-over that needs to be converted > to use iterators. > Done. > > + (data_direction == DMA_TO_DEVICE) ? > > + req_size + exp_data_len : req_size); > > Hmm does not look safe: iovecs are pre-validated with > in_size/out_size, not with req_size. > The following should be equivalent, should it not? > iov_iter_init(&out_iter, READ, &vq->iov[0], out, > out_size); > <nod>, using out_size here instead. > > > + > > + ret = copy_from_iter(req, req_size, &out_iter); > > + if (unlikely(ret != req_size)) { > > + vq_err(vq, "Faulted on copy_from_iter\n"); > > + vhost_scsi_send_bad_target(vs, vq, head, out); > > + continue; > > + } > > + /* virtio-scsi spec requires byte 0 of the lun to be 1 */ > > + if (unlikely(*lunp != 1)) { > > + vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp); > > + vhost_scsi_send_bad_target(vs, vq, head, out); > > + continue; > > + } > > + > > + tpg = ACCESS_ONCE(vs_tpg[*target]); > > + if (unlikely(!tpg)) { > > + /* Target does not exist, fail the request */ > > + vhost_scsi_send_bad_target(vs, vq, head, out); > > + continue; > > + } > > + /* > > + * Determine start of T10_PI or data payload iovec in ANY_LAYOUT > > + * mode based upon data_direction. > > Same comments as above. > Fixed. > > + * > > + * For DMA_TO_DEVICE, this is iov_out from copy_from_iter() > > + * with the already recalculated iov_base + iov_len. > > + * > > + * For DMA_FROM_DEVICE, the iovec will be just past the end > > + * of the virtio-scsi response header in either the same > > + * or immediately following iovec. > > + */ > > + prot_bytes = 0; > > + > > + if (data_direction == DMA_TO_DEVICE) { > > + data_iter = out_iter; > > + } else if (data_direction == DMA_FROM_DEVICE) { > > + iov_iter_init(&in_iter, WRITE, &vq->iov[out], in, > > + rsp_size + exp_data_len); > > + iov_iter_advance(&in_iter, rsp_size); > > + data_iter = in_iter; > > + } > > + /* > > + * If T10_PI header + payload is present, setup prot_iter values > > + * and recalculate data_iter for vhost_scsi_mapal() mapping to > > + * host scatterlists via get_user_pages_fast(). > > + */ > > + if (t10_pi) { > > + if (v_req_pi.pi_bytesout) { > > + if (data_direction != DMA_TO_DEVICE) { > > + vq_err(vq, "Received non zero pi_bytesout," > > + " but wrong data_direction\n"); > > + vhost_scsi_send_bad_target(vs, vq, head, out); > > + continue; > > + } > > + prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout); > > + } else if (v_req_pi.pi_bytesin) { > > + if (data_direction != DMA_FROM_DEVICE) { > > + vq_err(vq, "Received non zero pi_bytesin," > > + " but wrong data_direction\n"); > > + vhost_scsi_send_bad_target(vs, vq, head, out); > > + continue; > > + } > > + prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); > > + } > > + /* > > + * Set prot_iter to data_iter, and advance past any > > + * preceeding prot_bytes that may be present. > > + * > > + * Also fix up the exp_data_len to reflect only the > > + * actual data payload length. > > + */ > > + if (prot_bytes) { > > + exp_data_len -= prot_bytes; > > + prot_iter = data_iter; > > + iov_iter_advance(&data_iter, prot_bytes); > > + } > > + tag = vhost64_to_cpu(vq, v_req_pi.tag); > > + task_attr = v_req_pi.task_attr; > > + cdb = &v_req_pi.cdb[0]; > > + lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF; > > + } else { > > + tag = vhost64_to_cpu(vq, v_req.tag); > > + task_attr = v_req.task_attr; > > + cdb = &v_req.cdb[0]; > > + lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; > > + } > > + /* > > + * Check that the recieved CDB size does not exceeded our > > received. > Fixed. --nab -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html