[RFC] [PATCH] SCSI passthrough for virtio-blk

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

I got bored and implemented SCSI passthrough for the virtio-blk driver.
The principle is quite simple: just put the missing fields (CDB, sense
buffer and status header) on the virtio queue and then call the SG_IO
ioctl on the host.

So when using '-drive file=/dev/sgXX,if=virtio,format=host_device' you
can happily call any sg_XX command on the resulting vdX device. Quite
neat, methinks. And it's even backwards compatible, so each of these
patches should work without the other one applied.

As one would have guessed, there are two patches: one for the Linux
kernel to modify the virtio-blk driver in the guest, and one for the
qemu/kvm userland program to modify the virtio-blk driver on the host.
The latter patch is relative to Avi's kvm-userland tree from kernel.org.

As usual, comments etc. to me.

Cheers,

Hannes
--
Dr. Hannes Reinecke		      zSeries & Storage
hare@xxxxxxx			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)
virtio: Implement SCSI passthrough for virtio-blk

This patch implements SCSI passthrough for any virtio-blk device.
The data on the virtio queue will only be modified for a SCSI command,
so the normal I/O flow is unchanged.

Signed-off-by: Hannes Reinecke <hare@xxxxxxx>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4225109..46f03d2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -35,6 +35,7 @@ struct virtblk_req
 	struct list_head list;
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
+	struct virtio_blk_inhdr in_hdr;
 	u8 status;
 };
 
@@ -47,20 +48,29 @@ static void blk_done(struct virtqueue *vq)
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
-		int uptodate;
+		int error;
+		unsigned int bytes;
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
-			uptodate = 1;
+			error = 0;
 			break;
 		case VIRTIO_BLK_S_UNSUPP:
-			uptodate = -ENOTTY;
+			error = -ENOTTY;
 			break;
 		default:
-			uptodate = 0;
+			error = -EIO;
 			break;
 		}
 
-		end_dequeued_request(vbr->req, uptodate);
+		if (blk_pc_request(vbr->req)) {
+			vbr->req->data_len = vbr->in_hdr.residual;
+			bytes = vbr->in_hdr.data_len;
+			vbr->req->sense_len = vbr->in_hdr.sense_len;
+			vbr->req->errors = vbr->in_hdr.status;
+		} else
+			bytes = blk_rq_bytes(vbr->req);
+
+		__blk_end_request(vbr->req, error, bytes);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
@@ -72,7 +82,7 @@ static void blk_done(struct virtqueue *vq)
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		   struct request *req)
 {
-	unsigned long num, out, in;
+	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
 	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
@@ -99,20 +109,31 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 
 	/* This init could be done at vblk creation time */
 	sg_init_table(vblk->sg, VIRTIO_MAX_SG);
-	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
-	sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status));
-
-	if (rq_data_dir(vbr->req) == WRITE) {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		out = 1 + num;
-		in = 1;
-	} else {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-		out = 1;
-		in = 1 + num;
+	sg_set_buf(&vblk->sg[out], &vbr->out_hdr, sizeof(vbr->out_hdr));
+	out++;
+	if (blk_pc_request(vbr->req)) {
+		sg_set_buf(&vblk->sg[out], vbr->req->cmd, vbr->req->cmd_len);
+		out++;
+	}
+	num = blk_rq_map_sg(q, vbr->req, vblk->sg+out);
+	if (blk_pc_request(vbr->req)) {
+		sg_set_buf(&vblk->sg[num+out+in], vbr->req->sense, 96);
+		in++;
+		sg_set_buf(&vblk->sg[num+out+in], &vbr->in_hdr,
+			   sizeof(vbr->in_hdr));
+		in++;
+	}
+	sg_set_buf(&vblk->sg[num+out+in], &vbr->status, sizeof(vbr->status));
+	in++;
+	if (num) {
+		if (rq_data_dir(vbr->req) == WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
+		}
 	}
-
 	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
 		mempool_free(vbr, vblk->pool);
 		return false;
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index c1aef85..089e596 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -54,6 +54,14 @@ struct virtio_blk_outhdr
 	__u64 sector;
 };
 
+struct virtio_blk_inhdr	/* per-request SCSI result header, read back in blk_done() for blk_pc requests */
+{
+	__u32 status;		/* copied into req->errors */
+	__u32 data_len;		/* byte count handed to __blk_end_request() */
+	__u32 sense_len;	/* copied into req->sense_len */
+	__u32 residual;		/* copied into req->data_len */
+};
+};
+
 /* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
 #define VIRTIO_BLK_S_IOERR	1
virtio: Implement SCSI passthrough for virtio-blk

This patch implements SCSI passthrough handling for Qemu/KVM.
It's actually quite simple: just construct an SG_IO header and
call the SG_IO ioctl on the underlying block device. However, we
have to make sure to map the result and error codes correctly upon
return.

Signed-off-by: Hannes Reinecke <hare@xxxxxxx>

diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c
index 88dc086..53608b7 100644
--- a/qemu/hw/virtio-blk.c
+++ b/qemu/hw/virtio-blk.c
@@ -16,6 +16,54 @@
 #include "block_int.h"
 #include "pc.h"
 
+/* from Linux's scsi/sg.h */
+
+typedef struct sg_iovec /* same structure as used by readv() Linux system */
+{			/* call. It defines one scatter-gather element. */
+    void __user *iov_base;      /* Starting address  */
+    size_t iov_len;		/* Length in bytes  */
+} sg_iovec_t;
+
+typedef struct sg_io_hdr
+{
+    int interface_id;		/* [i] 'S' for SCSI generic (required) */
+    int dxfer_direction;	/* [i] data transfer direction  */
+    unsigned char cmd_len;      /* [i] SCSI command length ( <= 16 bytes) */
+    unsigned char mx_sb_len;    /* [i] max length to write to sbp */
+    unsigned short iovec_count; /* [i] 0 implies no scatter gather */
+    unsigned int dxfer_len;     /* [i] byte count of data transfer */
+    void __user *dxferp;	/* [i], [*io] points to data transfer memory
+					      or scatter gather list */
+    unsigned char __user *cmdp; /* [i], [*i] points to command to perform */
+    void __user *sbp;		/* [i], [*o] points to sense_buffer memory */
+    unsigned int timeout;       /* [i] MAX_UINT->no timeout (unit: millisec) */
+    unsigned int flags;		/* [i] 0 -> default, see SG_FLAG... */
+    int pack_id;		/* [i->o] unused internally (normally) */
+    void __user * usr_ptr;      /* [i->o] unused internally */
+    unsigned char status;       /* [o] scsi status */
+    unsigned char masked_status;/* [o] shifted, masked scsi status */
+    unsigned char msg_status;   /* [o] messaging level data (optional) */
+    unsigned char sb_len_wr;    /* [o] byte count actually written to sbp */
+    unsigned short host_status; /* [o] errors from host adapter */
+    unsigned short driver_status;/* [o] errors from software driver */
+    int resid;			/* [o] dxfer_len - actual_transferred */
+    unsigned int duration;      /* [o] time taken by cmd (unit: millisec) */
+    unsigned int info;		/* [o] auxiliary information */
+} sg_io_hdr_t;  /* 64 bytes long (on i386) */
+
+#define SG_IO 0x2285
+#define SG_INTERFACE_ID_ORIG 'S'
+
+/* Use negative values to flag difference from original sg_header structure */
+#define SG_DXFER_NONE (-1)      /* e.g. a SCSI Test Unit Ready command */
+#define SG_DXFER_TO_DEV (-2)    /* e.g. a SCSI WRITE command */
+#define SG_DXFER_FROM_DEV (-3)  /* e.g. a SCSI READ command */
+#define SG_DXFER_TO_FROM_DEV (-4) /* treated like SG_DXFER_FROM_DEV with the
+				   additional property than during indirect
+				   IO the user buffer is copied into the
+				   kernel buffers before the transfer */
+#define SG_DXFER_UNKNOWN (-5)   /* Unknown data direction */
+
 /* from Linux's linux/virtio_blk.h */
 
 /* The ID for virtio_block */
@@ -62,12 +110,21 @@ struct virtio_blk_outhdr
 #define VIRTIO_BLK_S_IOERR	1
 #define VIRTIO_BLK_S_UNSUPP	2
 
-/* This is the first element of the write scatter-gather list */
+/* This is the last element of the write scatter-gather list */
 struct virtio_blk_inhdr
 {
     unsigned char status;
 };
 
+/* SCSI pass-through header: SG_IO results written back for the guest */
+struct virtio_scsi_inhdr
+{
+    unsigned int errors;	/* hdr.status (set to the ioctl return value if SG_IO fails) */
+    unsigned int data_len;	/* hdr.dxfer_len: summed length of the data iovecs */
+    unsigned int sense_len;	/* hdr.sb_len_wr */
+    unsigned int residual;	/* hdr.resid (set to hdr.dxfer_len if SG_IO fails) */
+};
+
 typedef struct VirtIOBlock
 {
     VirtIODevice vdev;
@@ -86,6 +143,7 @@ typedef struct VirtIOBlockReq
     VirtQueueElement elem;
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr *out;
+    struct virtio_scsi_inhdr *scsi;
     size_t size;
     uint8_t *buffer;
 } VirtIOBlockReq;
@@ -162,10 +220,62 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 	req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
 
 	if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
-	    unsigned int len = sizeof(*req->in);
+	    /* Construct sg header */
+	    struct sg_io_hdr hdr;
+	    int ret, size = 0;
+
+	    memset(&hdr, 0, sizeof(struct sg_io_hdr));
+	    hdr.interface_id = 'S';
+	    hdr.cmd_len = req->elem.out_sg[1].iov_len;
+	    hdr.cmdp = req->elem.out_sg[1].iov_base;
+	    hdr.dxfer_len = 0;
+	    hdr.dxfer_direction = SG_DXFER_NONE;
+
+	    if (req->elem.out_num < 2 || req->elem.in_num < 3) {
+		req->in->status = VIRTIO_BLK_S_IOERR;
+		goto out_scsi;
+	    }
+	    req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+	    size = sizeof(*req->in) + sizeof(*req->scsi);
+
+	    if (req->elem.out_num > 2) {
+		hdr.dxfer_direction = SG_DXFER_TO_DEV;
+		hdr.iovec_count = req->elem.out_num - 2;
 
-	    req->in->status = VIRTIO_BLK_S_UNSUPP;
-	    virtqueue_push(vq, &req->elem, len);
+		for (i = 0; i < hdr.iovec_count; i++)
+		    hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+
+		hdr.dxferp = req->elem.out_sg + 2;
+	    } else if (req->elem.in_num > 3) {
+		hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+		hdr.iovec_count = req->elem.in_num - 3;
+		for (i = 0; i < hdr.iovec_count; i++)
+		    hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+
+		hdr.dxferp = req->elem.in_sg;
+		size += hdr.dxfer_len;
+	    }
+	    hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
+	    hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+	    size += hdr.mx_sb_len;
+
+	    ret = bdrv_ioctl(s->bs, SG_IO, &hdr);
+
+	    if (ret) {
+		req->in->status = VIRTIO_BLK_S_UNSUPP;
+		hdr.status = ret;
+		hdr.resid = hdr.dxfer_len;
+	    } else if (hdr.status)
+		req->in->status = VIRTIO_BLK_S_IOERR;
+	    else
+		req->in->status = VIRTIO_BLK_S_OK;
+
+	    req->scsi->errors = hdr.status;
+	    req->scsi->residual = hdr.resid;
+	    req->scsi->sense_len = hdr.sb_len_wr;
+	    req->scsi->data_len = hdr.dxfer_len;
+	out_scsi:
+	    virtqueue_push(vq, &req->elem, size);
 	    virtio_notify(vdev, vq);
 	    qemu_free(req);
 	} else if (req->out->type & VIRTIO_BLK_T_OUT) {
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/virtualization

[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux