How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to the kernel command line, or load the module
with 'modprobe virtio_blk use_bio=1', to enable the ->make_request_fn()-based
I/O path.
Cc: Rusty Russell <rusty@xxxxxxxxxxxxxxx>
Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx>
Cc: virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
Cc: kvm@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Acked-by: Rusty Russell <rusty@xxxxxxxxxxxxxxx>
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Minchan Kim <minchan.kim@xxxxxxxxx>
Signed-off-by: Asias He <asias@xxxxxxxxxx>
---
drivers/block/virtio_blk.c | 203 +++++++++++++++++++++++++++++++++++---------
1 file changed, 163 insertions(+), 40 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0bbeb4..95cfeed 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -14,6 +14,9 @@
#define PART_BITS 4
+/* When set, virtblk_probe() installs the bio-based ->make_request_fn() path. */
+static bool use_bio;
+module_param(use_bio, bool, S_IRUGO);
+MODULE_PARM_DESC(use_bio, "Use the bio-based (->make_request_fn) I/O path");
+
static int major;
static DEFINE_IDA(vd_index_ida);
@@ -23,6 +26,7 @@ struct virtio_blk
{
struct virtio_device *vdev;
struct virtqueue *vq;
+ wait_queue_head_t queue_wait;
/* The disk structure for the kernel. */
struct gendisk *disk;
@@ -51,53 +55,87 @@ struct virtio_blk
struct virtblk_req
{
struct request *req;
+ struct bio *bio; /* set on the bio path, NULL on the request path */
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
u8 status;
+ struct scatterlist sg[]; /* space is only allocated when use_bio is set */
};
-static void blk_done(struct virtqueue *vq)
+/*
+ * Map the status byte stored in @vbr to an errno value: 0 for
+ * VIRTIO_BLK_S_OK, -ENOTTY for VIRTIO_BLK_S_UNSUPP, -EIO for anything else.
+ */
+static inline int virtblk_result(struct virtblk_req *vbr)
+{
+	const u8 status = vbr->status;
+
+	if (status == VIRTIO_BLK_S_OK)
+		return 0;
+	if (status == VIRTIO_BLK_S_UNSUPP)
+		return -ENOTTY;
+
+	return -EIO;
+}
+
+/*
+ * Finish a request-based I/O.  REQ_TYPE_BLOCK_PC requests get the residual,
+ * sense length and error fields copied from the in header;
+ * REQ_TYPE_SPECIAL requests get a 0/1 error flag.  The request is then ended
+ * with the errno derived from the status byte and @vbr goes back to the pool.
+ *
+ * Called from virtblk_done() with the queue lock held.
+ */
+static inline void virtblk_request_done(struct virtio_blk *vblk,
+					struct virtblk_req *vbr)
+{
+	struct request *rq = vbr->req;
+	const int err = virtblk_result(vbr);
+
+	switch (rq->cmd_type) {
+	case REQ_TYPE_BLOCK_PC:
+		rq->resid_len = vbr->in_hdr.residual;
+		rq->sense_len = vbr->in_hdr.sense_len;
+		rq->errors = vbr->in_hdr.errors;
+		break;
+	case REQ_TYPE_SPECIAL:
+		rq->errors = err ? 1 : 0;
+		break;
+	default:
+		break;
+	}
+
+	__blk_end_request_all(rq, err);
+	mempool_free(vbr, vblk->pool);
+}
+
+/*
+ * Finish a bio-based I/O: end the bio with the errno derived from the
+ * status byte, then return @vbr to the pool.
+ */
+static inline void virtblk_bio_done(struct virtio_blk *vblk,
+				    struct virtblk_req *vbr)
+{
+	struct bio *done_bio = vbr->bio;
+	int err = virtblk_result(vbr);
+
+	bio_endio(done_bio, err);
+	mempool_free(vbr, vblk->pool);
+}
+
+/*
+ * Virtqueue completion callback.  Under the queue lock, reap every finished
+ * buffer and complete it through the bio or the request path, depending on
+ * whether vbr->bio is set.  The request queue is restarted only if a request
+ * completed; tasks sleeping on queue_wait are woken only if a bio completed.
+ */
+static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
+ bool bio_done = false, req_done = false;
struct virtblk_req *vbr;
- unsigned int len;
unsigned long flags;
+ unsigned int len;
spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
- int error;
-
- switch (vbr->status) {
- case VIRTIO_BLK_S_OK:
- error = 0;
- break;
- case VIRTIO_BLK_S_UNSUPP:
- error = -ENOTTY;
- break;
- default:
- error = -EIO;
- break;
- }
-
- switch (vbr->req->cmd_type) {
- case REQ_TYPE_BLOCK_PC:
- vbr->req->resid_len = vbr->in_hdr.residual;
- vbr->req->sense_len = vbr->in_hdr.sense_len;
- vbr->req->errors = vbr->in_hdr.errors;
- break;
- case REQ_TYPE_SPECIAL:
- vbr->req->errors = (error != 0);
- break;
- default:
- break;
+ if (vbr->bio) {
+ virtblk_bio_done(vblk, vbr);
+ bio_done = true;
+ } else {
+ virtblk_request_done(vblk, vbr);
+ req_done = true;
}
-
- __blk_end_request_all(vbr->req, error);
- mempool_free(vbr, vblk->pool);
}
/* In case queue is stopped waiting for more buffers. */
- blk_start_queue(vblk->disk->queue);
+ if (req_done)
+ blk_start_queue(vblk->disk->queue);
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+
+ if (bio_done)
+ wake_up(&vblk->queue_wait);
+}
+
+/*
+ * Take a virtblk_req from the device's mempool using @gfp_mask.  When
+ * use_bio is set, the trailing scatterlist is initialised with
+ * vblk->sg_elems entries.  Returns NULL if the allocation fails.
+ */
+static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
+						    gfp_t gfp_mask)
+{
+	struct virtblk_req *new_req = mempool_alloc(vblk->pool, gfp_mask);
+
+	if (!new_req)
+		return NULL;
+
+	if (use_bio)
+		sg_init_table(new_req->sg, vblk->sg_elems);
+
+	return new_req;
+}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -106,13 +144,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
unsigned long num, out = 0, in = 0;
struct virtblk_req *vbr;
- vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+ vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
if (!vbr)
/* When another request finishes we'll try again. */
return false;
vbr->req = req;
-
+ vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
@@ -172,7 +210,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}
- if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
+ if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
+ GFP_ATOMIC) < 0) {
mempool_free(vbr, vblk->pool);
return false;
}
@@ -180,7 +219,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
return true;
}
-static void do_virtblk_request(struct request_queue *q)
+static void virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
struct request *req;
@@ -203,6 +242,82 @@ static void do_virtblk_request(struct request_queue *q)
virtqueue_kick(vblk->vq);
}
+/*
+ * Slow path for bio submission, used when the virtqueue would not take @vbr.
+ * Repeatedly: queue the task on queue_wait as an exclusive, uninterruptible
+ * waiter, then try virtqueue_add_buf() under the queue lock.  On failure the
+ * lock is dropped and the task sleeps in io_schedule(); on success the
+ * device is kicked and the loop ends.
+ *
+ * @out and @in are handed to virtqueue_add_buf() as the out/in entry counts
+ * of vbr->sg.
+ */
+static void virtblk_add_buf_wait(struct virtio_blk *vblk,
+				 struct virtblk_req *vbr,
+				 unsigned long out,
+				 unsigned long in)
+{
+	DEFINE_WAIT(wait);
+	bool queued;
+
+	do {
+		/* Get on queue_wait before attempting the add. */
+		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		queued = virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+					   GFP_ATOMIC) >= 0;
+		if (queued)
+			virtqueue_kick(vblk->vq);
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+
+		/* virtblk_done() wakes queue_wait when a bio completes. */
+		if (!queued)
+			io_schedule();
+	} while (!queued);
+
+	finish_wait(&vblk->queue_wait, &wait);
+}
+
+/*
+ * ->make_request_fn() for the bio-based path; virtblk_probe() installs it
+ * when use_bio is set.  Builds a virtblk_req for @bio, lays out vbr->sg as
+ * [out_hdr][data segments of @bio][status] and adds it to the virtqueue.
+ * If the virtqueue does not accept the buffer, the submission is handed to
+ * virtblk_add_buf_wait().  @bio is ended with -ENOMEM if no virtblk_req can
+ * be allocated.
+ */
+static void virtblk_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct virtio_blk *vblk = q->queuedata;
+ unsigned int num, out = 0, in = 0;
+ struct virtblk_req *vbr;
+
+ /* Two extra sg entries are needed: one for out_hdr, one for status. */
+ BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
+ /* Flush/FUA bios are not handled on this path. */
+ BUG_ON(bio->bi_rw & (REQ_FLUSH | REQ_FUA));
+
+ vbr = virtblk_alloc_req(vblk, GFP_NOIO);
+ if (!vbr) {
+ bio_endio(bio, -ENOMEM);
+ return;
+ }
+
+ /* virtblk_done() checks vbr->bio to pick the completion path. */
+ vbr->bio = bio;
+ vbr->req = NULL;
+ vbr->out_hdr.type = 0;
+ vbr->out_hdr.sector = bio->bi_sector;
+ vbr->out_hdr.ioprio = bio_prio(bio);
+
+ /* sg[0] carries the request header; out becomes 1. */
+ sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+ /* Data segments start at sg[1]; num is the count that was mapped. */
+ num = blk_bio_map_sg(q, bio, vbr->sg + out);
+
+ /*
+ * The status byte goes right after the data segments: out == 1 and
+ * in == 0 here, so the index is num + 1; in becomes 1.
+ */
+ sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
+ sizeof(vbr->status));
+
+ /* Data segments count as out entries for a write, in entries otherwise. */
+ if (num) {
+ if (bio->bi_rw & REQ_WRITE) {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+ out += num;
+ } else {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+ in += num;
+ }
+ }
+
+ /* Try once under the queue lock; on failure drop the lock and wait. */
+ spin_lock_irq(vblk->disk->queue->queue_lock);
+ if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+ GFP_ATOMIC) < 0)) {
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
+ virtblk_add_buf_wait(vblk, vbr, out, in);
+ return;
+ }
+ virtqueue_kick(vblk->vq);
+ spin_unlock_irq(vblk->disk->queue->queue_lock);
+}
+
/* return id (s/n) string for *disk to *id_str
*/
static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -360,7 +475,7 @@ static int init_vq(struct virtio_blk *vblk)
int err = 0;
/* We expect one virtqueue, for output. */
- vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+ vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
if (IS_ERR(vblk->vq))
err = PTR_ERR(vblk->vq);
@@ -414,7 +529,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev)
u8 writeback = virtblk_get_cache_mode(vdev);
struct virtio_blk *vblk = vdev->priv;
- if (writeback)
+ if (writeback && !use_bio)
blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
else
blk_queue_flush(vblk->disk->queue, 0);
@@ -477,6 +592,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
struct virtio_blk *vblk;
struct request_queue *q;
int err, index;
+ int pool_size;
+
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
@@ -506,10 +623,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
+ init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock);
+
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
vblk->config_enable = true;
@@ -517,7 +636,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_free_vblk;
- vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+ pool_size = sizeof(struct virtblk_req);
+ if (use_bio)
+ pool_size += sizeof(struct scatterlist) * sg_elems;
+ vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
if (!vblk->pool) {
err = -ENOMEM;
goto out_free_vq;
@@ -530,12 +652,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
+ q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
}
+ if (use_bio)
+ blk_queue_make_request(q, virtblk_make_request);
q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -620,7 +744,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
-
add_disk(vblk->disk);
err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
if (err)
--
1.7.10.4