Currently virtio-blk does support barriers for ordering requests which is enough to guarantee filesystem metadata integrity with write back caches, but it does not support any way to flush that writeback cache, to guarantee that data is stable on disk on a fsync. This patch implements a new VIRTIO_BLK_T_FLUSH command to flush the cache and exposes the functionality to the block layer by implementing a prepare_flush method. Do we need a new feature flag for this command or can we expect that all previous barrier support was buggy enough anyway? Signed-off-by: Christoph Hellwig <hch@xxxxxx> Index: xfs/drivers/block/virtio_blk.c =================================================================== --- xfs.orig/drivers/block/virtio_blk.c 2009-05-11 10:11:28.884784539 +0200 +++ xfs/drivers/block/virtio_blk.c 2009-05-11 10:31:16.642783620 +0200 @@ -65,13 +65,25 @@ static void blk_done(struct virtqueue *v break; } - if (blk_pc_request(vbr->req)) { + switch (vbr->req->cmd_type) { + case REQ_TYPE_FS: + nr_bytes = blk_rq_bytes(vbr->req); + break; + case REQ_TYPE_BLOCK_PC: vbr->req->data_len = vbr->in_hdr.residual; nr_bytes = vbr->in_hdr.data_len; vbr->req->sense_len = vbr->in_hdr.sense_len; vbr->req->errors = vbr->in_hdr.errors; - } else - nr_bytes = blk_rq_bytes(vbr->req); + break; + case REQ_TYPE_LINUX_BLOCK: + if (vbr->req->cmd[0] == REQ_LB_OP_FLUSH) { + nr_bytes = blk_rq_bytes(vbr->req); + break; + } + /*FALLTHRU*/ + default: + BUG(); + } __blk_end_request(vbr->req, error, nr_bytes); list_del(&vbr->list); @@ -82,7 +94,7 @@ static void blk_done(struct virtqueue *v spin_unlock_irqrestore(&vblk->lock, flags); } -static bool do_req(struct request_queue *q, struct virtio_blk *vblk, +static noinline bool do_req(struct request_queue *q, struct virtio_blk *vblk, struct request *req) { unsigned long num, out = 0, in = 0; @@ -94,15 +106,27 @@ static bool do_req(struct request_queue return false; vbr->req = req; - if (blk_fs_request(vbr->req)) { + + switch (req->cmd_type) { + case 
REQ_TYPE_FS: vbr->out_hdr.type = 0; vbr->out_hdr.sector = vbr->req->sector; vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); - } else if (blk_pc_request(vbr->req)) { + break; + case REQ_TYPE_BLOCK_PC: vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); - } else { + break; + case REQ_TYPE_LINUX_BLOCK: + if (req->cmd[0] == REQ_LB_OP_FLUSH) { + vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + break; + } + /*FALLTHRU*/ + default: /* We don't put anything else in the queue. */ BUG(); } @@ -174,6 +198,12 @@ static void do_virtblk_request(struct re vblk->vq->vq_ops->kick(vblk->vq); } +static void virtblk_prepare_flush(struct request_queue *q, struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; +} + static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long data) { @@ -310,7 +340,8 @@ static int virtblk_probe(struct virtio_d /* If barriers are supported, tell block layer that queue is ordered */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) - blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); + blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG_FLUSH, + virtblk_prepare_flush); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) Index: xfs/include/linux/virtio_blk.h =================================================================== --- xfs.orig/include/linux/virtio_blk.h 2009-05-11 10:18:39.933666519 +0200 +++ xfs/include/linux/virtio_blk.h 2009-05-11 10:22:14.396660919 +0200 @@ -35,6 +35,17 @@ struct virtio_blk_config __u32 blk_size; } __attribute__((packed)); +/* + * Command types + * + * Usage is a bit tricky as some bits are used as flags and some not. + * + * Rules: + * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or + * VIRTIO_BLK_T_BARRIER. 
VIRTIO_BLK_T_FLUSH is a command of its own + * and may not be combined with any of the other flags. + */ + /* These two define direction. */ #define VIRTIO_BLK_T_IN 0 #define VIRTIO_BLK_T_OUT 1 @@ -42,6 +53,9 @@ struct virtio_blk_config /* This bit says it's a scsi command, not an actual read or write. */ #define VIRTIO_BLK_T_SCSI_CMD 2 +/* Cache flush command */ +#define VIRTIO_BLK_T_FLUSH 4 + /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html