On Sat, Sep 24, 2011 at 12:19 AM, Kevin Wolf <kwolf@xxxxxxxxxx> wrote: > Am 08.09.2011 12:11, schrieb Zhi Yong Wu: >> Note: >> 1.) When bps/iops limits are specified to a small value such as 511 bytes/s, this VM will hang up. We are considering how to handle this senario. >> 2.) When "dd" command is issued in guest, if its option bs is set to a large value such as "bs=1024K", the result speed will slightly bigger than the limits. >> >> For these problems, if you have nice thought, pls let us know.:) >> >> Signed-off-by: Zhi Yong Wu <wuzhy@xxxxxxxxxxxxxxxxxx> >> --- >> block.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- >> block.h | 1 - >> 2 files changed, 248 insertions(+), 12 deletions(-) > > One general comment: What about synchronous and/or coroutine I/O > operations? Do you think they are just not important enough to consider > here or were they forgotten? For sync ops, we assume that it will be converse into async mode at some point of future, right? For coroutine I/O, it is introduced in image driver layer, and behind bdrv_aio_readv/writev. I think that we need not consider them, right? > > Also, do I understand correctly that you're always submitting the whole Right, when the block timer fire, it will flush whole request queue. > queue at once? Does this effectively enforce the limit all the time or > will it lead to some peaks and then no requests at all for a while until In fact, it only try to submit those enqueued request one by one. If fail to pass the limit, this request will be enqueued again. > the average is right again? Yeah, it is possible. Do you better idea? > > Maybe some documentation on how it all works from a high level > perspective would be helpful. > >> diff --git a/block.c b/block.c >> index cd75183..c08fde8 100644 >> --- a/block.c >> +++ b/block.c >> @@ -30,6 +30,9 @@ >> #include "qemu-objects.h" >> #include "qemu-coroutine.h" >> >> +#include "qemu-timer.h" >> +#include "block/blk-queue.h" >> + >> #ifdef CONFIG_BSD >> #include <sys/types.h> >> #include <sys/stat.h> >> @@ -72,6 +75,13 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, >> QEMUIOVector *iov); >> static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs); >> >> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, >> + bool is_write, double elapsed_time, uint64_t *wait); >> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, >> + double elapsed_time, uint64_t *wait); >> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, >> + bool is_write, int64_t *wait); >> + >> static QTAILQ_HEAD(, BlockDriverState) bdrv_states = >> QTAILQ_HEAD_INITIALIZER(bdrv_states); >> >> @@ -745,6 +755,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, >> bs->change_cb(bs->change_opaque, CHANGE_MEDIA); >> } >> >> + /* throttling disk I/O limits */ >> + if (bs->io_limits_enabled) { >> + bdrv_io_limits_enable(bs); >> + } >> + >> return 0; >> >> unlink_and_fail: >> @@ -783,6 +798,18 @@ void bdrv_close(BlockDriverState *bs) >> if (bs->change_cb) >> bs->change_cb(bs->change_opaque, CHANGE_MEDIA); >> } >> + >> + /* throttling disk I/O limits */ >> + if (bs->block_queue) { >> + qemu_del_block_queue(bs->block_queue); >> + bs->block_queue = NULL; >> + } >> + >> + if (bs->block_timer) { >> + qemu_del_timer(bs->block_timer); >> + qemu_free_timer(bs->block_timer); >> + bs->block_timer = NULL; >> + } > > Why not io_limits_disable() instead of copying the code here? Good point, thanks. > >> } >> >> void bdrv_close_all(void) >> @@ -2341,16 +2368,48 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, >> BlockDriverCompletionFunc *cb, void *opaque) >> { >> BlockDriver *drv = bs->drv; >> - >> + BlockDriverAIOCB *ret; >> + int64_t wait_time = -1; >> +printf("sector_num=%ld, nb_sectors=%d\n", sector_num, nb_sectors); > > Debugging leftover (more of them follow, won't comment on each one) Removed. > >> trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); >> >> - if (!drv) >> - return NULL; >> - if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + if (!drv || bdrv_check_request(bs, sector_num, nb_sectors)) { >> return NULL; >> + } > > This part is unrelated. Have changed it to original. > >> + >> + /* throttling disk read I/O */ >> + if (bs->io_limits_enabled) { >> + if (bdrv_exceed_io_limits(bs, nb_sectors, false, &wait_time)) { >> + ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_readv, >> + sector_num, qiov, nb_sectors, cb, opaque); >> + printf("wait_time=%ld\n", wait_time); >> + if (wait_time != -1) { >> + printf("reset block timer\n"); >> + qemu_mod_timer(bs->block_timer, >> + wait_time + qemu_get_clock_ns(vm_clock)); >> + } >> + >> + if (ret) { >> + printf("ori ret is not null\n"); >> + } else { >> + printf("ori ret is null\n"); >> + } >> + >> + return ret; >> + } >> + } >> >> - return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, >> + ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, >> cb, opaque); >> + if (ret) { >> + if (bs->io_limits_enabled) { >> + bs->io_disps.bytes[BLOCK_IO_LIMIT_READ] += >> + (unsigned) nb_sectors * BDRV_SECTOR_SIZE; >> + bs->io_disps.ios[BLOCK_IO_LIMIT_READ]++; >> + } > > I wonder if you can't reuse bs->nr_bytes/nr_ops instead of introducing a > second counting mechanism. Would have the advantage that numbers are NO, our counting variables will be reset to ZERO if current slice time(0.1ms) is used up. > actually consistent (your metric counts slightly differently than the > existing info blockstats one). Yeah, i notice this, and don't think there's wrong with it. and you? > >> + } >> + >> + return ret; >> } >> >> typedef struct BlockCompleteData { >> @@ -2396,15 +2455,14 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, >> BlockDriver *drv = bs->drv; >> BlockDriverAIOCB *ret; >> BlockCompleteData *blk_cb_data; >> + int64_t wait_time = -1; >> >> trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); >> >> - if (!drv) >> - return NULL; >> - if (bs->read_only) >> - return NULL; >> - if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + if (!drv || bs->read_only >> + || bdrv_check_request(bs, sector_num, nb_sectors)) { >> return NULL; >> + } > > Again, unrelated changes. Have changed it to original. > >> >> if (bs->dirty_bitmap) { >> blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb, >> @@ -2413,13 +2471,32 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, >> opaque = blk_cb_data; >> } >> >> + /* throttling disk write I/O */ >> + if (bs->io_limits_enabled) { >> + if (bdrv_exceed_io_limits(bs, nb_sectors, true, &wait_time)) { >> + ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_writev, >> + sector_num, qiov, nb_sectors, cb, opaque); >> + if (wait_time != -1) { >> + qemu_mod_timer(bs->block_timer, >> + wait_time + qemu_get_clock_ns(vm_clock)); >> + } >> + >> + return ret; >> + } >> + } > > This looks very similar to the code in bdrv_aio_readv. Can it be moved > into a common function? Good advice, done. thanks. > > Kevin > -- Regards, Zhi Yong Wu -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html