When a write request targets a cluster that does not have the COPIED flag
set, allocate a new cluster and write the original data, with the
modification applied, to the new cluster. This also adds support for
write operations on qcow2 images that contain compressed clusters.

Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
---
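[ Not part of the patch: a minimal, self-contained sketch of the
copy-on-write data movement implemented below, for reviewers.
CLUSTER_SIZE and cow_merge() are illustrative stand-ins, not symbols
from the kvm tool sources. ]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CLUSTER_SIZE 4096	/* stand-in for 1 << cluster_bits */

/* Merge a partial-cluster guest write into a copy of the old cluster
 * content; the result is what gets written to the newly allocated
 * cluster before the L2 entry is repointed with the COPIED flag set. */
static void cow_merge(uint8_t *dst, const uint8_t *old,
		      const uint8_t *buf, size_t clust_off, size_t len)
{
	memcpy(dst, old, CLUSTER_SIZE);		/* old data, zeroes if unallocated */
	memcpy(dst + clust_off, buf, len);	/* splice in the guest's bytes */
}

int main(void)
{
	static uint8_t old[CLUSTER_SIZE], dst[CLUSTER_SIZE];
	const uint8_t buf[] = "new";

	cow_merge(dst, old, buf, 100, sizeof(buf));
	printf("%s\n", (char *)dst + 100);	/* prints "new" */
	return 0;
}

[ The patch performs the same splice through q->copy_buff in
qcow_write_cluster() below. ]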
 tools/kvm/disk/qcow.c        |  322 ++++++++++++++++++++++++++++--------------
 tools/kvm/include/kvm/qcow.h |    2 +
 2 files changed, 218 insertions(+), 106 deletions(-)

diff --git a/tools/kvm/disk/qcow.c b/tools/kvm/disk/qcow.c
index 680b37d..2b9af73 100644
--- a/tools/kvm/disk/qcow.c
+++ b/tools/kvm/disk/qcow.c
@@ -122,9 +122,6 @@ static int cache_table(struct qcow *q, struct qcow_l2_table *c)
 		 */
 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
 
-		if (qcow_l2_cache_write(q, lru) < 0)
-			goto error;
-
 		/* Remove the node from the cache */
 		rb_erase(&lru->node, r);
 		list_del_init(&lru->list);
@@ -728,35 +725,110 @@ error_free_rfb:
 	return NULL;
 }
 
-/*
- * QCOW file might grow during a write operation. Not only data but metadata is
- * also written at the end of the file. Therefore it is necessary to ensure
- * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
- * synchronize the in-core state of QCOW image to disk.
- *
- * We also try to restore the image to a consistent state if the metdata
- * operation fails. The two metadat operations are: level 1 and level 2 table
- * update. If either of them fails the image is truncated to a consistent state.
+static int qcow_get_refcount(struct qcow *q, u64 clust_idx)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("error while reading refcount table");
+		return -1;
+	}
+
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+
+	if (rfb_idx >= rfb->size) {
+		pr_warning("refcount block index out of bounds");
+		return -1;
+	}
+
+	return be16_to_cpu(rfb->entries[rfb_idx]);
+}
+
+static int update_cluster_refcount(struct qcow *q, u64 clust_idx, int append)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u16 refcount;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("error while reading refcount table");
+		return -1;
+	}
+
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+	if (rfb_idx >= rfb->size) {
+		pr_warning("refcount block index out of bounds");
+		return -1;
+	}
+
+	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
+	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
+	rfb->dirty = 1;
+
+	/* write the refcount block */
+	write_refcount_block(q, rfb);
+
+	/* update free_clust_idx since the refcount became zero */
+	if (!refcount && clust_idx < q->free_clust_idx)
+		q->free_clust_idx = clust_idx;
+
+	return 0;
+}
+
+/*
+ * Allocate clusters according to the size. Find a position that can
+ * satisfy the size. free_clust_idx is initialized to zero and records
+ * the last position.
+ */
+static s64 qcow_alloc_clusters(struct qcow *q, u64 size)
+{
+	struct qcow_header *header = q->header;
+	int clust_refcount;
+	u64 clust_idx, i;
+	u64 clust_num;
+
+	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
+
+again:
+	for (i = 0; i < clust_num; i++) {
+		clust_idx = q->free_clust_idx++;
+		clust_refcount = qcow_get_refcount(q, clust_idx);
+		if (clust_refcount < 0)
+			return -1;
+		else if (clust_refcount > 0)
+			goto again;
+	}
+
+	for (i = 0; i < clust_num; i++)
+		update_cluster_refcount(q,
+			q->free_clust_idx - clust_num + i, 1);
+
+	return (q->free_clust_idx - clust_num) << header->cluster_bits;
+}
+
+/*
+ * Get the L2 table. If the table has the COPIED flag set, read the
+ * table directly. Otherwise allocate a new cluster and, if the table
+ * already exists, copy it to the new cluster.
  */
-static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
+static int get_cluster_table(struct qcow *q, u64 offset,
+	struct qcow_l2_table **result_l2t, u64 *result_l2_index)
 {
 	struct qcow_header *header = q->header;
 	struct qcow_l1_table *l1t = &q->table;
 	struct qcow_l2_table *l2t;
-	u64 clust_start;
-	u64 clust_flags;
-	u64 l2t_offset;
-	u64 clust_off;
-	u64 l2t_size;
-	u64 clust_sz;
 	u64 l1t_idx;
+	u64 l2t_offset;
 	u64 l2t_idx;
-	u64 f_sz;
-	u64 len;
+	u64 l2t_size;
+	s64 l2t_new_offset;
 
-	l2t = NULL;
-	l2t_size = 1 << header->l2_bits;
-	clust_sz = 1 << header->cluster_bits;
+	l2t_size = 1 << header->l2_bits;
 
 	l1t_idx = get_l1_index(q, offset);
 	if (l1t_idx >= l1t->table_size)
@@ -766,122 +838,149 @@ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src
 	if (l2t_idx >= l2t_size)
 		return -1;
 
-	clust_off = get_cluster_offset(q, offset);
-	if (clust_off >= clust_sz)
-		return -1;
-
-	len = clust_sz - clust_off;
-	if (len > src_len)
-		len = src_len;
-
-	mutex_lock(&q->mutex);
-
 	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
-	if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
-		pr_warning("compressed clusters are not supported");
-		goto error;
-	}
-	if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
-		pr_warning("L2 copy-on-write clusters are not supported");
-		goto error;
-	}
-
-	l2t_offset &= QCOW2_OFFSET_MASK;
-	if (l2t_offset) {
-		/* read and cache l2 table */
+	if (l2t_offset & QCOW2_OFLAG_COPIED) {
+		l2t_offset &= ~QCOW2_OFLAG_COPIED;
 		l2t = qcow_read_l2_table(q, l2t_offset);
 		if (!l2t)
 			goto error;
 	} else {
-		l2t = new_cache_table(q, l2t_offset);
+		l2t_new_offset = qcow_alloc_clusters(q,
+				l2t_size * sizeof(u64));
+		if (l2t_new_offset < 0)
+			goto error;
+
+		l2t = new_cache_table(q, l2t_new_offset);
 		if (!l2t)
 			goto error;
 
-		/* Capture the state of the consistent QCOW image */
-		f_sz = file_size(q->fd);
-		if (!f_sz)
-			goto free_cache;
+		if (l2t_offset)
+			qcow2_read_cluster(q, l2t_offset, l2t->table,
+					l2t_size * sizeof(u64));
+		else
+			memset(l2t->table, 0x00, l2t_size * sizeof(u64));
 
-		/* Write the l2 table of 0's at the end of the file */
-		l2t_offset = qcow_write_l2_table(q, l2t->table);
-		if (!l2t_offset)
+		/* write the L2 table */
+		l2t->dirty = 1;
+		if (qcow_l2_cache_write(q, l2t) < 0)
 			goto free_cache;
 
-		if (cache_table(q, l2t) < 0) {
-			if (ftruncate(q->fd, f_sz) < 0)
-				goto free_cache;
+		/* cache the L2 table */
+		cache_table(q, l2t);
 
-			goto free_cache;
-		}
+		/* update the L1 table */
+		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
+				| QCOW2_OFLAG_COPIED);
 
-		/* Update the in-core entry */
-		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
+		if (pwrite_in_full(q->fd, l1t->l1_table,
+				l1t->table_size * sizeof(u64),
+				header->l1_table_offset) < 0)
+			goto error;
 	}
 
-	/* Capture the state of the consistent QCOW image */
-	f_sz = file_size(q->fd);
-	if (!f_sz)
-		goto error;
+	*result_l2t = l2t;
+	*result_l2_index = l2t_idx;
 
-	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+	return 0;
 
-	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
-	if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
-		pr_warning("compressed clusters are not supported");
+free_cache:
+	free(l2t);
+error:
+	return -1;
+}
+
+/*
+ * If the cluster has the COPIED flag set, write the data directly.
+ * If not, read the original data and write it, with the modification
+ * applied, to a newly allocated cluster.
+ */
+static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
+	void *buf, u32 src_len)
+{
+	struct qcow_header *header = q->header;
+	struct qcow_l2_table *l2t;
+	u64 clust_start;
+	u64 clust_flags;
+	u64 clust_off;
+	u64 l2t_idx;
+	u64 len;
+
+	l2t = NULL;
+
+	clust_off = get_cluster_offset(q, offset);
+	if (clust_off >= q->cluster_size)
+		return -1;
+
+	len = q->cluster_size - clust_off;
+	if (len > src_len)
+		len = src_len;
+
+	mutex_lock(&q->mutex);
+
+	if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
+		pr_warning("failed to get the L2 table");
 		goto error;
 	}
 
-	clust_start &= QCOW2_OFFSET_MASK;
-	if (!clust_start) {
-		clust_start = ALIGN(f_sz, clust_sz);
-		l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
-		l2t->dirty = 1;
-	}
+	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
 
+	clust_start &= QCOW2_OFFSET_MASK;
 	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
-		struct qcow_refcount_block *rfb = NULL;
-		u16 clust_refcount;
-		u64 clust_idx;
-		u64 rfb_idx;
+		u64 clust_new_idx;
+		s64 clust_new_start;
 
-		clust_idx = (clust_start & QCOW2_OFFSET_MASK)
-			>> (header->cluster_bits);
-
-		rfb = qcow_read_refcount_block(q, clust_idx);
-		if (!rfb) {
-			pr_warning("L1: error while reading refcount table");
+		clust_new_start = qcow_alloc_clusters(q, q->cluster_size);
+		if (clust_new_start < 0) {
+			pr_warning("failed to allocate cluster");
 			goto error;
 		}
 
-		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
-		if (rfb_idx >= rfb->size) {
-			pr_warning("L1: refcount block index out of bounds");
+		clust_new_idx = clust_new_start >> header->cluster_bits;
+		offset &= ~(q->cluster_size - 1);
+
+		/* if clust_start is not zero, read the original data */
+		if (clust_start) {
+			mutex_unlock(&q->mutex);
+			qcow2_read_cluster(q, offset, q->copy_buff,
+					q->cluster_size);
+			mutex_lock(&q->mutex);
+		} else
+			memset(q->copy_buff, 0x00, q->cluster_size);
+
+		memcpy(q->copy_buff + clust_off, buf, len);
+
+		/* Write actual data */
+		if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
+				clust_new_start) < 0)
 			goto error;
-		}
 
-		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
-		if (!clust_refcount) {
-			clust_refcount = 1;
-			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
-			rfb->dirty = 1;
+		/* update the L2 table */
+		l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
+				| QCOW2_OFLAG_COPIED);
+		l2t->dirty = 1;
+		qcow_l2_cache_write(q, l2t);
+
+		if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
+			clust_start &= q->cluster_offset_mask;
+			clust_start &= ~511;
 		}
 
-		if (clust_refcount > 1) {
-			pr_warning("L1 copy-on-write clusters are not supported");
+		/*
+		 * Update the reference counts: decrease the refcount of
+		 * the old cluster and increase that of the new cluster.
		 */
+		if (clust_start)
+			update_cluster_refcount(q,
+				(clust_start >> header->cluster_bits), -1);
+
+		update_cluster_refcount(q, clust_new_idx, 1);
+	} else {
+		/* Write actual data */
+		if (pwrite_in_full(q->fd, buf, len,
+				clust_start + clust_off) < 0)
 			goto error;
-		}
 	}
 
 	mutex_unlock(&q->mutex);
-
-	/* Write actual data */
-	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
-		return -1;
-
 	return len;
 
-free_cache:
-	free(l2t);
 error:
 	mutex_unlock(&q->mutex);
 	return -1;
@@ -993,6 +1092,7 @@ static int qcow_disk_close(struct disk_image *disk)
 
 	refcount_table_free_cache(&q->refcount_table);
 	l1_table_free_cache(&q->table);
+	free(q->copy_buff);
 	free(q->cluster_data);
 	free(q->cluster_cache);
 	free(q->refcount_table.rf_table);
@@ -1117,10 +1217,16 @@ static struct disk_image *qcow2_probe(int fd, bool readonly)
 	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
 	q->cluster_size = 1 << q->header->cluster_bits;
 
+	q->copy_buff = malloc(q->cluster_size);
+	if (!q->copy_buff) {
+		pr_warning("copy buff malloc error!");
+		goto free_header;
+	}
+
 	q->cluster_data = malloc(q->cluster_size);
 	if (!q->cluster_data) {
 		pr_warning("cluster data malloc error!");
-		goto free_header;
+		goto free_copy_buff;
 	}
 
 	q->cluster_cache = malloc(q->cluster_size);
@@ -1163,6 +1269,9 @@ free_cluster_cache:
 free_cluster_data:
 	if (q->cluster_data)
 		free(q->cluster_data);
+free_copy_buff:
+	if (q->copy_buff)
+		free(q->copy_buff);
 free_header:
 	if (q->header)
 		free(q->header);
@@ -1252,6 +1361,7 @@ static struct disk_image *qcow1_probe(int fd, bool readonly)
 	q->version = QCOW1_VERSION;
 	q->cluster_size = 1 << q->header->cluster_bits;
 	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
+	q->free_clust_idx = 0;
 
 	q->cluster_data = malloc(q->cluster_size);
 	if (!q->cluster_data) {
diff --git a/tools/kvm/include/kvm/qcow.h b/tools/kvm/include/kvm/qcow.h
index bbf7913..e032a1e 100644
--- a/tools/kvm/include/kvm/qcow.h
+++ b/tools/kvm/include/kvm/qcow.h
@@ -84,8 +84,10 @@ struct qcow {
 	u32			version;
 	u64			cluster_size;
 	u64			cluster_offset_mask;
+	u64			free_clust_idx;
 	void			*cluster_cache;
 	void			*cluster_data;
+	void			*copy_buff;
 };
 
 struct qcow1_header_disk {
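[ Also not part of the patch: qcow_alloc_clusters() above is a
first-fit scan over per-cluster refcounts, resuming from free_clust_idx
and restarting the run whenever a busy cluster is found. A freestanding
model of that loop; get_refcount() and the toy table stand in for
qcow_get_refcount() and the on-disk refcount blocks. ]

#include <stdint.h>
#include <stdio.h>

/* toy refcount table: 0 = free, >0 = in use */
static const int refcounts[] = { 1, 1, 0, 1, 0, 0, 0, 1 };

static int get_refcount(uint64_t idx)
{
	/* clusters past the end of the toy table are free */
	return idx < sizeof(refcounts) / sizeof(refcounts[0]) ?
		refcounts[idx] : 0;
}

/* First-fit scan: advance *free_idx until clust_num consecutive free
 * clusters are found, restarting the count at the first busy cluster. */
static int64_t alloc_clusters(uint64_t *free_idx, uint64_t clust_num)
{
	uint64_t i;

again:
	for (i = 0; i < clust_num; i++) {
		int rc = get_refcount((*free_idx)++);

		if (rc < 0)
			return -1;	/* refcount lookup failed */
		if (rc > 0)
			goto again;	/* busy cluster: restart the run */
	}

	return *free_idx - clust_num;	/* first cluster of the free run */
}

int main(void)
{
	uint64_t free_idx = 0;

	/* finds the run at clusters 4..6 and leaves free_idx at 7 */
	printf("first free run starts at cluster %lld\n",
	       (long long)alloc_clusters(&free_idx, 3));
	return 0;
}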
-- 
1.7.6.rc2.8.g28eb
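[ A last aside: the rfb_idx computation in qcow_get_refcount() and
update_cluster_refcount() masks a cluster index down to an entry index
within one refcount block. Assuming QCOW_REFCOUNT_BLOCK_SHIFT is
log2(sizeof(u16)) == 1, a worked example: ]

#include <stdint.h>
#include <stdio.h>

#define REFCOUNT_BLOCK_SHIFT 1	/* log2(sizeof(u16)), assumed */

int main(void)
{
	unsigned int cluster_bits = 16;	/* 64 KiB clusters, for example */
	uint64_t entries = 1ULL << (cluster_bits - REFCOUNT_BLOCK_SHIFT);
	uint64_t clust_idx = 100000;
	uint64_t rfb_idx = clust_idx & (entries - 1);

	/* one block holds 32768 u16 entries; 100000 & 32767 == 1696 */
	printf("entries=%llu rfb_idx=%llu\n",
	       (unsigned long long)entries, (unsigned long long)rfb_idx);
	return 0;
}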