On 09/23/2014 06:47 PM, Alexey Kardashevskiy wrote: > On 09/19/2014 06:47 PM, Kevin Wolf wrote:> Am 16.09.2014 um 14:59 hat Paolo Bonzini geschrieben: >>> Il 16/09/2014 14:52, Kevin Wolf ha scritto: >>>> Yes, that's true. We can't fix this problem in qcow2, though, because >>>> it's a more general one. I think we must make sure that >>>> bdrv_invalidate_cache() doesn't yield. >>>> >>>> Either by forbidding to run bdrv_invalidate_cache() in a coroutine and >>>> moving the problem to the caller (where and why is it even called from a >>>> coroutine?), or possibly by creating a new coroutine for the driver >>>> callback and running that in a nested event loop that only handles >>>> bdrv_invalidate_cache() callbacks, so that the NBD server doesn't get a >>>> chance to process new requests in this thread. >>> >>> Incoming migration runs in a coroutine (the coroutine entry point is >>> process_incoming_migration_co). But everything after qemu_fclose() can >>> probably be moved into a separate bottom half, so that it gets out of >>> coroutine context. >> >> Alexey, you should probably rather try this (and add a bdrv_drain_all() >> in bdrv_invalidate_cache) than messing around with qcow2 locks. This >> isn't a problem that can be completely fixed in qcow2. > > > Ok. Tried :) Not very successful though. The patch is below. > > Is that the correct bottom half? When I did it, I started getting crashes > in various sport on accesses to s->l1_cache which is NULL after qcow2_close. > Normally the code would check s->l1_size and then use but they are out of sync. > > So I clear it in qcow2_close(). This allowed migrated guest to work and not > to crash until I shut it down when it aborted at "HERE IT FAILS ON SHUTDOWN". > > Here I realized I am missing something in this picture again, what is it? > Thanks! To be more precise, I can remove that abort() and it seems working for a while but when shutting migrated guest down, the disk fails: Will now unmount local filesystems:sd 0:0:0:0: [sda] Result: hostbyte=0x00 driverbyte=0x08 sd 0:0:0:0: [sda] Sense Key : 0xb [current] sd 0:0:0:0: [sda] ASC=0x0 ASCQ=0x6 sd 0:0:0:0: [sda] CDB: cdb[0]=0x2a: 2a 00 00 3c 10 10 00 00 08 00 end_request: I/O error, dev sda, sector 3936272 end_request: I/O error, dev sda, sector 3936272 Buffer I/O error on device sda, logical block 492034 lost page write due to I/O error on sda JBD2: Error -5 detected when updating journal superblock for sda-8. [...] spapr-vscsi or virtio-scsi - does not matter. Or crash: Program received signal SIGSEGV, Segmentation fault. 0x000000001050a69c in qcow2_cache_find_entry_to_replace (c=0x10038317bb0) at /home/alexey/p/qemu/block/qcow2-cache.c:256 (gdb) l 251 min_count = c->entries[i].cache_hits; (gdb) p i $2 = 0xfd6 (gdb) p c->size $3 = 0x3ffe (gdb) p c->entries[i] $5 = { table = 0x804dd70210, offset = 0x40, dirty = 0x0, cache_hits = 0xee498, ref = 0x0 } Weird things are happening, that's my point :) > > > --- > block.c | 2 ++ > block/qcow2-cache.c | 2 +- > block/qcow2.c | 50 ++++++++++++++++++++++++++++++++++++-------------- > block/qcow2.h | 4 ++++ > 4 files changed, 43 insertions(+), 15 deletions(-) > > diff --git a/block.c b/block.c > index d06dd51..1e6dfd1 100644 > --- a/block.c > +++ b/block.c > @@ -5044,6 +5044,8 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) > error_setg_errno(errp, -ret, "Could not refresh total sector count"); > return; > } > + > + bdrv_drain_all(); > } > > void bdrv_invalidate_cache_all(Error **errp) > diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c > index 904f6b1..59ff48c 100644 > --- a/block/qcow2-cache.c > +++ b/block/qcow2-cache.c > @@ -259,7 +259,7 @@ static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) > if (min_index == -1) { > /* This can't happen in current synchronous code, but leave the check > * here as a reminder for whoever starts using AIO with the cache */ > - abort(); > + abort(); // <==== HERE IT FAILS ON SHUTDOWN > } > return min_index; > } > diff --git a/block/qcow2.c b/block/qcow2.c > index f9e045f..2b84562 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -1399,6 +1399,7 @@ static void qcow2_close(BlockDriverState *bs) > qemu_vfree(s->l1_table); > /* else pre-write overlap checks in cache_destroy may crash */ > s->l1_table = NULL; > + s->l1_size = 0; > > if (!(bs->open_flags & BDRV_O_INCOMING)) { > qcow2_cache_flush(bs, s->l2_table_cache); > @@ -1419,16 +1420,11 @@ static void qcow2_close(BlockDriverState *bs) > qcow2_free_snapshots(bs); > } > > +static void qcow2_invalidate_cache_bh_cb(void *opaque); > + > static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) > { > BDRVQcowState *s = bs->opaque; > - int flags = s->flags; > - AES_KEY aes_encrypt_key; > - AES_KEY aes_decrypt_key; > - uint32_t crypt_method = 0; > - QDict *options; > - Error *local_err = NULL; > - int ret; > > /* > * Backing files are read-only which makes all of their metadata immutable, > @@ -1436,13 +1432,28 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) > */ > > if (s->crypt_method) { > - crypt_method = s->crypt_method; > - memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key)); > - memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key)); > + s->bh_crypt_method = s->crypt_method; > + memcpy(&s->bh_aes_encrypt_key, &s->aes_encrypt_key, sizeof(s->bh_aes_encrypt_key)); > + memcpy(&s->bh_aes_decrypt_key, &s->aes_decrypt_key, sizeof(s->bh_aes_decrypt_key)); > + } else { > + s->bh_crypt_method = 0; > } > > qcow2_close(bs); > > + s->cache_inv_bh = aio_bh_new(bdrv_get_aio_context(bs), > + qcow2_invalidate_cache_bh_cb, > + bs); > +} > + > +static void qcow2_invalidate_cache_bh(BlockDriverState *bs, Error **errp) > +{ > + BDRVQcowState *s = bs->opaque; > + int flags = s->flags; > + QDict *options; > + Error *local_err = NULL; > + int ret; > + > bdrv_invalidate_cache(bs->file, &local_err); > if (local_err) { > error_propagate(errp, local_err); > @@ -1464,11 +1475,22 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) > return; > } > > - if (crypt_method) { > - s->crypt_method = crypt_method; > - memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key)); > - memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key)); > + if (s->bh_crypt_method) { > + s->crypt_method = s->bh_crypt_method; > + memcpy(&s->aes_encrypt_key, &s->bh_aes_encrypt_key, sizeof(s->bh_aes_encrypt_key)); > + memcpy(&s->aes_decrypt_key, &s->bh_aes_decrypt_key, sizeof(s->bh_aes_decrypt_key)); > } > + > + qemu_bh_delete(s->cache_inv_bh); > + s->cache_inv_bh = NULL; > +} > + > +static void qcow2_invalidate_cache_bh_cb(void *opaque) > +{ > + BlockDriverState *bs = opaque; > + Error *local_err = NULL; > + > + qcow2_invalidate_cache_bh(bs, &local_err); > } > > static size_t header_ext_add(char *buf, uint32_t magic, const void *s, > diff --git a/block/qcow2.h b/block/qcow2.h > index 6aeb7ea..58d1859 100644 > --- a/block/qcow2.h > +++ b/block/qcow2.h > @@ -271,6 +271,10 @@ typedef struct BDRVQcowState { > QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; > QTAILQ_HEAD (, Qcow2DiscardRegion) discards; > bool cache_discards; > + QEMUBH *cache_inv_bh; > + AES_KEY bh_aes_encrypt_key; > + AES_KEY bh_aes_decrypt_key; > + uint32_t bh_crypt_method; > } BDRVQcowState; > > /* XXX: use std qcow open function ? */ > -- Alexey -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list