* Juan Quintela (quintela@xxxxxxxxxx) wrote: > From: Ivan Ren <renyime@xxxxxxxxx> > > When we 'migrate_cancel' a multifd migration, live_migration thread may > go into endless loop in multifd_send_pages functions. > > Reproduce steps: > > (qemu) migrate_set_capability multifd on > (qemu) migrate -d url > (qemu) [wait a while] > (qemu) migrate_cancel > > Then may get live_migration 100% cpu usage in following stack: > > pthread_mutex_lock > qemu_mutex_lock_impl > multifd_send_pages > multifd_queue_page > ram_save_multifd_page > ram_save_target_page > ram_save_host_page > ram_find_and_save_block > ram_find_and_save_block > ram_save_iterate > qemu_savevm_state_iterate > migration_iteration_run > migration_thread > qemu_thread_start > start_thread > clone > > Signed-off-by: Ivan Ren <ivanren@xxxxxxxxxxx> > Message-Id: <1561468699-9819-2-git-send-email-ivanren@xxxxxxxxxxx> > Reviewed-by: Juan Quintela <quintela@xxxxxxxxxx> > Signed-off-by: Juan Quintela <quintela@xxxxxxxxxx> Reviewed-by: Dr. David Alan Gilbert <dgilbert@xxxxxxxxxx> > --- > migration/ram.c | 36 +++++++++++++++++++++++++++++------- > 1 file changed, 29 insertions(+), 7 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index 2b0774c2bf..52a2d498e4 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -920,7 +920,7 @@ struct { > * false. > */ > > -static void multifd_send_pages(void) > +static int multifd_send_pages(void) > { > int i; > static int next_channel; > @@ -933,6 +933,11 @@ static void multifd_send_pages(void) > p = &multifd_send_state->params[i]; > > qemu_mutex_lock(&p->mutex); > + if (p->quit) { > + error_report("%s: channel %d has already quit!", __func__, i); > + qemu_mutex_unlock(&p->mutex); > + return -1; > + } > if (!p->pending_job) { > p->pending_job++; > next_channel = (i + 1) % migrate_multifd_channels(); > @@ -951,9 +956,11 @@ static void multifd_send_pages(void) > ram_counters.transferred += transferred;; > qemu_mutex_unlock(&p->mutex); > qemu_sem_post(&p->sem); > + > + return 1; > } > > -static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) > +static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) > { > MultiFDPages_t *pages = multifd_send_state->pages; > > @@ -968,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) > pages->used++; > > if (pages->used < pages->allocated) { > - return; > + return 1; > } > } > > - multifd_send_pages(); > + if (multifd_send_pages() < 0) { > + return -1; > + } > > if (pages->block != block) { > - multifd_queue_page(block, offset); > + return multifd_queue_page(block, offset); > } > + > + return 1; > } > > static void multifd_send_terminate_threads(Error *err) > @@ -1049,7 +1060,10 @@ static void multifd_send_sync_main(void) > return; > } > if (multifd_send_state->pages->used) { > - multifd_send_pages(); > + if (multifd_send_pages() < 0) { > + error_report("%s: multifd_send_pages fail", __func__); > + return; > + } > } > for (i = 0; i < migrate_multifd_channels(); i++) { > MultiFDSendParams *p = &multifd_send_state->params[i]; > @@ -1058,6 +1072,12 @@ static void multifd_send_sync_main(void) > > qemu_mutex_lock(&p->mutex); > > + if (p->quit) { > + error_report("%s: channel %d has already quit", __func__, i); > + qemu_mutex_unlock(&p->mutex); > + return; > + } > + > p->packet_num = multifd_send_state->packet_num++; > p->flags |= MULTIFD_FLAG_SYNC; > p->pending_job++; > @@ -2033,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) > static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, > ram_addr_t offset) > { > - multifd_queue_page(block, offset); > + if (multifd_queue_page(block, offset) < 0) { > + return -1; > + } > ram_counters.normal++; > > return 1; > -- > 2.21.0 > -- Dr. David Alan Gilbert / dgilbert@xxxxxxxxxx / Manchester, UK