Hi, Zhou, Minfei > -----Original Message----- > From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang > Sent: Wednesday, February 24, 2016 11:25 AM > To: "Zhou, Wenjian/???" <zhouwj-fnst at cn.fujitsu.com> > Cc: kexec at lists.infradead.org > Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31 > > > > On Feb 24, 2016, at 10:20, Zhou, Wenjian/??? <zhouwj-fnst at cn.fujitsu.com> wrote: > > > > Hi, > > > > On 02/24/2016 09:43 AM, Minfei Huang wrote: > >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/???" wrote: > >>> Hello, Minfei, > >>> > >>> Does it occur every time? > >>> If not, I think I have known the reason. > >> > >> Hi, Wenjian. > >> > >> This patch is applied directly on version 1.5.9. And makedumpfile hangs > >> if option num-thread is appended. > >> > > > > I see. > > I'm working on it. > > > > BTW, did you only test it with --num-threads 128? > > How is it with --num-threads 32(or smaller value)? > > Yes. I have tested with num-thread 1, 2, 8 and 32. Except for ?num-threads 1, > all of the test fail. I wrote an RFC patch which fixed this problem. This patch is incremental one from the Zhou's original v2 patch. I think the problem was caused by page_flag_buf race between consumer and producers. So, I add the mutex to each page_flag_buf. Change summary is as follows. * Add mutex to each page_flag_buf. * Producer: - Keeping page_flag_buf mutex until producer finish writing all data to page_flag_buf. - Fix the issue that info->current_pfn becomes larger than end_pfn. - Keeping info->current_pfn_mutex until producer gets dumpable pfn. * Consumer: - Keeping page_flag_buf mutex until consumer finish writing all data to page_flag_buf. The result of my test in 5GB dumpfile is following. === Common Option: -c --cyclic-buffer=10 --num-threads [-d0] num-thread real vs num-threads 0 ---------------------------------------- 0 209.621 100.0% 1 217.921 104.0% 2 84.539 40.3% 4 51.787 24.7% 8 37.260 17.8% [-d31] num-thread real vs num-threads 0 ---------------------------------------- 0 11.492 100.0% 1 8.572 74.6% 2 4.928 42.9% 4 3.115 27.1% 8 2.259 19.7% === How about this approach? Could you test this? === diff --git a/makedumpfile.c b/makedumpfile.c index 8a0c636..c697f93 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) { * unfiltered zero page will only take a page_flag_buf * unfiltered non-zero page will take a page_flag_buf and a page_data_buf */ - while (pfn < kdump_thread_args->end_pfn) { + while (1) { buf_ready = FALSE; while (page_data_buf[index].used != FALSE || @@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) { while (buf_ready == FALSE) { pthread_testcancel(); - if (page_flag_buf->ready == FLAG_READY) + pthread_mutex_lock(&page_flag_buf->mutex); + if (page_flag_buf->ready == FLAG_READY) { + pthread_mutex_unlock(&page_flag_buf->mutex); continue; + } - /* get next pfn */ + /* get next dumpable pfn */ pthread_mutex_lock(&info->current_pfn_mutex); pfn = info->current_pfn; - info->current_pfn++; - page_flag_buf->ready = FLAG_FILLING; - pthread_mutex_unlock(&info->current_pfn_mutex); - - page_flag_buf->pfn = pfn; + while(1) { + if (pfn >= kdump_thread_args->end_pfn) { + pthread_mutex_unlock(&info->current_pfn_mutex); + page_flag_buf->pfn = pfn; + page_flag_buf->ready = FLAG_READY; + pthread_mutex_unlock(&page_flag_buf->mutex); + page_data_buf[index].used = FALSE; + pthread_mutex_unlock(&page_data_buf[index].mutex); + goto finish; + } + dumpable = is_dumpable( + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, + pfn, cycle); + if (dumpable) + break; - if (pfn >= kdump_thread_args->end_pfn) { - page_data_buf[index].used = FALSE; - page_flag_buf->ready = FLAG_READY; - break; + pfn++; } - - dumpable = is_dumpable( - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, - pfn, - cycle); - if (!dumpable) - continue; + info->current_pfn = pfn + 1; + pthread_mutex_unlock(&info->current_pfn_mutex); + page_flag_buf->pfn = pfn; + page_flag_buf->ready = FLAG_FILLING; if (!read_pfn_parallel(fd_memory, pfn, buf, &bitmap_memory_parallel, - mmap_cache)) + mmap_cache)) { + pthread_mutex_unlock(&page_flag_buf->mutex); + page_data_buf[index].used = FALSE; + pthread_mutex_unlock(&page_data_buf[index].mutex); goto fail; + } filter_data_buffer_parallel(buf, pfn_to_paddr(pfn), info->page_size, @@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) { buf_ready = TRUE; next: page_flag_buf->ready = FLAG_READY; + pthread_mutex_unlock(&page_flag_buf->mutex); page_flag_buf = page_flag_buf->next; - } - pthread_mutex_unlock(&page_data_buf[index].mutex); } +finish: retval = NULL; fail: @@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, mdf_pfn_t start_pfn, end_pfn; struct page_desc pd; struct timeval tv_start; - struct timeval last, new; pthread_t **threads = NULL; struct thread_args *kdump_thread_args = NULL; void *thread_result; int page_buf_num; struct page_data *page_data_buf = NULL; + struct page_flag *page_flag_buf = NULL; int i; int index; - int end_count, consuming, check_count; + int end_count, consuming, check_count, prevlocked; mdf_pfn_t current_pfn, temp_pfn; if (info->flag_elf_dumpfile) @@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, } for (i = 0; i < info->num_threads; i++) { + for (page_flag_buf = info->page_flag_buf[i]; + page_flag_buf->next != info->page_flag_buf[i]; + page_flag_buf = page_flag_buf->next) { + page_flag_buf->ready = FLAG_UNUSED; + page_flag_buf->pfn = start_pfn; + res = pthread_mutex_init(&page_flag_buf->mutex, NULL); + if (res != 0) { + ERRMSG("Can't initialize mutex of page_flag_buf. %s\n", + strerror(res)); + goto out; + } + } + kdump_thread_args[i].thread_num = i; kdump_thread_args[i].len_buf_out = len_buf_out; kdump_thread_args[i].start_pfn = start_pfn; @@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, end_count = 0; while (1) { - consuming = 0; - check_count = 0; - /* * The basic idea is producer producing page and consumer writing page. * Each producer have a page_flag_buf list which is used for storing page's description. @@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, * The main thread is the consumer. It will find the next pfn and write it into file. * The next pfn is smallest pfn in all page_flag_buf. */ - gettimeofday(&last, NULL); - while (1) { + do { + consuming = prevlocked = -1; + check_count = 0; current_pfn = end_pfn; /* @@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, * current_pfn is used for recording the value of pfn when checking the pfn. */ for (i = 0; i < info->num_threads; i++) { + pthread_mutex_lock(&info->page_flag_buf[i]->mutex); if (info->page_flag_buf[i]->ready == FLAG_UNUSED) - continue; + goto next; temp_pfn = info->page_flag_buf[i]->pfn; /* * count how many threads have reached the end. */ if (temp_pfn >= end_pfn) { - /* - * prevent setting FLAG_UNUSED being optimized. - */ - MSG("-"); - info->page_flag_buf[i]->ready = FLAG_UNUSED; - - info->current_pfn = end_pfn; end_count++; - continue; + goto next; } if (current_pfn < temp_pfn) - continue; + goto next; check_count++; + prevlocked = consuming; consuming = i; current_pfn = temp_pfn; +next: + /* + * Keep mutex of page_flag which has minimam pfn + */ + if (consuming != i) { // unlock page_flag which is not minimum pfn + pthread_mutex_unlock(&info->page_flag_buf[i]->mutex); + } else if (prevlocked != -1) { // unlock page_flag which previously locked + pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex); + } } /* @@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, */ if (end_count >= info->num_threads) goto finish; - - /* - * Since it has the probabilty that there is no page_flag_buf being ready, - * we should recheck if it happens. - */ - if (check_count == 0) - continue; - - /* - * If the page_flag_buf is not ready, the pfn recorded may be changed. - * So we should recheck. - */ - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { - gettimeofday(&new, NULL); - if (new.tv_sec - last.tv_sec > WAIT_TIME) { - ERRMSG("Can't get data of pfn.\n"); - goto out; - } - continue; - } - - if (current_pfn == info->page_flag_buf[consuming]->pfn) - break; - } + } while (check_count == 0); if ((num_dumped % per) == 0) print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); num_dumped++; - if (info->page_flag_buf[consuming]->zero == TRUE) { - if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) + if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) { + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); goto out; + } pfn_zero++; } else { index = info->page_flag_buf[consuming]->index; @@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, /* * Write the page header. */ - if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) + if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) { + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); goto out; + } /* * Write the page data. */ - if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) + if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) { + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); goto out; + } page_data_buf[index].used = FALSE; } info->page_flag_buf[consuming]->ready = FLAG_UNUSED; + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; } finish: @@ -7906,6 +7915,14 @@ out: } } + for (i = 0; i < info->num_threads; i++) { + for (page_flag_buf = info->page_flag_buf[i]; + page_flag_buf->next != info->page_flag_buf[i]; + page_flag_buf = page_flag_buf->next) { + pthread_mutex_destroy(&page_flag_buf->mutex); + } + } + pthread_rwlock_destroy(&info->usemmap_rwlock); pthread_mutex_destroy(&info->filter_mutex); pthread_mutex_destroy(&info->consumed_pfn_mutex); diff --git a/makedumpfile.h b/makedumpfile.h index 80ce23a..188ec17 100644 --- a/makedumpfile.h +++ b/makedumpfile.h @@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong; */ #define PAGE_DATA_NUM (50) -#define WAIT_TIME (60 * 10) #define PTHREAD_FAIL ((void *)-2) #define NUM_BUFFERS (20) @@ -987,6 +986,7 @@ enum { FLAG_FILLING }; struct page_flag { + pthread_mutex_t mutex; mdf_pfn_t pfn; char zero; char ready; === Thanks Minoru Usui