Hi, Minoru. Sure. I will test this patch. Thanks Minfei On 03/04/16 at 12:59am, Minoru Usui wrote: > Hi, Zhou, Minfei > > > -----Original Message----- > > From: kexec [mailto:kexec-bounces at lists.infradead.org] On Behalf Of Minfei Huang > > Sent: Wednesday, February 24, 2016 11:25 AM > > To: "Zhou, Wenjian/???" <zhouwj-fnst at cn.fujitsu.com> > > Cc: kexec at lists.infradead.org > > Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31 > > > > > > > On Feb 24, 2016, at 10:20, Zhou, Wenjian/??? <zhouwj-fnst at cn.fujitsu.com> wrote: > > > > > > Hi, > > > > > > On 02/24/2016 09:43 AM, Minfei Huang wrote: > > >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/???" wrote: > > >>> Hello, Minfei, > > >>> > > >>> Does it occur every time? > > >>> If not, I think I have known the reason. > > >> > > >> Hi, Wenjian. > > >> > > >> This patch is applied directly on version 1.5.9. And makedumpfile hangs > > >> if option num-thread is appended. > > >> > > > > > > I see. > > > I'm working on it. > > > > > > BTW, did you only test it with --num-threads 128? > > > How is it with --num-threads 32(or smaller value)? > > > > Yes. I have tested with num-thread 1, 2, 8 and 32. Except for ?num-threads 1, > > all of the test fail. > > I wrote an RFC patch which fixed this problem. > This patch is incremental one from the Zhou's original v2 patch. > > I think the problem was caused by page_flag_buf race between consumer and producers. > So, I add the mutex to each page_flag_buf. > Change summary is as follows. > > * Add mutex to each page_flag_buf. > * Producer: > - Keeping page_flag_buf mutex until producer finish writing all data to page_flag_buf. > - Fix the issue that info->current_pfn becomes larger than end_pfn. > - Keeping info->current_pfn_mutex until producer gets dumpable pfn. > * Consumer: > - Keeping page_flag_buf mutex until consumer finish writing all data to page_flag_buf. > > The result of my test in 5GB dumpfile is following. > > === > Common Option: -c --cyclic-buffer=10 --num-threads > > [-d0] > num-thread real vs num-threads 0 > ---------------------------------------- > 0 209.621 100.0% > 1 217.921 104.0% > 2 84.539 40.3% > 4 51.787 24.7% > 8 37.260 17.8% > > [-d31] > num-thread real vs num-threads 0 > ---------------------------------------- > 0 11.492 100.0% > 1 8.572 74.6% > 2 4.928 42.9% > 4 3.115 27.1% > 8 2.259 19.7% > === > > How about this approach? > Could you test this? > > === > diff --git a/makedumpfile.c b/makedumpfile.c > index 8a0c636..c697f93 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) { > * unfiltered zero page will only take a page_flag_buf > * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > */ > - while (pfn < kdump_thread_args->end_pfn) { > + while (1) { > buf_ready = FALSE; > > while (page_data_buf[index].used != FALSE || > @@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) { > > while (buf_ready == FALSE) { > pthread_testcancel(); > - if (page_flag_buf->ready == FLAG_READY) > + pthread_mutex_lock(&page_flag_buf->mutex); > + if (page_flag_buf->ready == FLAG_READY) { > + pthread_mutex_unlock(&page_flag_buf->mutex); > continue; > + } > > - /* get next pfn */ > + /* get next dumpable pfn */ > pthread_mutex_lock(&info->current_pfn_mutex); > pfn = info->current_pfn; > - info->current_pfn++; > - page_flag_buf->ready = FLAG_FILLING; > - pthread_mutex_unlock(&info->current_pfn_mutex); > - > - page_flag_buf->pfn = pfn; > + while(1) { > + if (pfn >= kdump_thread_args->end_pfn) { > + pthread_mutex_unlock(&info->current_pfn_mutex); > + page_flag_buf->pfn = pfn; > + page_flag_buf->ready = FLAG_READY; > + pthread_mutex_unlock(&page_flag_buf->mutex); > + page_data_buf[index].used = FALSE; > + pthread_mutex_unlock(&page_data_buf[index].mutex); > + goto finish; > + } > + dumpable = is_dumpable( > + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > + pfn, cycle); > + if (dumpable) > + break; > > - if (pfn >= kdump_thread_args->end_pfn) { > - page_data_buf[index].used = FALSE; > - page_flag_buf->ready = FLAG_READY; > - break; > + pfn++; > } > - > - dumpable = is_dumpable( > - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > - pfn, > - cycle); > - if (!dumpable) > - continue; > + info->current_pfn = pfn + 1; > + pthread_mutex_unlock(&info->current_pfn_mutex); > + page_flag_buf->pfn = pfn; > + page_flag_buf->ready = FLAG_FILLING; > > if (!read_pfn_parallel(fd_memory, pfn, buf, > &bitmap_memory_parallel, > - mmap_cache)) > + mmap_cache)) { > + pthread_mutex_unlock(&page_flag_buf->mutex); > + page_data_buf[index].used = FALSE; > + pthread_mutex_unlock(&page_data_buf[index].mutex); > goto fail; > + } > > filter_data_buffer_parallel(buf, pfn_to_paddr(pfn), > info->page_size, > @@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) { > buf_ready = TRUE; > next: > page_flag_buf->ready = FLAG_READY; > + pthread_mutex_unlock(&page_flag_buf->mutex); > page_flag_buf = page_flag_buf->next; > - > } > - > pthread_mutex_unlock(&page_data_buf[index].mutex); > } > +finish: > retval = NULL; > > fail: > @@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > mdf_pfn_t start_pfn, end_pfn; > struct page_desc pd; > struct timeval tv_start; > - struct timeval last, new; > pthread_t **threads = NULL; > struct thread_args *kdump_thread_args = NULL; > void *thread_result; > int page_buf_num; > struct page_data *page_data_buf = NULL; > + struct page_flag *page_flag_buf = NULL; > int i; > int index; > - int end_count, consuming, check_count; > + int end_count, consuming, check_count, prevlocked; > mdf_pfn_t current_pfn, temp_pfn; > > if (info->flag_elf_dumpfile) > @@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > } > > for (i = 0; i < info->num_threads; i++) { > + for (page_flag_buf = info->page_flag_buf[i]; > + page_flag_buf->next != info->page_flag_buf[i]; > + page_flag_buf = page_flag_buf->next) { > + page_flag_buf->ready = FLAG_UNUSED; > + page_flag_buf->pfn = start_pfn; > + res = pthread_mutex_init(&page_flag_buf->mutex, NULL); > + if (res != 0) { > + ERRMSG("Can't initialize mutex of page_flag_buf. %s\n", > + strerror(res)); > + goto out; > + } > + } > + > kdump_thread_args[i].thread_num = i; > kdump_thread_args[i].len_buf_out = len_buf_out; > kdump_thread_args[i].start_pfn = start_pfn; > @@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > end_count = 0; > while (1) { > - consuming = 0; > - check_count = 0; > - > /* > * The basic idea is producer producing page and consumer writing page. > * Each producer have a page_flag_buf list which is used for storing page's description. > @@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > * The main thread is the consumer. It will find the next pfn and write it into file. > * The next pfn is smallest pfn in all page_flag_buf. > */ > - gettimeofday(&last, NULL); > - while (1) { > + do { > + consuming = prevlocked = -1; > + check_count = 0; > current_pfn = end_pfn; > > /* > @@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > * current_pfn is used for recording the value of pfn when checking the pfn. > */ > for (i = 0; i < info->num_threads; i++) { > + pthread_mutex_lock(&info->page_flag_buf[i]->mutex); > if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > - continue; > + goto next; > temp_pfn = info->page_flag_buf[i]->pfn; > > /* > * count how many threads have reached the end. > */ > if (temp_pfn >= end_pfn) { > - /* > - * prevent setting FLAG_UNUSED being optimized. > - */ > - MSG("-"); > - > info->page_flag_buf[i]->ready = FLAG_UNUSED; > - > - info->current_pfn = end_pfn; > end_count++; > - continue; > + goto next; > } > > if (current_pfn < temp_pfn) > - continue; > + goto next; > > check_count++; > + prevlocked = consuming; > consuming = i; > current_pfn = temp_pfn; > +next: > + /* > + * Keep mutex of page_flag which has minimam pfn > + */ > + if (consuming != i) { // unlock page_flag which is not minimum pfn > + pthread_mutex_unlock(&info->page_flag_buf[i]->mutex); > + } else if (prevlocked != -1) { // unlock page_flag which previously locked > + pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex); > + } > } > > /* > @@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > */ > if (end_count >= info->num_threads) > goto finish; > - > - /* > - * Since it has the probabilty that there is no page_flag_buf being ready, > - * we should recheck if it happens. > - */ > - if (check_count == 0) > - continue; > - > - /* > - * If the page_flag_buf is not ready, the pfn recorded may be changed. > - * So we should recheck. > - */ > - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > - gettimeofday(&new, NULL); > - if (new.tv_sec - last.tv_sec > WAIT_TIME) { > - ERRMSG("Can't get data of pfn.\n"); > - goto out; > - } > - continue; > - } > - > - if (current_pfn == info->page_flag_buf[consuming]->pfn) > - break; > - } > + } while (check_count == 0); > > if ((num_dumped % per) == 0) > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > num_dumped++; > > - > if (info->page_flag_buf[consuming]->zero == TRUE) { > - if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > + if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) { > + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); > goto out; > + } > pfn_zero++; > } else { > index = info->page_flag_buf[consuming]->index; > @@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > /* > * Write the page header. > */ > - if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) > + if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) { > + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); > goto out; > + } > /* > * Write the page data. > */ > - if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > + if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) { > + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); > goto out; > + } > page_data_buf[index].used = FALSE; > } > info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > + pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex); > info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > } > finish: > @@ -7906,6 +7915,14 @@ out: > } > } > > + for (i = 0; i < info->num_threads; i++) { > + for (page_flag_buf = info->page_flag_buf[i]; > + page_flag_buf->next != info->page_flag_buf[i]; > + page_flag_buf = page_flag_buf->next) { > + pthread_mutex_destroy(&page_flag_buf->mutex); > + } > + } > + > pthread_rwlock_destroy(&info->usemmap_rwlock); > pthread_mutex_destroy(&info->filter_mutex); > pthread_mutex_destroy(&info->consumed_pfn_mutex); > diff --git a/makedumpfile.h b/makedumpfile.h > index 80ce23a..188ec17 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong; > */ > > #define PAGE_DATA_NUM (50) > -#define WAIT_TIME (60 * 10) > #define PTHREAD_FAIL ((void *)-2) > #define NUM_BUFFERS (20) > > @@ -987,6 +986,7 @@ enum { > FLAG_FILLING > }; > struct page_flag { > + pthread_mutex_t mutex; > mdf_pfn_t pfn; > char zero; > char ready; > === > > > Thanks > Minoru Usui