Hello Cliff, >From: Cliff Wickman <cpw at sgi.com> > >This patch adds a -j to makedumpfile. With this option it uses direct i/o on the dump >file and the bitmap file, thus enabling makedumpfile to run mode in a fairly small >crashkernel area without using cyclic mode. It can dump system with many terabytes of >memory using crashkernel=450M. First, let's separate the problems that you have. (Actually, you already did this in your previous patches.) 1. The cyclic mode is slow. -> You try to avoid this by using a disk for the bitmap. 2. The page cache uses up the memory reserved for the crash kernel. -> You try to avoid this by using direct i/o. >Without direct i/o the crash kernel will use kernel page cache for the writes. This >will use up a great deal of the crash kernel's alloted memory. This is the second problem. Actually, we hit an OOM that was probably caused by the page cache: http://lists.infradead.org/pipermail/kexec/2014-April/011639.html so direct i/o may be helpful for such small crashkernel environments. >The -j option will also implicitly avoid cyclic mode. Cyclic mode is slower, and >is not needed if we use direct i/o. This is the first problem. Direct i/o doesn't enable the non-cyclic mode; placing the bitmap on a disk does. Anyway, I still think it's enough to point TMPDIR at a disk-backed directory if you want to choose --non-cyclic. I still don't understand why the code needs to be changed for this. >Direct i/o is of course a bit slower, but not significantly slower when used in this >almost-entirely sequential fashion. If you have a performance comparison between direct i/o and normal file i/o, I'm curious to see it. 
Thanks Atsushi Kumagai >--- > makedumpfile.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++----------- > makedumpfile.h | 6 > print_info.c | 5 > 3 files changed, 347 insertions(+), 81 deletions(-) > >Index: makedumpfile-1.5.7/makedumpfile.h >=================================================================== >--- makedumpfile-1.5.7.orig/makedumpfile.h >+++ makedumpfile-1.5.7/makedumpfile.h >@@ -18,6 +18,7 @@ > > #include <stdio.h> > #include <stdlib.h> >+#define __USE_GNU > #include <fcntl.h> > #include <gelf.h> > #include <sys/stat.h> >@@ -222,6 +223,7 @@ isAnon(unsigned long mapping) > #define FILENAME_BITMAP "kdump_bitmapXXXXXX" > #define FILENAME_STDOUT "STDOUT" > #define MAP_REGION (4096*1024) >+#define DIRECT_ALIGN (512) > > /* > * Minimam vmcore has 2 ProgramHeaderTables(PT_NOTE and PT_LOAD). >@@ -892,7 +894,8 @@ struct dump_bitmap { > int fd; > int no_block; > char *file_name; >- char buf[BUFSIZE_BITMAP]; >+ char *buf; >+ char *buf_malloced; > off_t offset; > }; > >@@ -900,6 +903,7 @@ struct cache_data { > int fd; > char *file_name; > char *buf; >+ char *buf_malloced; > size_t buf_size; > size_t cache_size; > off_t offset; >Index: makedumpfile-1.5.7/print_info.c >=================================================================== >--- makedumpfile-1.5.7.orig/print_info.c >+++ makedumpfile-1.5.7/print_info.c >@@ -58,7 +58,7 @@ print_usage(void) > MSG("\n"); > MSG("Usage:\n"); > MSG(" Creating DUMPFILE:\n"); >- MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] [-x VMLINUX|-i VMCOREINFO] VMCORE\n"); >+ MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] [-j] [-x VMLINUX|-i VMCOREINFO] VMCORE\n"); > MSG(" DUMPFILE\n"); > MSG("\n"); > MSG(" Creating DUMPFILE with filtered kernel data specified through filter config\n"); >@@ -108,6 +108,9 @@ print_usage(void) > MSG(" -E option, because the ELF format does not support compressed data.\n"); > MSG(" THIS IS ONLY FOR THE CRASH UTILITY.\n"); > MSG("\n"); >+ MSG(" [-j]:\n"); >+ MSG(" Use raw (O_DIRECT) i/o on dump and bitmap 
files to avoid expanding kernel pagecache.\n"); >+ MSG("\n"); > MSG(" [-d DL]:\n"); > MSG(" Specify the type of unnecessary page for analysis.\n"); > MSG(" Pages of the specified type are not copied to DUMPFILE. The page type\n"); >Index: makedumpfile-1.5.7/makedumpfile.c >=================================================================== >--- makedumpfile-1.5.7.orig/makedumpfile.c >+++ makedumpfile-1.5.7/makedumpfile.c >@@ -79,8 +79,11 @@ mdf_pfn_t pfn_free; > mdf_pfn_t pfn_hwpoison; > > mdf_pfn_t num_dumped; >+long blocksize; > > int retcd = FAILED; /* return code */ >+// jflag is rawio on the dumpfile and bitmap file >+int jflag = 0; > > #define INITIALIZE_LONG_TABLE(table, value) \ > do { \ >@@ -966,10 +969,17 @@ int > open_dump_file(void) > { > int fd; >- int open_flags = O_RDWR|O_CREAT|O_TRUNC; >+ int open_flags; > >+ if (jflag) >+ open_flags = O_RDWR|O_CREAT|O_TRUNC|O_DIRECT; >+ else >+ open_flags = O_RDWR|O_CREAT|O_TRUNC; >+ >+#if 0 > if (!info->flag_force) > open_flags |= O_EXCL; >+#endif > > if (info->flag_flatten) { > fd = STDOUT_FILENO; >@@ -1005,12 +1015,40 @@ check_dump_file(const char *path) > int > open_dump_bitmap(void) > { >- int i, fd; >- char *tmpname; >- >- tmpname = getenv("TMPDIR"); >- if (!tmpname) >- tmpname = "/tmp"; >+ int i, fd, flags; >+ char *tmpname, *cp; >+ char prefix[100]; >+ int len; >+ >+ /* -j: saving memory by doing direct i/o, so also avoid /tmp for the bit map files >+ * because /tmp is using tmpfs */ >+ if (!jflag) { >+ tmpname = getenv("TMPDIR"); >+ if (!tmpname) >+ tmpname = "/tmp"; >+ } else { >+ /* for the crash kernel environment use the prefix of >+ the dump name e.g. /mnt//var/.... 
*/ >+ if (!strchr(info->name_dumpfile,'v')) { >+ printf("no /var found in name_dumpfile %s\n", >+ info->name_dumpfile); >+ exit(1); >+ } else { >+ cp = strchr(info->name_dumpfile,'v'); >+ if (strncmp(cp-1, "/var", 4)) { >+ printf("no /var found in name_dumpfile %s\n", >+ info->name_dumpfile); >+ exit(1); >+ } >+ } >+ len = cp - info->name_dumpfile - 1; >+ strncpy(prefix, info->name_dumpfile, len); >+ if (*(prefix + len - 1) == '/') >+ len -= 1; >+ *(prefix + len) = '\0'; >+ tmpname = prefix; >+ strcat(tmpname, "/"); >+ } > > if ((info->name_bitmap = (char *)malloc(sizeof(FILENAME_BITMAP) + > strlen(tmpname) + 1)) == NULL) { >@@ -1019,9 +1057,12 @@ open_dump_bitmap(void) > return FALSE; > } > strcpy(info->name_bitmap, tmpname); >- strcat(info->name_bitmap, "/"); > strcat(info->name_bitmap, FILENAME_BITMAP); >- if ((fd = mkstemp(info->name_bitmap)) < 0) { >+ if (jflag) >+ flags = O_RDWR|O_CREAT|O_TRUNC|O_DIRECT; >+ else >+ flags = O_RDWR|O_CREAT|O_TRUNC; >+ if ((fd = open(info->name_bitmap, flags)) < 0) { > ERRMSG("Can't open the bitmap file(%s). %s\n", > info->name_bitmap, strerror(errno)); > return FALSE; >@@ -2985,6 +3026,7 @@ initialize_bitmap_memory(void) > struct dump_bitmap *bmp; > off_t bitmap_offset; > off_t bitmap_len, max_sect_len; >+ char *cp; > mdf_pfn_t pfn; > int i, j; > long block_size; >@@ -3006,7 +3048,14 @@ initialize_bitmap_memory(void) > bmp->fd = info->fd_memory; > bmp->file_name = info->name_memory; > bmp->no_block = -1; >- memset(bmp->buf, 0, BUFSIZE_BITMAP); >+ if ((cp = malloc(blocksize + DIRECT_ALIGN)) == NULL) { >+ ERRMSG("Can't allocate memory for the bitmap buffer. 
%s\n", >+ strerror(errno)); >+ exit(1); >+ } >+ bmp->buf_malloced = cp; >+ bmp->buf = cp - ((unsigned long)cp % DIRECT_ALIGN) + DIRECT_ALIGN; >+ memset(bmp->buf, 0, blocksize); > bmp->offset = bitmap_offset + bitmap_len / 2; > info->bitmap_memory = bmp; > >@@ -3018,6 +3067,7 @@ initialize_bitmap_memory(void) > if (info->valid_pages == NULL) { > ERRMSG("Can't allocate memory for the valid_pages. %s\n", > strerror(errno)); >+ free(bmp->buf_malloced); > free(bmp); > return FALSE; > } >@@ -3318,9 +3368,18 @@ out: > void > initialize_bitmap(struct dump_bitmap *bitmap) > { >+ char *cp; >+ > bitmap->fd = info->fd_bitmap; > bitmap->file_name = info->name_bitmap; > bitmap->no_block = -1; >+ if ((cp = malloc(blocksize + DIRECT_ALIGN)) == NULL) { >+ ERRMSG("Can't allocate memory for the bitmap buffer. %s\n", >+ strerror(errno)); >+ exit(1); >+ } >+ bitmap->buf_malloced = cp; >+ bitmap->buf = cp - ((unsigned long)cp % DIRECT_ALIGN) + DIRECT_ALIGN; > memset(bitmap->buf, 0, BUFSIZE_BITMAP); > } > >@@ -3385,9 +3444,9 @@ set_bitmap(struct dump_bitmap *bitmap, m > byte = (pfn%PFN_BUFBITMAP)>>3; > bit = (pfn%PFN_BUFBITMAP) & 7; > if (val) >- bitmap->buf[byte] |= 1<<bit; >+ *(bitmap->buf + byte) |= 1<<bit; > else >- bitmap->buf[byte] &= ~(1<<bit); >+ *(bitmap->buf + byte) &= ~(1<<bit); > > return TRUE; > } >@@ -3570,6 +3629,29 @@ read_cache(struct cache_data *cd) > return TRUE; > } > >+void >+fill_to_offset(struct cache_data *cd, int blocksize) >+{ >+ off_t current; >+ long num_blocks; >+ long i; >+ >+ current = lseek(cd->fd, 0, SEEK_CUR); >+ if ((cd->offset - current) % blocksize) { >+ printf("ERROR: fill area is %#lx\n", cd->offset - current); >+ exit(1); >+ } >+ if (cd->cache_size < blocksize) { >+ printf("ERROR: cache buf is only %ld\n", cd->cache_size); >+ exit(1); >+ } >+ num_blocks = (cd->offset - current) / blocksize; >+ for (i = 0; i < num_blocks; i++) { >+ write(cd->fd, cd->buf, blocksize); >+ } >+ return; >+} >+ > int > is_bigendian(void) > { >@@ -3639,6 +3721,14 @@ 
write_buffer(int fd, off_t offset, void > int > write_cache(struct cache_data *cd, void *buf, size_t size) > { >+ /* sanity check; do not overflow this buffer */ >+ /* (it is of cd->cache_size + info->page_size) */ >+ if (size > ((cd->cache_size - cd->buf_size) + info->page_size)) { >+ fprintf(stderr, "write_cache buffer overflow! size %#lx\n", >+ size); >+ exit(1); >+ } >+ > memcpy(cd->buf + cd->buf_size, buf, size); > cd->buf_size += size; > >@@ -3651,6 +3741,8 @@ write_cache(struct cache_data *cd, void > > cd->buf_size -= cd->cache_size; > memcpy(cd->buf, cd->buf + cd->cache_size, cd->buf_size); >+ if (cd->buf_size) >+ memcpy(cd->buf, cd->buf + cd->cache_size, cd->buf_size); > cd->offset += cd->cache_size; > return TRUE; > } >@@ -3682,6 +3774,21 @@ write_cache_zero(struct cache_data *cd, > return write_cache_bufsz(cd); > } > >+/* flush the full cache to the file */ >+int >+write_cache_flush(struct cache_data *cd) >+{ >+ if (cd->buf_size == 0) >+ return TRUE; >+ if (cd->buf_size < cd->cache_size) { >+ memset(cd->buf + cd->buf_size, 0, cd->cache_size - cd->buf_size); >+ } >+ cd->buf_size = cd->cache_size; >+ if (!write_cache_bufsz(cd)) >+ return FALSE; >+ return TRUE; >+} >+ > int > read_buf_from_stdin(void *buf, int buf_size) > { >@@ -4414,11 +4521,19 @@ create_1st_bitmap(void) > { > int i; > unsigned int num_pt_loads = get_num_pt_loads(); >- char buf[info->page_size]; >+ char *buf; > mdf_pfn_t pfn, pfn_start, pfn_end, pfn_bitmap1; > unsigned long long phys_start, phys_end; > struct timeval tv_start; > off_t offset_page; >+ char *cp; >+ >+ if ((cp = malloc(blocksize + DIRECT_ALIGN)) == NULL) { >+ ERRMSG("Can't allocate memory for the bitmap buffer. %s\n", >+ strerror(errno)); >+ exit(1); >+ } >+ buf = cp - ((unsigned long)cp % DIRECT_ALIGN) + DIRECT_ALIGN; > > if (info->flag_refiltering) > return copy_1st_bitmap_from_memory(); >@@ -4429,7 +4544,7 @@ create_1st_bitmap(void) > /* > * At first, clear all the bits on the 1st-bitmap. 
> */ >- memset(buf, 0, sizeof(buf)); >+ memset(buf, 0, blocksize); > > if (lseek(info->bitmap1->fd, info->bitmap1->offset, SEEK_SET) < 0) { > ERRMSG("Can't seek the bitmap(%s). %s\n", >@@ -4975,9 +5090,17 @@ int > copy_bitmap(void) > { > off_t offset; >- unsigned char buf[info->page_size]; >+ unsigned char *buf; >+ unsigned char *cp; > const off_t failed = (off_t)-1; > >+ if ((cp = malloc(blocksize + DIRECT_ALIGN)) == NULL) { >+ ERRMSG("Can't allocate memory for the bitmap buffer. %s\n", >+ strerror(errno)); >+ exit(1); >+ } >+ buf = cp - ((unsigned long)cp % DIRECT_ALIGN) + DIRECT_ALIGN; >+ > offset = 0; > while (offset < (info->len_bitmap / 2)) { > if (lseek(info->bitmap1->fd, info->bitmap1->offset + offset, >@@ -4986,7 +5109,7 @@ copy_bitmap(void) > info->name_bitmap, strerror(errno)); > return FALSE; > } >- if (read(info->bitmap1->fd, buf, sizeof(buf)) != sizeof(buf)) { >+ if (read(info->bitmap1->fd, buf, blocksize) != blocksize) { > ERRMSG("Can't read the dump memory(%s). %s\n", > info->name_memory, strerror(errno)); > return FALSE; >@@ -4997,12 +5120,12 @@ copy_bitmap(void) > info->name_bitmap, strerror(errno)); > return FALSE; > } >- if (write(info->bitmap2->fd, buf, sizeof(buf)) != sizeof(buf)) { >+ if (write(info->bitmap2->fd, buf, blocksize) != blocksize) { > ERRMSG("Can't write the bitmap(%s). 
%s\n", > info->name_bitmap, strerror(errno)); > return FALSE; > } >- offset += sizeof(buf); >+ offset += blocksize; > } > > return TRUE; >@@ -5160,6 +5283,8 @@ void > free_bitmap1_buffer(void) > { > if (info->bitmap1) { >+ if (info->bitmap1->buf_malloced) >+ free(info->bitmap1->buf_malloced); > free(info->bitmap1); > info->bitmap1 = NULL; > } >@@ -5169,6 +5294,8 @@ void > free_bitmap2_buffer(void) > { > if (info->bitmap2) { >+ if (info->bitmap2->buf_malloced) >+ free(info->bitmap2->buf_malloced); > free(info->bitmap2); > info->bitmap2 = NULL; > } >@@ -5287,25 +5414,31 @@ get_loads_dumpfile(void) > int > prepare_cache_data(struct cache_data *cd) > { >+ char *cp; >+ > cd->fd = info->fd_dumpfile; > cd->file_name = info->name_dumpfile; > cd->cache_size = info->page_size << info->block_order; > cd->buf_size = 0; > cd->buf = NULL; > >- if ((cd->buf = malloc(cd->cache_size + info->page_size)) == NULL) { >+ if ((cp = malloc(cd->cache_size + info->page_size + DIRECT_ALIGN)) == NULL) { > ERRMSG("Can't allocate memory for the data buffer. 
%s\n", > strerror(errno)); > return FALSE; > } >+ cd->buf_malloced = cp; >+ cd->buf = cp - ((unsigned long)cp % DIRECT_ALIGN) + DIRECT_ALIGN; > return TRUE; > } > > void > free_cache_data(struct cache_data *cd) > { >- free(cd->buf); >+ if (cd->buf_malloced) >+ free(cd->buf_malloced); > cd->buf = NULL; >+ cd->buf_malloced = NULL; > } > > int >@@ -5554,19 +5687,21 @@ out: > } > > int >-write_kdump_header(void) >+write_kdump_header(struct cache_data *cd) > { > int ret = FALSE; > size_t size; > off_t offset_note, offset_vmcoreinfo; >- unsigned long size_note, size_vmcoreinfo; >+ unsigned long size_note, size_vmcoreinfo, remaining_size_note; >+ unsigned long write_size, room; > struct disk_dump_header *dh = info->dump_header; > struct kdump_sub_header kh; >- char *buf = NULL; >+ char *buf = NULL, *cp; > > if (info->flag_elf_dumpfile) > return FALSE; > >+ /* uses reads of /proc/vmcore */ > get_pt_note(&offset_note, &size_note); > > /* >@@ -5583,6 +5718,7 @@ write_kdump_header(void) > dh->bitmap_blocks = divideup(info->len_bitmap, dh->block_size); > memcpy(&dh->timestamp, &info->timestamp, sizeof(dh->timestamp)); > memcpy(&dh->utsname, &info->system_utsname, sizeof(dh->utsname)); >+ blocksize = dh->block_size; > if (info->flag_compress & DUMP_DH_COMPRESSED_ZLIB) > dh->status |= DUMP_DH_COMPRESSED_ZLIB; > #ifdef USELZO >@@ -5595,7 +5731,7 @@ write_kdump_header(void) > #endif > > size = sizeof(struct disk_dump_header); >- if (!write_buffer(info->fd_dumpfile, 0, dh, size, info->name_dumpfile)) >+ if (!write_cache(cd, dh, size)) > return FALSE; > > /* >@@ -5651,9 +5787,21 @@ write_kdump_header(void) > goto out; > } > >- if (!write_buffer(info->fd_dumpfile, kh.offset_note, buf, >- kh.size_note, info->name_dumpfile)) >- goto out; >+ /* the note may be huge, so do this in a loop to not >+ overflow the cache */ >+ remaining_size_note = kh.size_note; >+ cp = buf; >+ do { >+ room = cd->cache_size - cd->buf_size; >+ if (remaining_size_note > room) >+ write_size = room; >+ else >+ 
write_size = remaining_size_note; >+ if (!write_cache(cd, cp, write_size)) >+ goto out; >+ remaining_size_note -= write_size; >+ cp += write_size; >+ } while (remaining_size_note); > > if (has_vmcoreinfo()) { > get_vmcoreinfo(&offset_vmcoreinfo, &size_vmcoreinfo); >@@ -5669,8 +5817,7 @@ write_kdump_header(void) > kh.size_vmcoreinfo = size_vmcoreinfo; > } > } >- if (!write_buffer(info->fd_dumpfile, dh->block_size, &kh, >- size, info->name_dumpfile)) >+ if (!write_cache(cd, &kh, size)) > goto out; > > info->sub_header = kh; >@@ -6267,13 +6414,15 @@ write_elf_pages_cyclic(struct cache_data > } > > int >-write_kdump_pages(struct cache_data *cd_header, struct cache_data *cd_page) >+write_kdump_pages(struct cache_data *cd_descs, struct cache_data *cd_page) > { > mdf_pfn_t pfn, per, num_dumpable; > mdf_pfn_t start_pfn, end_pfn; > unsigned long size_out; >+ long prefix; > struct page_desc pd, pd_zero; > off_t offset_data = 0; >+ off_t initial_offset_data; > struct disk_dump_header *dh = info->dump_header; > unsigned char buf[info->page_size], *buf_out = NULL; > unsigned long len_buf_out; >@@ -6281,8 +6430,12 @@ write_kdump_pages(struct cache_data *cd_ > struct timeval tv_start; > const off_t failed = (off_t)-1; > unsigned long len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy; >+ int saved_bytes = 0; >+ int cpysize; >+ char *save_block1, *save_block_cur, *save_block2; > > int ret = FALSE; >+ int status; > > if (info->flag_elf_dumpfile) > return FALSE; >@@ -6324,13 +6477,42 @@ write_kdump_pages(struct cache_data *cd_ > per = per ? per : 1; > > /* >- * Calculate the offset of the page data. >+ * Calculate the offset of the page_desc's and page data. 
> */ >- cd_header->offset >+ cd_descs->offset > = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks) > * dh->block_size; >- cd_page->offset = cd_header->offset + sizeof(page_desc_t)*num_dumpable; >- offset_data = cd_page->offset; >+ >+ /* this is already a pagesize multiple, so well-formed for i/o */ >+ >+ cd_page->offset = cd_descs->offset + (sizeof(page_desc_t) * num_dumpable); >+ offset_data = cd_page->offset; >+ >+ /* for i/o, round this page data offset down to a block boundary */ >+ prefix = cd_page->offset % blocksize; >+ cd_page->offset -= prefix; >+ initial_offset_data = cd_page->offset; >+ cd_page->buf_size = prefix; >+ memset(cd_page->buf, 0, prefix); >+ >+ fill_to_offset(cd_descs, blocksize); >+ >+ if ((save_block1 = malloc(blocksize * 2)) == NULL) { >+ ERRMSG("Can't allocate memory for save block. %s\n", >+ strerror(errno)); >+ goto out; >+ } >+ /* put on block address boundary for well-rounded i/o */ >+ save_block1 += (blocksize - (unsigned long)save_block1 % blocksize); >+ save_block_cur = save_block1 + prefix; >+ saved_bytes += prefix; >+ if ((save_block2 = malloc(blocksize + DIRECT_ALIGN)) == NULL) { >+ ERRMSG("Can't allocate memory for save block2. %s\n", >+ strerror(errno)); >+ goto out; >+ } >+ /* put on block address boundary for well-rounded i/o */ >+ save_block2 += (DIRECT_ALIGN - (unsigned long)save_block2 % DIRECT_ALIGN); > > /* > * Set a fileoffset of Physical Address 0x0. 
>@@ -6354,6 +6536,14 @@ write_kdump_pages(struct cache_data *cd_ > memset(buf, 0, pd_zero.size); > if (!write_cache(cd_page, buf, pd_zero.size)) > goto out; >+ >+ cpysize = pd_zero.size; >+ if ((saved_bytes + cpysize) > blocksize) >+ cpysize = blocksize - saved_bytes; >+ memcpy(save_block_cur, buf, cpysize); >+ saved_bytes += cpysize; >+ save_block_cur += cpysize; >+ > offset_data += pd_zero.size; > } > if (info->flag_split) { >@@ -6387,7 +6577,7 @@ write_kdump_pages(struct cache_data *cd_ > */ > if ((info->dump_level & DL_EXCLUDE_ZERO) > && is_zero_page(buf, info->page_size)) { >- if (!write_cache(cd_header, &pd_zero, sizeof(page_desc_t))) >+ if (!write_cache(cd_descs, &pd_zero, sizeof(page_desc_t))) > goto out; > pfn_zero++; > continue; >@@ -6435,25 +6625,68 @@ write_kdump_pages(struct cache_data *cd_ > /* > * Write the page header. > */ >- if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) >+ if (!write_cache(cd_descs, &pd, sizeof(page_desc_t))) > goto out; > > /* > * Write the page data. > */ >+ /* kludge: save the partial block where page desc's and data overlap */ >+ /* (this is the second part of the full block (save_block) where >+ they overlap) */ >+ if (saved_bytes < blocksize) { >+ memcpy(save_block_cur, buf, pd.size); >+ saved_bytes += pd.size; >+ save_block_cur += pd.size; >+ } > if (!write_cache(cd_page, pd.flags ? buf_out : buf, pd.size)) > goto out; > } > > /* >- * Write the remainder. 
>+ * Write the remainder (well-formed blocks) > */ >- if (!write_cache_bufsz(cd_page)) >- goto out; >- if (!write_cache_bufsz(cd_header)) >+ /* adjust the cd_descs to write out only full blocks beyond the >+ data in the buffer */ >+ if (cd_descs->buf_size % blocksize) { >+ cd_descs->buf_size += >+ (blocksize - (cd_descs->buf_size % blocksize)); >+ cd_descs->cache_size = cd_descs->buf_size; >+ } >+ if (!write_cache_flush(cd_descs)) > goto out; > > /* >+ * kludge: the page data will overwrite the last block of the page_desc's, >+ * so re-construct a block from: >+ * the last block of the page_desc's (length 'prefix') (will read into >+ * save_block2) and the end (4096-prefix) of the page data we saved in >+ * save_block1. >+ */ >+ if (!write_cache_flush(cd_page)) >+ goto out; >+ >+ if (lseek(cd_page->fd, initial_offset_data, SEEK_SET) == failed) { >+ printf("kludge: seek to %#lx, fd %d failed errno %d\n", >+ initial_offset_data, cd_page->fd, errno); >+ exit(1); >+ } >+ if (read(cd_page->fd, save_block2, blocksize) != blocksize) { >+ printf("kludge: read block2 failed\n"); >+ exit(1); >+ } >+ /* combine the overlapping parts into save_block1 */ >+ memcpy(save_block1, save_block2, prefix); >+ >+ if (lseek(cd_page->fd, initial_offset_data, SEEK_SET) == failed) { >+ printf("kludge: seek to %#lx, fd %d failed errno %d\n", >+ initial_offset_data, cd_page->fd, errno); >+ exit(1); >+ } >+ status = write(cd_page->fd, save_block1, blocksize); >+ /* end of kludged block */ >+ >+ /* > * print [100 %] > */ > print_progress(PROGRESS_COPY, num_dumpable, num_dumpable); >@@ -6462,8 +6695,6 @@ write_kdump_pages(struct cache_data *cd_ > > ret = TRUE; > out: >- if (buf_out != NULL) >- free(buf_out); > #ifdef USELZO > if (wrkmem != NULL) > free(wrkmem); >@@ -6863,51 +7094,47 @@ write_kdump_eraseinfo(struct cache_data > } > > int >-write_kdump_bitmap(void) >+write_kdump_bitmap(struct cache_data *cd) > { > struct cache_data bm; > long long buf_size; >- off_t offset; >+ long write_size; > > 
int ret = FALSE; > > if (info->flag_elf_dumpfile) > return FALSE; > >+ /* set up to read bit map file in big blocks from the start */ > bm.fd = info->fd_bitmap; > bm.file_name = info->name_bitmap; > bm.offset = 0; > bm.buf = NULL; >- >- if ((bm.buf = calloc(1, BUFSIZE_BITMAP)) == NULL) { >- ERRMSG("Can't allocate memory for dump bitmap buffer. %s\n", >- strerror(errno)); >- goto out; >+ bm.cache_size = cd->cache_size; >+ bm.buf = cd->buf; /* use the bitmap cd */ >+ /* using the dumpfile cd_bitmap buffer and fd */ >+ if (lseek(cd->fd, info->offset_bitmap1, SEEK_SET) < 0) { >+ ERRMSG("Can't seek the dump file(%s). %s\n", >+ info->name_memory, strerror(errno)); >+ return FALSE; > } >- offset = info->offset_bitmap1; > buf_size = info->len_bitmap; >- > while (buf_size > 0) { >- if (buf_size >= BUFSIZE_BITMAP) >- bm.cache_size = BUFSIZE_BITMAP; >- else >- bm.cache_size = buf_size; >- > if(!read_cache(&bm)) > goto out; >- >- if (!write_buffer(info->fd_dumpfile, offset, >- bm.buf, bm.cache_size, info->name_dumpfile)) >- goto out; >- >- offset += bm.cache_size; >- buf_size -= BUFSIZE_BITMAP; >+ write_size = cd->cache_size; >+ if (buf_size < cd->cache_size) { >+ write_size = buf_size; >+ } >+ if (write(cd->fd, cd->buf, write_size) != write_size) { >+ ERRMSG("Can't write a destination file. 
%s\n", >+ strerror(errno)); >+ exit(1); >+ } >+ buf_size -= bm.cache_size; > } > ret = TRUE; > out: >- if (bm.buf != NULL) >- free(bm.buf); >- > return ret; > } > >@@ -7992,7 +8219,7 @@ int > writeout_dumpfile(void) > { > int ret = FALSE; >- struct cache_data cd_header, cd_page; >+ struct cache_data cd_header, cd_page_descs, cd_page, cd_bitmap; > > info->flag_nospace = FALSE; > >@@ -8005,11 +8232,20 @@ writeout_dumpfile(void) > } > if (!prepare_cache_data(&cd_header)) > return FALSE; >+ cd_header.offset = 0; > > if (!prepare_cache_data(&cd_page)) { > free_cache_data(&cd_header); > return FALSE; > } >+ if (!prepare_cache_data(&cd_page_descs)) { >+ free_cache_data(&cd_header); >+ free_cache_data(&cd_page); >+ return FALSE; >+ } >+ if (!prepare_cache_data(&cd_bitmap)) >+ return FALSE; >+ > if (info->flag_elf_dumpfile) { > if (!write_elf_header(&cd_header)) > goto out; >@@ -8023,22 +8259,36 @@ writeout_dumpfile(void) > if (!write_elf_eraseinfo(&cd_header)) > goto out; > } else if (info->flag_cyclic) { >- if (!write_kdump_header()) >+ if (!write_kdump_header(&cd_header)) > goto out; > if (!write_kdump_pages_and_bitmap_cyclic(&cd_header, &cd_page)) > goto out; > if (!write_kdump_eraseinfo(&cd_page)) > goto out; > } else { >- if (!write_kdump_header()) >- goto out; >- if (!write_kdump_pages(&cd_header, &cd_page)) >- goto out; >- if (!write_kdump_eraseinfo(&cd_page)) >- goto out; >- if (!write_kdump_bitmap()) >- goto out; >- } >+ /* >+ * Use cd_header for the caching operation up to the bit map. >+ * Use cd_bitmap for 1-block (4096) operations on the bit map. >+ * (it fits between the file header and page_desc's, both of >+ * which end and start on block boundaries) >+ * Then use cd_page_descs and cd_page for page headers and >+ * data (and eraseinfo). >+ * Then back to cd_header to fill in the bitmap. 
>+ */ >+ >+ if (!write_kdump_header(&cd_header)) >+ goto out; >+ write_cache_flush(&cd_header); >+ >+ if (!write_kdump_pages(&cd_page_descs, &cd_page)) >+ goto out; >+ if (!write_kdump_eraseinfo(&cd_page)) >+ goto out; >+ >+ cd_bitmap.offset = info->offset_bitmap1; >+ if (!write_kdump_bitmap(&cd_bitmap)) >+ goto out; >+ } > if (info->flag_flatten) { > if (!write_end_flat_header()) > goto out; >@@ -8198,11 +8448,17 @@ create_dumpfile(void) > if (!get_elf_info(info->fd_memory, info->name_memory)) > return FALSE; > } >+ blocksize = info->page_size; >+ if (!blocksize) >+ blocksize = sysconf(_SC_PAGE_SIZE); > if (!initial()) > return FALSE; > > print_vtop(); > >+ if (jflag) >+ PROGRESS_MSG("Using O_DIRECT i/o for dump and bitmap.\n"); >+ > num_retry = 0; > retry: > if (info->flag_refiltering) { >@@ -9285,7 +9541,6 @@ int show_mem_usage(void) > return FALSE; > } > >- > if (!info->flag_cyclic) > info->flag_cyclic = TRUE; > >@@ -9379,7 +9634,7 @@ main(int argc, char *argv[]) > > info->block_order = DEFAULT_ORDER; > message_level = DEFAULT_MSG_LEVEL; >- while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:lpRvXx:", longopts, >+ while ((opt = getopt_long(argc, argv, "b:cDd:EFfg:hi:jlpRvXx:", longopts, > NULL)) != -1) { > switch (opt) { > case OPT_BLOCK_ORDER: >@@ -9423,6 +9678,10 @@ main(int argc, char *argv[]) > info->flag_read_vmcoreinfo = 1; > info->name_vmcoreinfo = optarg; > break; >+ case 'j': >+ jflag = 1; >+ info->flag_cyclic = FALSE; // saving memory to avoid cyclic >+ break; > case OPT_DISKSET: > if (!sadump_add_diskset_info(optarg)) > goto out; > >_______________________________________________ >kexec mailing list >kexec at lists.infradead.org >http://lists.infradead.org/mailman/listinfo/kexec