-----Original Message----- > 1.) The vmcore file may be very big. > > For example, I have a vmcore file which is over 23G, > and the panic kernel had 767.6G memory, > its max_sect_len is 4468736. 4468736 * 4096 * 4k -> about 68T. Apparently there are systems that can have a large physical space (max_mapnr) compared with their actual memory. Anyway, thanks for the huge improvement. The v2 patch looks good to me. Acked-by: Kazuhito Hagio <k-hagio-ab@xxxxxxx> Thanks, Kazu > > The current code takes too much time to do the following loop: > .............................................. > for (i = 1; i < max_sect_len + 1; i++) { > dd->valid_pages[i] = dd->valid_pages[i - 1]; > for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++) > if (page_is_dumpable(pfn)) > dd->valid_pages[i]++; > .............................................. > > In my case, it takes about 56 seconds to finish the > big loop. > > This patch moves the hweightXX macros to defs.h, > and uses hweight64 to optimize the loop. > > For my vmcore, the loop now takes only about one second. > > 2.) Test results: > # cat ./commands.txt > quit > > Before: > > #echo 3 > /proc/sys/vm/drop_caches; > #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1 > ............................ > real 1m54.259s > user 1m12.494s > sys 0m3.857s > ............................ > > After this patch: > > #echo 3 > /proc/sys/vm/drop_caches; > #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1 > ............................ > real 0m55.217s > user 0m15.114s > sys 0m3.560s > ............................ > > Signed-off-by: Huang Shijie <shijie@xxxxxxxxxxxxxxxxxxxxxx> > --- > v1 --> v2: > 1.) change u64 to ulonglong. > 2.) compile this patch in x86_64. 
> > --- > defs.h | 20 ++++++++++++++++++++ > diskdump.c | 12 +++++++++--- > sbitmap.c | 19 ------------------- > 3 files changed, 29 insertions(+), 22 deletions(-) > > diff --git a/defs.h b/defs.h > index 81ac049..1e8360d 100644 > --- a/defs.h > +++ b/defs.h > @@ -4531,6 +4531,26 @@ struct machine_specific { > #define NUM_IN_BITMAP(bitmap, x) (bitmap[(x)/BITS_PER_LONG] & NUM_TO_BIT(x)) > #define SET_BIT(bitmap, x) (bitmap[(x)/BITS_PER_LONG] |= NUM_TO_BIT(x)) > > +static inline unsigned int __const_hweight8(unsigned long w) > +{ > + return > + (!!((w) & (1ULL << 0))) + > + (!!((w) & (1ULL << 1))) + > + (!!((w) & (1ULL << 2))) + > + (!!((w) & (1ULL << 3))) + > + (!!((w) & (1ULL << 4))) + > + (!!((w) & (1ULL << 5))) + > + (!!((w) & (1ULL << 6))) + > + (!!((w) & (1ULL << 7))); > +} > + > +#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8)) > +#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) > +#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) > + > +#define hweight32(w) __const_hweight32(w) > +#define hweight64(w) __const_hweight64(w) > + > /* > * precision lengths for fprintf > */ > diff --git a/diskdump.c b/diskdump.c > index d567427..ff1e9a3 100644 > --- a/diskdump.c > +++ b/diskdump.c > @@ -547,6 +547,7 @@ read_dump_header(char *file) > ulong pfn; > int i, j, max_sect_len; > int is_split = 0; > + ulonglong tmp, *bitmap; > > if (block_size < 0) > return FALSE; > @@ -899,11 +900,16 @@ restart: > > dd->valid_pages = calloc(sizeof(ulong), max_sect_len + 1); > dd->max_sect_len = max_sect_len; > + > + /* It is safe to convert it to (ulonglong *). 
*/ > + bitmap = (ulonglong *)dd->dumpable_bitmap; > for (i = 1; i < max_sect_len + 1; i++) { > dd->valid_pages[i] = dd->valid_pages[i - 1]; > - for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++) > - if (page_is_dumpable(pfn)) > - dd->valid_pages[i]++; > + for (j = 0; j < BITMAP_SECT_LEN; j += 64, pfn += 64) { > + tmp = bitmap[pfn >> 6]; > + if (tmp) > + dd->valid_pages[i] += hweight64(tmp); > + } > } > > return TRUE; > diff --git a/sbitmap.c b/sbitmap.c > index 286259f..96a61e6 100644 > --- a/sbitmap.c > +++ b/sbitmap.c > @@ -49,25 +49,6 @@ struct sbitmapq_data { > > static uint sb_flags = 0; > > -static inline unsigned int __const_hweight8(unsigned long w) > -{ > - return > - (!!((w) & (1ULL << 0))) + > - (!!((w) & (1ULL << 1))) + > - (!!((w) & (1ULL << 2))) + > - (!!((w) & (1ULL << 3))) + > - (!!((w) & (1ULL << 4))) + > - (!!((w) & (1ULL << 5))) + > - (!!((w) & (1ULL << 6))) + > - (!!((w) & (1ULL << 7))); > -} > - > -#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8)) > -#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) > -#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) > - > -#define hweight32(w) __const_hweight32(w) > -#define hweight64(w) __const_hweight64(w) > > #define BIT(nr) (1UL << (nr)) > > -- > 2.30.2 -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility Contribution Guidelines: https://github.com/crash-utility/crash/wiki