It occurred to me -- and I know it will sound like heresy -- that providing an overly long example in C may not be the best option here. Why not page_owner.py with the following content instead (not tested): #!/usr/bin/python import collections import sys counts = collections.defaultdict(int) txt = '' for line in sys.stdin: if line == '\n': counts[txt] += 1 txt = '' else: txt += line counts[txt] += 1 for txt, num in sorted(counts.items(), txt=lambda x: x[1]): if len(txt) > 1: print '%d times:\n%s' % num, txt And it's so “long” only because I chose not to read the whole file at once as in: counts = collections.defaultdict(int) for txt in sys.stdin.read().split('\n\n'): counts[txt] += 1 On Fri, Jan 11 2013, Minchan Kim wrote: > The read_block reads chars one by one until it meets two newlines. > It's not good for performance and the current code isn't in good shape > for readability. > > This patch improves speed and cleans up the code. > > Cc: Mel Gorman <mgorman@xxxxxxx> > Cc: Andy Whitcroft <apw@xxxxxxxxxxxx> > Cc: Alexander Nyberg <alexn@xxxxxxxxx> > Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx> > Signed-off-by: Michal Nazarewicz <mina86@xxxxxxxxxx> > Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx> > --- > Documentation/page_owner.c | 34 +++++++++++++--------------------- > 1 file changed, 13 insertions(+), 21 deletions(-) > > diff --git a/Documentation/page_owner.c b/Documentation/page_owner.c > index 43dde96..96bf481 100644 > --- a/Documentation/page_owner.c > +++ b/Documentation/page_owner.c > @@ -28,26 +28,17 @@ static int max_size; > > struct block_list *block_head; > > -int read_block(char *buf, FILE *fin) > +int read_block(char *buf, int buf_size, FILE *fin) > { > - int ret = 0; > - int hit = 0; > - int val; > - char *curr = buf; > - > - for (;;) { > - val = getc(fin); > - if (val == EOF) return -1; > - *curr = val; > - ret++; > - if (*curr == '\n' && hit == 1) > - return ret - 1; > - else if (*curr == '\n') > - hit = 1; > - else > - hit = 0; > - curr++; > + char *curr = 
buf, *const buf_end = buf + buf_size; > + > + while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) { > + if (*curr == '\n') /* empty line */ > + return curr - buf; > + curr += strlen(curr); > } > + > + return -1; /* EOF or no space left in buf. */ > } > > static int compare_txt(struct block_list *l1, struct block_list *l2) > @@ -84,10 +75,12 @@ static void add_list(char *buf, int len) > } > } > > +#define BUF_SIZE 1024 > + > int main(int argc, char **argv) > { > FILE *fin, *fout; > - char buf[1024]; > + char buf[BUF_SIZE]; > int ret, i, count; > struct block_list *list2; > struct stat st; > @@ -106,11 +99,10 @@ int main(int argc, char **argv) > list = malloc(max_size * sizeof(*list)); > > for(;;) { > - ret = read_block(buf, fin); > + ret = read_block(buf, BUF_SIZE, fin); > if (ret < 0) > break; > > - buf[ret] = '\0'; > add_list(buf, ret); > } > > -- > 1.7.9.5 > -- Best regards, _ _ .o. | Liege of Serenely Enlightened Majesty of o' \,=./ `o ..o | Computer Science, Michał “mina86” Nazarewicz (o o) ooo +----<email/xmpp: mpn@xxxxxxxxxx>--------------ooO--(_)--Ooo--
Attachment:
pgpaT88Gm9fZR.pgp
Description: PGP signature