On Sat, Jul 13, 2013 at 12:26 AM, Thomas Gummerer <t.gummerer@xxxxxxxxx> wrote: > +struct directory_entry { > + struct directory_entry *next; > + struct directory_entry *next_hash; > + struct cache_entry *ce; > + struct cache_entry *ce_last; > + struct conflict_entry *conflict; > + struct conflict_entry *conflict_last; > + unsigned int conflict_size; > + unsigned int de_foffset; > + unsigned int de_cr; > + unsigned int de_ncr; > + unsigned int de_nsubtrees; > + unsigned int de_nfiles; > + unsigned int de_nentries; > + unsigned char sha1[20]; > + unsigned short de_flags; > + unsigned int de_pathlen; > + char pathname[FLEX_ARRAY]; > +}; > + > +struct conflict_part { > + struct conflict_part *next; > + unsigned short flags; > + unsigned short entry_mode; > + unsigned char sha1[20]; > +}; > + > +struct conflict_entry { > + struct conflict_entry *next; > + unsigned int nfileconflicts; > + struct conflict_part *entries; > + unsigned int namelen; > + unsigned int pathlen; > + char name[FLEX_ARRAY]; > +}; > + > +struct ondisk_conflict_part { > + unsigned short flags; > + unsigned short entry_mode; > + unsigned char sha1[20]; > +}; These new structs should probably be in read-cache-v5.c, or read-cache.h > #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) > +#define directory_entry_size(len) (offsetof(struct directory_entry,pathname) + (len) + 1) > +#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) + (len) + 1) These are used by read-cache-v5.c only so far. I'd say move them to read-cache.h or read-cache-v5.c together with the new structs. > +struct ondisk_cache_entry { > + unsigned short flags; > + unsigned short mode; > + struct cache_time mtime; > + unsigned int size; > + int stat_crc; > + unsigned char sha1[20]; > +}; > + > +struct ondisk_directory_entry { > + unsigned int foffset; > + unsigned int cr; > + unsigned int ncr; > + unsigned int nsubtrees; > + unsigned int nfiles; > + unsigned int nentries; > + unsigned char sha1[20]; > + unsigned short flags; > +}; Perhaps use uint32_t, uint16_t and friends for all on-disk structures? > +static struct directory_entry *read_directories(unsigned int *dir_offset, > + unsigned int *dir_table_offset, > + void *mmap, > + int mmap_size) > +{ > + int i, ondisk_directory_size; > + uint32_t *filecrc, *beginning, *end; > + struct directory_entry *current = NULL; > + struct ondisk_directory_entry *disk_de; > + struct directory_entry *de; > + unsigned int data_len, len; > + char *name; > + > + /* > + * Length of pathname + nul byte for termination + size of > + * members of ondisk_directory_entry. (Just using the size > + * of the struct doesn't work, because there may be padding > + * bytes for the struct) > + */ > + ondisk_directory_size = sizeof(disk_de->flags) > + + sizeof(disk_de->foffset) > + + sizeof(disk_de->cr) > + + sizeof(disk_de->ncr) > + + sizeof(disk_de->nsubtrees) > + + sizeof(disk_de->nfiles) > + + sizeof(disk_de->nentries) > + + sizeof(disk_de->sha1); > + name = ptr_add(mmap, *dir_offset); > + beginning = ptr_add(mmap, *dir_table_offset); > + end = ptr_add(mmap, *dir_table_offset + 4); > + len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5; > + disk_de = ptr_add(mmap, *dir_offset + len + 1); > + de = directory_entry_from_ondisk(disk_de, name, len); > + de->next = NULL; > + > + data_len = len + 1 + ondisk_directory_size; > + filecrc = ptr_add(mmap, *dir_offset + data_len); > + if (!check_crc32(0, ptr_add(mmap, *dir_offset), data_len, ntoh_l(*filecrc))) > + goto unmap; > + > + *dir_table_offset += 4; > + *dir_offset += data_len + 4; /* crc code */ > + > + current = de; > + for (i = 0; i < de->de_nsubtrees; i++) { > + current->next = read_directories(dir_offset, dir_table_offset, > + mmap, mmap_size); > + while (current->next) > + current = current->next; > + } > + > + return de; > +unmap: > + munmap(mmap, mmap_size); > + die("directory crc doesn't match for '%s'", de->pathname); > +} You don't have to munmap when you die() anway. I'm not sure if flatten the directory hierarchy into a list (linked by next pointer) is a good idea, or we should maintain the tree structure in memory. Still a lot of reading to figure that out.. I skipped from here.. > +static void ce_queue_push(struct cache_entry **head, > + struct cache_entry **tail, > + struct cache_entry *ce) > +{ ... > +static int read_conflicts(struct conflict_entry **head, > + struct directory_entry *de, > + void **mmap, unsigned long mmap_size) > +{ till the end of this function. Not interested in conflict stuff yet. > +static struct directory_entry *read_head_directories(struct index_state *istate, > + unsigned int *entry_offset, > + unsigned int *foffsetblock, > + unsigned int *ndirs, > + void *mmap, unsigned long mmap_size) > +{ Maybe read_all_directories is a better nam. > +static int read_index_filtered_v5(struct index_state *istate, void *mmap, > + unsigned long mmap_size, struct filter_opts *opts) > +{ > + unsigned int entry_offset, ndirs, foffsetblock, nr = 0; > + struct directory_entry *root_directory, *de; > + const char **adjusted_pathspec = NULL; > + int need_root = 0, i, n; > + char *oldpath, *seen; > + > + ... > + > + de = root_directory; > + while (de) { > + if (need_root || > + match_pathspec(adjusted_pathspec, de->pathname, de->de_pathlen, 0, NULL)) { > + unsigned int subdir_foffsetblock = de->de_foffset + foffsetblock; > + unsigned int *off = mmap + subdir_foffsetblock; > + unsigned int subdir_entry_offset = entry_offset + ntoh_l(*off); > + oldpath = de->pathname; > + do { > + if (read_entries(istate, &de, &subdir_entry_offset, > + &mmap, mmap_size, &nr, > + &subdir_foffsetblock) < 0) > + return -1; > + } while (de && !prefixcmp(de->pathname, oldpath)); > + } else > + de = de->next; > + } Hm.. if we maintain a tree structure here (one link to the first subdir, one link to the next sibling), I think the "do" loop could be done without prefixcmp. Just check if "de" returned by read_entries is the next sibling "de" (iow the end of current directory recursively). > + istate->cache_nr = nr; > + return 0; > +} > + > +static int read_index_v5(struct index_state *istate, void *mmap, > + unsigned long mmap_size, struct filter_opts *opts) > +{ > + unsigned int entry_offset, ndirs, foffsetblock, nr = 0; > + struct directory_entry *root_directory, *de; > + > + if (opts != NULL) > + return read_index_filtered_v5(istate, mmap, mmap_size, opts); > + > + root_directory = read_head_directories(istate, &entry_offset, > + &foffsetblock, &ndirs, > + mmap, mmap_size); > + de = root_directory; > + while (de) > + if (read_entries(istate, &de, &entry_offset, &mmap, > + mmap_size, &nr, &foffsetblock) < 0) > + return -1; > + istate->cache_nr = nr; > + return 0; > +} Make it call read_index_filtered_v5 with an empty pathspec instead. match_pathspec* returns true immediately if pathspec is empty. Without the removal of prefixcmp() in the "do" loop mentioned above, read_index_filtered_v5 can't be more expensive than this version. That was it! Lunch time! Maybe I'll read the rest in the afternoon, or someday next week. -- Duy -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html