Re: [PATCH v2 12/19] read-cache: read index-v5

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Jul 13, 2013 at 12:26 AM, Thomas Gummerer <t.gummerer@xxxxxxxxx> wrote:
> +struct directory_entry {
> +       struct directory_entry *next;
> +       struct directory_entry *next_hash;
> +       struct cache_entry *ce;
> +       struct cache_entry *ce_last;
> +       struct conflict_entry *conflict;
> +       struct conflict_entry *conflict_last;
> +       unsigned int conflict_size;
> +       unsigned int de_foffset;
> +       unsigned int de_cr;
> +       unsigned int de_ncr;
> +       unsigned int de_nsubtrees;
> +       unsigned int de_nfiles;
> +       unsigned int de_nentries;
> +       unsigned char sha1[20];
> +       unsigned short de_flags;
> +       unsigned int de_pathlen;
> +       char pathname[FLEX_ARRAY];
> +};
> +
> +struct conflict_part {
> +       struct conflict_part *next;
> +       unsigned short flags;
> +       unsigned short entry_mode;
> +       unsigned char sha1[20];
> +};
> +
> +struct conflict_entry {
> +       struct conflict_entry *next;
> +       unsigned int nfileconflicts;
> +       struct conflict_part *entries;
> +       unsigned int namelen;
> +       unsigned int pathlen;
> +       char name[FLEX_ARRAY];
> +};
> +
> +struct ondisk_conflict_part {
> +       unsigned short flags;
> +       unsigned short entry_mode;
> +       unsigned char sha1[20];
> +};

These new structs should probably be in read-cache-v5.c, or read-cache.h

>  #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
> +#define directory_entry_size(len) (offsetof(struct directory_entry,pathname) + (len) + 1)
> +#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) + (len) + 1)

These are used by read-cache-v5.c only so far. I'd say move them to
read-cache.h or read-cache-v5.c together with the new structs.

> +struct ondisk_cache_entry {
> +       unsigned short flags;
> +       unsigned short mode;
> +       struct cache_time mtime;
> +       unsigned int size;
> +       int stat_crc;
> +       unsigned char sha1[20];
> +};
> +
> +struct ondisk_directory_entry {
> +       unsigned int foffset;
> +       unsigned int cr;
> +       unsigned int ncr;
> +       unsigned int nsubtrees;
> +       unsigned int nfiles;
> +       unsigned int nentries;
> +       unsigned char sha1[20];
> +       unsigned short flags;
> +};

Perhaps use uint32_t, uint16_t and friends for all on-disk structures?

> +static struct directory_entry *read_directories(unsigned int *dir_offset,
> +                               unsigned int *dir_table_offset,
> +                               void *mmap,
> +                               int mmap_size)
> +{
> +       int i, ondisk_directory_size;
> +       uint32_t *filecrc, *beginning, *end;
> +       struct directory_entry *current = NULL;
> +       struct ondisk_directory_entry *disk_de;
> +       struct directory_entry *de;
> +       unsigned int data_len, len;
> +       char *name;
> +
> +       /*
> +        * Length of pathname + nul byte for termination + size of
> +        * members of ondisk_directory_entry. (Just using the size
> +        * of the struct doesn't work, because there may be padding
> +        * bytes for the struct)
> +        */
> +       ondisk_directory_size = sizeof(disk_de->flags)
> +               + sizeof(disk_de->foffset)
> +               + sizeof(disk_de->cr)
> +               + sizeof(disk_de->ncr)
> +               + sizeof(disk_de->nsubtrees)
> +               + sizeof(disk_de->nfiles)
> +               + sizeof(disk_de->nentries)
> +               + sizeof(disk_de->sha1);
> +       name = ptr_add(mmap, *dir_offset);
> +       beginning = ptr_add(mmap, *dir_table_offset);
> +       end = ptr_add(mmap, *dir_table_offset + 4);
> +       len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5;
> +       disk_de = ptr_add(mmap, *dir_offset + len + 1);
> +       de = directory_entry_from_ondisk(disk_de, name, len);
> +       de->next = NULL;
> +
> +       data_len = len + 1 + ondisk_directory_size;
> +       filecrc = ptr_add(mmap, *dir_offset + data_len);
> +       if (!check_crc32(0, ptr_add(mmap, *dir_offset), data_len, ntoh_l(*filecrc)))
> +               goto unmap;
> +
> +       *dir_table_offset += 4;
> +       *dir_offset += data_len + 4; /* crc code */
> +
> +       current = de;
> +       for (i = 0; i < de->de_nsubtrees; i++) {
> +               current->next = read_directories(dir_offset, dir_table_offset,
> +                                               mmap, mmap_size);
> +               while (current->next)
> +                       current = current->next;
> +       }
> +
> +       return de;
> +unmap:
> +       munmap(mmap, mmap_size);
> +       die("directory crc doesn't match for '%s'", de->pathname);
> +}

You don't have to munmap when you die() anway. I'm not sure if flatten
the directory hierarchy into a list (linked by next pointer) is a good
idea, or we should maintain the tree structure in memory. Still a lot
of reading to figure that out..

I skipped from here..

> +static void ce_queue_push(struct cache_entry **head,
> +                            struct cache_entry **tail,
> +                            struct cache_entry *ce)
> +{

...

> +static int read_conflicts(struct conflict_entry **head,
> +                         struct directory_entry *de,
> +                         void **mmap, unsigned long mmap_size)
> +{

till the end of this function. Not interested in conflict stuff yet.


> +static struct directory_entry *read_head_directories(struct index_state *istate,
> +                                                    unsigned int *entry_offset,
> +                                                    unsigned int *foffsetblock,
> +                                                    unsigned int *ndirs,
> +                                                    void *mmap, unsigned long mmap_size)
> +{

Maybe read_all_directories is a better nam.

> +static int read_index_filtered_v5(struct index_state *istate, void *mmap,
> +                                 unsigned long mmap_size, struct filter_opts *opts)
> +{
> +       unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
> +       struct directory_entry *root_directory, *de;
> +       const char **adjusted_pathspec = NULL;
> +       int need_root = 0, i, n;
> +       char *oldpath, *seen;
> +
> +       ...
> +
> +       de = root_directory;
> +       while (de) {
> +               if (need_root ||
> +                   match_pathspec(adjusted_pathspec, de->pathname, de->de_pathlen, 0, NULL)) {
> +                       unsigned int subdir_foffsetblock = de->de_foffset + foffsetblock;
> +                       unsigned int *off = mmap + subdir_foffsetblock;
> +                       unsigned int subdir_entry_offset = entry_offset + ntoh_l(*off);
> +                       oldpath = de->pathname;
> +                       do {
> +                               if (read_entries(istate, &de, &subdir_entry_offset,
> +                                                &mmap, mmap_size, &nr,
> +                                                &subdir_foffsetblock) < 0)
> +                                       return -1;
> +                       } while (de && !prefixcmp(de->pathname, oldpath));
> +               } else
> +                       de = de->next;
> +       }

Hm.. if we maintain a tree structure here (one link to the first
subdir, one link to the next sibling), I think the "do" loop could be
done without prefixcmp. Just check if "de" returned by read_entries is
the next sibling "de" (iow the end of current directory recursively).

> +       istate->cache_nr = nr;
> +       return 0;
> +}
> +
> +static int read_index_v5(struct index_state *istate, void *mmap,
> +                        unsigned long mmap_size, struct filter_opts *opts)
> +{
> +       unsigned int entry_offset, ndirs, foffsetblock, nr = 0;
> +       struct directory_entry *root_directory, *de;
> +
> +       if (opts != NULL)
> +               return read_index_filtered_v5(istate, mmap, mmap_size, opts);
> +
> +       root_directory = read_head_directories(istate, &entry_offset,
> +                                              &foffsetblock, &ndirs,
> +                                              mmap, mmap_size);
> +       de = root_directory;
> +       while (de)
> +               if (read_entries(istate, &de, &entry_offset, &mmap,
> +                                mmap_size, &nr, &foffsetblock) < 0)
> +                       return -1;
> +       istate->cache_nr = nr;
> +       return 0;
> +}

Make it call read_index_filtered_v5 with an empty pathspec instead.
match_pathspec* returns true immediately if pathspec is empty. Without
the removal of prefixcmp() in the "do" loop mentioned above,
read_index_filtered_v5 can't be more expensive than this version.

That was it! Lunch time! Maybe I'll read the rest in the afternoon, or
someday next week.
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]