On Monday, July 28, 2014, Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> wrote: > Instead of reading the index from disk and worrying about disk > corruption, the index is cached in memory (memory bit-flips happen > too, but hopefully less often). The result is faster read. > > The biggest gain is not having to verify the trailing SHA-1, which > takes lots of time especially on large index files. But this also > opens doors for further optimiztions: s/optimiztions/optimizations/ > - we could create an in-memory format that's essentially the memory > dump of the index to eliminate most of parsing/allocation > overhead. The mmap'd memory can be used straight away. > > - we could cache non-index info such as name hash > > The shared memory's name folows the template "git-<object>-<SHA1>" s/folows/follows/ > where <SHA1> is the trailing SHA-1 of the index file. <object> is > "index" for cached index files (and may be "name-hash" for name-hash > cache). If such shared memory exists, it contains the same index > content as on disk. The content is already validated by the daemon and > git won't validate it again (except comparing the trailing SHA-1s). > > Git can poke the daemon to tell it to refresh the index cache, or to > keep it alive some more minutes via UNIX signals. It can't give any > real index data directly to the daemon. Real data goes to disk first, > then the daemon reads and verifies it from there. > > $GIT_DIR/index-helper.pid contains a reference to daemon process (and > it's pid on *nix). The file's mtime is updated every time it's accessed s/it's pid/its pid/ > (or should be updated often enough). Old index-helper.pid is considered > stale and ignored. > > Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> > --- > diff --git a/index-helper.c b/index-helper.c > new file mode 100644 > index 0000000..8fa0af9 > --- /dev/null > +++ b/index-helper.c > @@ -0,0 +1,157 @@ > +static void share_index(struct index_state *istate, struct index_shm *is) > +{ > + void *new_mmap; > + if (istate->mmap_size <= 20 || > + hashcmp(istate->sha1, > + (unsigned char *)istate->mmap + istate->mmap_size - 20) || > + !hashcmp(istate->sha1, is->sha1) || > + git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, istate->mmap_size, > + &new_mmap, PROT_READ | PROT_WRITE, MAP_SHARED, > + "git-index-%s", sha1_to_hex(istate->sha1)) < 0) Do any of these failure conditions deserve a diagnostic message to let the user know that there was a problem? > + return; > + > + free_index_shm(is); > + is->size = istate->mmap_size; > + is->shm = new_mmap; > + hashcpy(is->sha1, istate->sha1); > + memcpy(new_mmap, istate->mmap, istate->mmap_size - 20); > + > + /* > + * The trailing hash must be written last after everything is > + * written. It's the indication that the shared memory is now > + * ready. > + */ > + hashcpy((unsigned char *)new_mmap + istate->mmap_size - 20, is->sha1); > +} > diff --git a/read-cache.c b/read-cache.c > index b679781..ff28142 100644 > --- a/read-cache.c > +++ b/read-cache.c > @@ -1465,6 +1467,65 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk, > return ce; > } > > +/* > + * Try to open and verify a cached shm index if available. Return 0 if > + * succeeds (istate->mmap and istate->mmap_size are updated). Return > + * negative otherwise. > + */ > +static int try_shm(struct index_state *istate) > +{ > + void *new_mmap = NULL; > + size_t old_size = istate->mmap_size; Is old_size needed? Can you not simply reference istate->mmap_size directly each place old_size is mentioned? > + ssize_t new_length; Nit: 'size' vs. 'length' inconsistency in variable name choices. > + const unsigned char *sha1; > + struct stat st; > + > + if (old_size <= 20 || > + stat(git_path("index-helper.pid"), &st)) > + return -1; > + sha1 = (unsigned char *)istate->mmap + old_size - 20; > + new_length = git_shm_map(O_RDONLY, 0700, -1, &new_mmap, > + PROT_READ, MAP_SHARED, > + "git-index-%s", sha1_to_hex(sha1)); > + if (new_length <= 20 || > + hashcmp((unsigned char *)istate->mmap + old_size - 20, > + (unsigned char *)new_mmap + new_length - 20)) { > + if (new_mmap) > + munmap(new_mmap, new_length); > + poke_daemon(&st, 1); > + return -1; > + } > + munmap(istate->mmap, istate->mmap_size); > + istate->mmap = new_mmap; > + istate->mmap_size = new_length; > + poke_daemon(&st, 0); > + return 0; > +} > + > /* remember to discard_cache() before reading a different cache! */ > int do_read_index(struct index_state *istate, const char *path, int must_exist) > { > diff --git a/shm.c b/shm.c > new file mode 100644 > index 0000000..4ec1a00 > --- /dev/null > +++ b/shm.c > @@ -0,0 +1,67 @@ > +#include "git-compat-util.h" > +#include "shm.h" > + > +#ifdef HAVE_SHM > + > +#define SHM_PATH_LEN 72 /* we don't create very long paths.. */ > + > +ssize_t git_shm_map(int oflag, int perm, ssize_t length, void **mmap, > + int prot, int flags, const char *fmt, ...) > +{ > + va_list ap; > + char path[SHM_PATH_LEN]; Is there a reason to avoid strbuf here? > + int fd; > + > + path[0] = '/'; > + va_start(ap, fmt); > + vsprintf(path + 1, fmt, ap); > + va_end(ap); > + fd = shm_open(path, oflag, perm); > + if (fd < 0) > + return -1; > + if (length > 0 && ftruncate(fd, length)) { > + shm_unlink(path); > + close(fd); > + return -1; > + } > + if (length < 0 && !(oflag & O_CREAT)) { > + struct stat st; > + if (fstat(fd, &st)) > + die_errno("unable to stat %s", path); > + length = st.st_size; > + } > + *mmap = xmmap(NULL, length, prot, flags, fd, 0); > + close(fd); > + if (*mmap == MAP_FAILED) { > + *mmap = NULL; > + shm_unlink(path); > + return -1; > + } > + return length; > +} -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html