Some workflows may involve running "git cherry" a lot to look for identical patches. Git ends up calculating the patch-id of some commits many times, which can be slow. This patch provides an option to cache the calculated patch ids persistently on disk. This trades more disk space (and more RAM used for disk cache) for less CPU time. Whether this is a good idea depends on your workflow and how much disk and RAM you have (the cache uses 40 bytes per stored commit). Here's one cherry-heavy workflow (checking which topic branches have been accepted upstream), and some timings: have_commits() { test -z "`git cherry "$@" | grep -v ^-`" } for i in $topic_branches; do if have_commits origin/master $i $i@{u}; then echo $i: merged to origin/master elif have_commits origin/next $i $i@{u}; then echo $i: merged to origin/next else echo $i: not merged fi done # without patch real 0m9.709s user 0m8.693s sys 0m0.676s # with patch, first run real 0m1.946s user 0m1.244s sys 0m0.428s # with patch, subsequent run real 0m1.379s user 0m0.844s sys 0m0.268s and the disk used: $ du -h .git/cache/patch_id/* 8.0K .git/cache/patch_id/0000000000000000000000000000000000000000 Signed-off-by: Jeff King <peff@xxxxxxxx> --- cache.h | 1 + config.c | 5 +++++ map.c | 16 ++++++++++++++++ map.h | 6 ++++++ metadata-cache.c | 2 ++ metadata-cache.h | 2 ++ patch-ids.c | 22 +++++++++++++++++++++- 7 files changed, 53 insertions(+), 1 deletions(-) diff --git a/cache.h b/cache.h index 9e12d55..060f0f9 100644 --- a/cache.h +++ b/cache.h @@ -596,6 +596,7 @@ extern int read_replace_refs; extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; +extern int core_cache_patch_id; enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, diff --git a/config.c b/config.c index e42c59b..09e84c3 100644 --- a/config.c +++ b/config.c @@ -659,6 +659,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.cachepatchid")) { + 
core_cache_patch_id = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/map.c b/map.c index bb0d60a..9d8d5ab 100644 --- a/map.c +++ b/map.c @@ -33,6 +33,16 @@ static void disk_to_uint32(const unsigned char *disk, uint32_t *out) *out = ntohl(*out); } +static void sha1_to_disk(struct sha1 v, unsigned char *out) +{ + hashcpy(out, v.v); +} + +static void disk_to_sha1(const unsigned char *disk, struct sha1 *out) +{ + hashcpy(out->v, disk); +} + static const unsigned char *disk_lookup_sha1(const unsigned char *buf, unsigned nr, unsigned ksize, unsigned vsize, @@ -244,3 +254,9 @@ int map_persist_flush_##name(struct map_persist_##name *m, int fd) \ IMPLEMENT_MAP(object_uint32, obj_equal, hash_obj) IMPLEMENT_MAP(object_void, obj_equal, hash_obj) + +IMPLEMENT_MAP(object_sha1, obj_equal, hash_obj) +IMPLEMENT_MAP_PERSIST(object_sha1, + 20, obj_to_disk, + 20, sha1_to_disk, disk_to_sha1, + disk_lookup_sha1) diff --git a/map.h b/map.h index ceddc14..18eb939 100644 --- a/map.h +++ b/map.h @@ -40,7 +40,13 @@ extern void map_persist_attach_##name(struct map_persist_##name *, \ unsigned int len); \ extern int map_persist_flush_##name(struct map_persist_##name *, int fd); +struct sha1 { + unsigned char v[20]; +}; + DECLARE_MAP(object_uint32, const struct object *, uint32_t) DECLARE_MAP(object_void, const struct object *, void *) +DECLARE_MAP(object_sha1, const struct object *, struct sha1) +DECLARE_MAP_PERSIST(object_sha1) #endif /* MAP_H */ diff --git a/metadata-cache.c b/metadata-cache.c index e217db1..0ce0e90 100644 --- a/metadata-cache.c +++ b/metadata-cache.c @@ -124,3 +124,5 @@ int name##_cache_set(map_ktype_##map key, map_vtype_##map value) \ init_##name##_cache(); \ return map_persist_set_##map(&name##_map, key, value); \ } + +IMPLEMENT_METADATA_CACHE(patch_id, object_sha1, NULL) diff --git a/metadata-cache.h b/metadata-cache.h index 851a4eb..ff2f6d3 100644 --- a/metadata-cache.h +++ 
b/metadata-cache.h @@ -7,4 +7,6 @@ extern int name##_cache_get(map_ktype_##map key, map_vtype_##map *value); \ extern int name##_cache_set(map_ktype_##map key, map_vtype_##map value); +DECLARE_METADATA_CACHE(patch_id, object_sha1) + #endif /* METADATA_CACHE_H */ diff --git a/patch-ids.c b/patch-ids.c index 5717257..d1818eb 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -3,17 +3,37 @@ #include "commit.h" #include "sha1-lookup.h" #include "patch-ids.h" +#include "metadata-cache.h" + +int core_cache_patch_id; static int commit_patch_id(struct commit *commit, struct diff_options *options, unsigned char *sha1) { + if (core_cache_patch_id) { + struct sha1 v; + if (patch_id_cache_get(&commit->object, &v)) { + hashcpy(sha1, v.v); + return 0; + } + } + if (commit->parents) diff_tree_sha1(commit->parents->item->object.sha1, commit->object.sha1, "", options); else diff_root_tree_sha1(commit->object.sha1, "", options); diffcore_std(options); - return diff_flush_patch_id(options, sha1); + if (diff_flush_patch_id(options, sha1) < 0) + return -1; + + if (core_cache_patch_id) { + struct sha1 v; + hashcpy(v.v, sha1); + patch_id_cache_set(&commit->object, v); + } + + return 0; } static const unsigned char *patch_id_access(size_t index, void *table) -- 1.7.6.34.g86521e -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html