[PATCH 1/2] ref-filter: hacky "streaming" mode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The ref-filter code is very keen to collect all of the refs in an array
before writing any output. This makes things slower than necessary when
using the default sort order (which is already sorted by refname when we
call for_each_ref()), and when no filtering options require it.

This commit adds a mildly-ugly interface to detect this case and stream
directly from filter_refs(). The caller just needs to call the
"maybe_stream" function. Either way, they can call the usual sort/print
functions; they'll just be noops if we did stream instead of collecting.

Here are some timings on a fully-packed 500k-ref repo:

  Benchmark #1: ./git.orig for-each-ref --format='%(objectname) %(refname)'
    Time (mean ± σ):     340.2 ms ±   5.3 ms    [User: 300.5 ms, System: 39.6 ms]
    Range (min … max):   332.9 ms … 347.0 ms    10 runs

  Benchmark #2: ./git.stream for-each-ref --format='%(objectname) %(refname)'
    Time (mean ± σ):     224.0 ms ±   3.4 ms    [User: 222.7 ms, System: 1.3 ms]
    Range (min … max):   218.1 ms … 228.5 ms    13 runs

  Summary
    './git.stream for-each-ref --format='%(objectname) %(refname)'' ran
      1.52 ± 0.03 times faster than './git.orig for-each-ref --format='%(objectname) %(refname)''

So we definitely shave off some time, though we're still _much_ slower
than a simple `wc -l <packed-refs` (which is around 10ms, though of
course it isn't actually doing as much work).

The point here is to reduce overall effort, though of course the time to
first output is much improved in the streaming version, too (about 250ms
versus 4ms).

Signed-off-by: Jeff King <peff@xxxxxxxx>
---
 builtin/for-each-ref.c |  7 ++++++
 ref-filter.c           | 50 ++++++++++++++++++++++++++++++++++--------
 ref-filter.h           |  8 +++++++
 3 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index 89cb6307d4..fe0b92443f 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -70,6 +70,13 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	if (verify_ref_format(&format))
 		usage_with_options(for_each_ref_usage, opts);
 
+	/*
+	 * try streaming, but only without maxcount; in theory the ref-filter
+	 * code could learn about maxcount, but for now it is our own thing
+	 */
+	if (!maxcount)
+		ref_filter_maybe_stream(&filter, sorting, icase, &format);
+
 	if (!sorting)
 		sorting = ref_default_sorting();
 	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
diff --git a/ref-filter.c b/ref-filter.c
index 93ce2a6ef2..17b78b1d30 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2283,15 +2283,19 @@ static int ref_filter_handler(const char *refname, const struct object_id *oid,
 			return 0;
 	}
 
-	/*
-	 * We do not open the object yet; sort may only need refname
-	 * to do its job and the resulting list may yet to be pruned
-	 * by maxcount logic.
-	 */
-	ref = ref_array_push(ref_cbdata->array, refname, oid);
-	ref->commit = commit;
-	ref->flag = flag;
-	ref->kind = kind;
+	if (ref_cbdata->filter->streaming_format) {
+		pretty_print_ref(refname, oid, ref_cbdata->filter->streaming_format);
+	} else {
+		/*
+		 * We do not open the object yet; sort may only need refname
+		 * to do its job and the resulting list may yet to be pruned
+		 * by maxcount logic.
+		 */
+		ref = ref_array_push(ref_cbdata->array, refname, oid);
+		ref->commit = commit;
+		ref->flag = flag;
+		ref->kind = kind;
+	}
 
 	return 0;
 }
@@ -2563,6 +2567,34 @@ void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array)
 	QSORT_S(array->items, array->nr, compare_refs, sorting);
 }
 
+void ref_filter_maybe_stream(struct ref_filter *filter,
+			     const struct ref_sorting *sort, int icase,
+			     struct ref_format *format)
+{
+	/* streaming only works with default for_each_ref sort order */
+	if (sort || icase)
+		return;
+
+	/* these filters want to see all candidates up front */
+	if (filter->reachable_from || filter->unreachable_from)
+		return;
+
+	/*
+	 * the %(symref) placeholder is broken with pretty_print_ref(),
+	 * which our streaming code uses. I suspect this is a sign of breakage
+	 * in other callers like verify_tag(), which should be fixed. But for
+	 * now just disable streaming.
+	 *
+	 * Note that this implies we've parsed the format already with
+	 * verify_ref_format().
+	 */
+	if (need_symref)
+		return;
+
+	/* OK to stream */
+	filter->streaming_format = format;
+}
+
 static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state)
 {
 	struct strbuf *s = &state->stack->output;
diff --git a/ref-filter.h b/ref-filter.h
index c15dee8d6b..ecea1837a2 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -69,6 +69,9 @@ struct ref_filter {
 		lines;
 	int abbrev,
 		verbose;
+
+	/* if non-NULL, streaming output during filter_refs() is enabled */
+	struct ref_format *streaming_format;
 };
 
 struct ref_format {
@@ -135,6 +138,11 @@ char *get_head_description(void);
 /*  Set up translated strings in the output. */
 void setup_ref_filter_porcelain_msg(void);
 
+/* enable streaming during filter_refs() if options allow it */
+void ref_filter_maybe_stream(struct ref_filter *filter,
+			     const struct ref_sorting *sort, int icase,
+			     struct ref_format *format);
+
 /*
  * Print a single ref, outside of any ref-filter. Note that the
  * name must be a fully qualified refname.
-- 
2.33.0.618.g5b11852304




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux