Re: [PATCH] grep: enable multi-threading for -p and -W

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



René Scharfe wrote:
> Move attribute reading, which is not thread-safe, into add_work(), under
> grep_mutex.  That way we can stop turning off multi-threading if -p or -W
> is given, as the diff attribute for each file is gotten safely now.
> 
> Signed-off-by: Rene Scharfe <rene.scharfe@xxxxxxxxxxxxxx>
> ---
> Goes on top of your patch.  Needs testing..

On a random old linux-2.6 clone at v2.6.37-rc2, I'm seeing (best of 5):

* none of the patches:
  git grep --cached INITRAMFS_ROOT_UID
    2.13user 0.14system 0:02.10elapsed
  git grep --cached -W INITRAMFS_ROOT_UID    # note: broken!
    2.23user 0.18system 0:02.21elapsed 

* my patch, but not yours:
  git grep --cached INITRAMFS_ROOT_UID
    2.21user 0.21system 0:02.27elapsed
  git grep --cached -W INITRAMFS_ROOT_UID
    3.01user 0.05system 0:03.07elapsed

* my patch + your patch:
  git grep --cached INITRAMFS_ROOT_UID
    2.22user 0.17system 0:02.22elapsed
  git grep --cached -W INITRAMFS_ROOT_UID
    4.45user 0.22system 0:02.67elapsed

So while it ends up being faster overall, it also uses way more CPU
and would presumably be *slower* on a single-processor system.
Apparently those attribute lookups really hurt.

So I had the following idea: if we load attributes only very lazily
(that is, when match_funcname() is first called), we can ask for them
much more rarely.  The revised timings:

* my patch + the new patch at the end:
  git grep --cached INITRAMFS_ROOT_UID
    2.15user 0.16system 0:02.15elapsed 107%CPU
  git grep --cached -W INITRAMFS_ROOT_UID
    2.20user 0.18system 0:02.24elapsed

However, locking on a specific lock relies on the fact that the
attributes are only read from the tree, but never from the object
store.  Perhaps it would be more futureproof to lock on
read_sha1_mutex instead.  Either way the lazy attributes lookup seems
a big win.

------ 8< ------ 8< ------
Subject: [PATCH] grep: fine-grained locking around attributes access

Lazily load the userdiff attributes in match_funcname().  Use a
separate mutex around this loading to protect the (not thread-safe)
attributes machinery.  This lets us re-enable threading with -p and
-W while reducing the overhead caused by looking up attributes.

diff --git a/builtin/grep.c b/builtin/grep.c
index 988ea1d..822b32f 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -256,6 +256,7 @@ static void start_threads(struct grep_opt *opt)
 
 	pthread_mutex_init(&grep_mutex, NULL);
 	pthread_mutex_init(&read_sha1_mutex, NULL);
+	pthread_mutex_init(&grep_attr_mutex, NULL);
 	pthread_cond_init(&cond_add, NULL);
 	pthread_cond_init(&cond_write, NULL);
 	pthread_cond_init(&cond_result, NULL);
@@ -303,6 +304,7 @@ static int wait_all(void)
 
 	pthread_mutex_destroy(&grep_mutex);
 	pthread_mutex_destroy(&read_sha1_mutex);
+	pthread_mutex_destroy(&grep_attr_mutex);
 	pthread_cond_destroy(&cond_add);
 	pthread_cond_destroy(&cond_write);
 	pthread_cond_destroy(&cond_result);
@@ -1002,9 +1004,13 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		opt.regflags |= REG_ICASE;
 
 #ifndef NO_PTHREADS
-	if (online_cpus() == 1 || !grep_threads_ok(&opt))
+	if (online_cpus() == 1)
 		use_threads = 0;
+#endif
+
+	opt.use_threads = use_threads;
 
+#ifndef NO_PTHREADS
 	if (use_threads) {
 		if (opt.pre_context || opt.post_context || opt.file_break ||
 		    opt.funcbody)
diff --git a/grep.c b/grep.c
index 7a070e9..e9c3df3 100644
--- a/grep.c
+++ b/grep.c
@@ -2,6 +2,7 @@
 #include "grep.h"
 #include "userdiff.h"
 #include "xdiff-interface.h"
+#include "thread-utils.h"
 
 void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
 {
@@ -806,10 +807,38 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 	opt->output(opt, "\n", 1);
 }
 
-static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
+#ifndef NO_PTHREADS
+pthread_mutex_t grep_attr_mutex;
+
+static inline void grep_attr_lock(struct grep_opt *opt)
+{
+	if (opt->use_threads)
+		pthread_mutex_lock(&grep_attr_mutex);
+}
+
+static inline void grep_attr_unlock(struct grep_opt *opt)
+{
+	if (opt->use_threads)
+		pthread_mutex_unlock(&grep_attr_mutex);
+}
+#endif
+
+static int match_funcname(struct grep_opt *opt, const char *name, char *bol, char *eol)
 {
 	xdemitconf_t *xecfg = opt->priv;
-	if (xecfg && xecfg->find_func) {
+	if (xecfg && !xecfg->find_func) {
+		grep_attr_lock(opt);
+		struct userdiff_driver *drv = userdiff_find_by_path(name);
+		grep_attr_unlock(opt);
+		if (drv && drv->funcname.pattern) {
+			const struct userdiff_funcname *pe = &drv->funcname;
+			xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
+		} else {
+			xecfg = opt->priv = NULL;
+		}
+	}
+
+	if (xecfg) {
 		char buf[1];
 		return xecfg->find_func(bol, eol - bol, buf, 1,
 					xecfg->find_func_priv) >= 0;
@@ -835,7 +864,7 @@ static void show_funcname_line(struct grep_opt *opt, const char *name,
 		if (lno <= opt->last_shown)
 			break;
 
-		if (match_funcname(opt, bol, eol)) {
+		if (match_funcname(opt, name, bol, eol)) {
 			show_line(opt, bol, eol, name, lno, '=');
 			break;
 		}
@@ -848,7 +877,7 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
 	unsigned cur = lno, from = 1, funcname_lno = 0;
 	int funcname_needed = !!opt->funcname;
 
-	if (opt->funcbody && !match_funcname(opt, bol, end))
+	if (opt->funcbody && !match_funcname(opt, name, bol, end))
 		funcname_needed = 2;
 
 	if (opt->pre_context < lno)
@@ -864,7 +893,7 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
 		while (bol > buf && bol[-1] != '\n')
 			bol--;
 		cur--;
-		if (funcname_needed && match_funcname(opt, bol, eol)) {
+		if (funcname_needed && match_funcname(opt, name, bol, eol)) {
 			funcname_lno = cur;
 			funcname_needed = 0;
 		}
@@ -942,19 +971,6 @@ static int look_ahead(struct grep_opt *opt,
 	return 0;
 }
 
-int grep_threads_ok(const struct grep_opt *opt)
-{
-	/* If this condition is true, then we may use the attribute
-	 * machinery in grep_buffer_1. The attribute code is not
-	 * thread safe, so we disable the use of threads.
-	 */
-	if ((opt->funcname || opt->funcbody)
-	    && !opt->unmatch_name_only && !opt->status_only && !opt->name_only)
-		return 0;
-
-	return 1;
-}
-
 static void std_output(struct grep_opt *opt, const void *buf, size_t size)
 {
 	fwrite(buf, size, 1, stdout);
@@ -1011,12 +1027,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 	if ((opt->funcname || opt->funcbody)
 	    && !opt->unmatch_name_only && !opt->status_only &&
 	    !opt->name_only && !binary_match_only && !collect_hits) {
-		struct userdiff_driver *drv = userdiff_find_by_path(name);
-		if (drv && drv->funcname.pattern) {
-			const struct userdiff_funcname *pe = &drv->funcname;
-			xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
-			opt->priv = &xecfg;
-		}
+		opt->priv = &xecfg;
 	}
 	try_lookahead = should_lookahead(opt);
 
@@ -1093,7 +1104,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 				show_function = 1;
 			goto next_line;
 		}
-		if (show_function && match_funcname(opt, bol, eol))
+		if (show_function && match_funcname(opt, name, bol, eol))
 			show_function = 0;
 		if (show_function ||
 		    (last_hit && lno <= last_hit + opt->post_context)) {
diff --git a/grep.h b/grep.h
index a652800..15d227c 100644
--- a/grep.h
+++ b/grep.h
@@ -115,6 +115,7 @@ struct grep_opt {
 	int show_hunk_mark;
 	int file_break;
 	int heading;
+	int use_threads;
 	void *priv;
 
 	void (*output)(struct grep_opt *opt, const void *data, size_t size);
@@ -131,4 +132,10 @@ struct grep_opt {
 extern struct grep_opt *grep_opt_dup(const struct grep_opt *opt);
 extern int grep_threads_ok(const struct grep_opt *opt);
 
+#ifndef NO_PTHREADS
+/* Mutex used around access to the attributes machinery if
+ * opt->use_threads.  Must be initialized/destroyed by callers! */
+extern pthread_mutex_t grep_attr_mutex;
+#endif
+
 #endif


-- 
Thomas Rast
trast@{inf,student}.ethz.ch
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]