[patch] libselinux: lazily compile regexes to reduce overhead of matchpathcon / selabel_lookup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ulrich Drepper noted that we could reduce the overhead of matchpathcon
by compiling the pathname regexes lazily, on demand when there is a stem
match, rather than compiling them all up front.  Below is a patch that does that
for the libselinux 2.x series.  As with the context validation, the
regex compilation defaults to being done lazily for typical users, but
will still be done up front if the caller requested validation, as
setfiles does.  Thus, setfiles will still compile and check the entire
specification up front for errors, while most other programs will
compile the regexes lazily.  Backporting to the libselinux 1.x series
wouldn't be difficult, although it would have to be done manually.

Signed-off-by: Stephen Smalley <sds@xxxxxxxxxxxxx>

---

 src/label_file.c |  125 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 75 insertions(+), 50 deletions(-)

Index: libselinux/src/label_file.c
===================================================================
--- libselinux/src/label_file.c	(revision 2683)
+++ libselinux/src/label_file.c	(working copy)
@@ -30,6 +30,7 @@
 	char *regex_str;	/* regular expession string for diagnostics */
 	char *type_str;		/* type string for diagnostic messages */
 	regex_t regex;		/* compiled regular expression */
+	char regcomp;           /* regex_str has been compiled to regex */
 	mode_t mode;		/* mode format value */
 	int matches;		/* number of matching pathnames */
 	int hasMetaChars;	/* regular expression has meta-chars */
@@ -50,6 +51,7 @@
 	 */
 	spec_t *spec_arr;
 	unsigned int nspec;
+	unsigned int ncomp;
 
 	/*
 	 * The array of regular expression stems.
@@ -88,21 +90,18 @@
 
 /* find the stem of a file spec, returns the index into stem_arr for a new
  * or existing stem, (or -1 if there is no possible stem - IE for a file in
- * the root directory or a regex that is too complex for us).  Makes buf
- * point to the text AFTER the stem. */
-static int find_stem_from_spec(struct saved_data *data, const char **buf)
+ * the root directory or a regex that is too complex for us). */
+static int find_stem_from_spec(struct saved_data *data, const char *buf)
 {
 	int i, num = data->num_stems;
-	int stem_len = get_stem_from_spec(*buf);
+	int stem_len = get_stem_from_spec(buf);
 
 	if (!stem_len)
 		return -1;
 	for (i = 0; i < num; i++) {
 		if (stem_len == data->stem_arr[i].len
-		    && !strncmp(*buf, data->stem_arr[i].buf, stem_len)) {
-			*buf += stem_len;
+		    && !strncmp(buf, data->stem_arr[i].buf, stem_len))
 			return i;
-		}
 	}
 	if (data->alloc_stems == num) {
 		stem_t *tmp_arr;
@@ -117,10 +116,10 @@
 	data->stem_arr[num].buf = malloc(stem_len + 1);
 	if (!data->stem_arr[num].buf)
 		return -1;
-	memcpy(data->stem_arr[num].buf, *buf, stem_len);
+	memcpy(data->stem_arr[num].buf, buf, stem_len);
 	data->stem_arr[num].buf[stem_len] = '\0';
 	data->num_stems++;
-	*buf += stem_len;
+	buf += stem_len;
 	return num;
 }
 
@@ -220,16 +219,68 @@
 	return;
 }
 
+/* Compile spec->regex_str into spec->regex on first use; 0 = ok, -1 = failure. */
+static int compile_regex(struct saved_data *data, spec_t *spec, char **errbuf)
+{
+	char *reg_buf, *anchored_regex, *cp;
+	stem_t *stem_arr = data->stem_arr;
+	size_t len;
+	int regerr;
+	
+	if (spec->regcomp)
+		return 0; /* already compiled by an earlier call */
+	
+	data->ncomp++; /* bumped before compiling, so failed attempts are counted too */
+	
+	/* Skip the fixed stem: only the text after it is compiled. */
+	reg_buf = spec->regex_str;
+	if (spec->stem_id >= 0)
+		reg_buf += stem_arr[spec->stem_id].len;
+	
+	/* Anchor the regular expression; the +3 holds '^', '$' and the NUL. */
+	len = strlen(reg_buf);
+	cp = anchored_regex = malloc(len + 3);
+	if (!anchored_regex)
+		return -1; /* *errbuf is left untouched on this path */
+	/* Create ^...$ regexp.  */
+	*cp++ = '^';
+	cp = mempcpy(cp, reg_buf, len);
+	*cp++ = '$';
+	*cp = '\0';
+	
+	/* Compile the anchored copy; the copy is freed on both outcomes below. */
+	regerr = regcomp(&spec->regex, anchored_regex, 
+			 REG_EXTENDED | REG_NOSUB);
+	if (regerr != 0) {
+		size_t errsz = 0;
+		errsz = regerror(regerr, &spec->regex, NULL, 0); /* size query only */
+		if (errsz && errbuf)
+			*errbuf = malloc(errsz); /* not freed here; handed back via errbuf */
+		if (errbuf && *errbuf)
+			(void)regerror(regerr, &spec->regex,
+				       *errbuf, errsz);
+	
+		free(anchored_regex);
+		return -1;
+	}
+	free(anchored_regex);
+	
+	/* Mark the spec as compiled so later calls return early. */
+	spec->regcomp = 1;
+	
+	return 0;
+}
+
 static int process_line(struct selabel_handle *rec,
 			const char *path, const char *prefix,
 			char *line_buf, int pass, unsigned lineno)
 {
-	int items, len, regerr;
-	char *buf_p, *regex, *anchored_regex, *type, *context;
-	const char *reg_buf;
+	int items, len;
+	char *buf_p, *regex, *type, *context;
 	struct saved_data *data = (struct saved_data *)rec->data;
 	spec_t *spec_arr = data->spec_arr;
 	unsigned int nspec = data->nspec;
+	stem_t *stem_arr = data->stem_arr;
 
 	len = strlen(line_buf);
 	if (line_buf[len - 1] == '\n')
@@ -253,8 +304,7 @@
 		type = 0;
 	}
 
-	reg_buf = regex;
-	len = get_stem_from_spec(reg_buf);
+	len = get_stem_from_spec(regex);
 	if (len && prefix && strncmp(prefix, regex, len)) {
 		/* Stem of regex does not match requested prefix, discard. */
 		free(regex);
@@ -264,45 +314,16 @@
 	}
 
 	if (pass == 1) {
-		/* On the second pass, compile and store the specification in spec. */
-		char *cp;
-		spec_arr[nspec].stem_id = find_stem_from_spec(data, &reg_buf);
+		/* On the second pass, process and store the specification in spec. */
+		char *errbuf = NULL;
+		spec_arr[nspec].stem_id = find_stem_from_spec(data, regex);
 		spec_arr[nspec].regex_str = regex;
-
-		/* Anchor the regular expression. */
-		len = strlen(reg_buf);
-		cp = anchored_regex = malloc(len + 3);
-		if (!anchored_regex)
-			return -1;
-		/* Create ^...$ regexp.  */
-		*cp++ = '^';
-		cp = mempcpy(cp, reg_buf, len);
-		*cp++ = '$';
-		*cp = '\0';
-
-		/* Compile the regular expression. */
-		regerr =
-			regcomp(&spec_arr[nspec].regex,
-				anchored_regex, REG_EXTENDED | REG_NOSUB);
-		if (regerr != 0) {
-			size_t errsz = 0;
-			char *errbuf = NULL;
-			errsz = regerror(regerr, &spec_arr[nspec].regex,
-					 errbuf, errsz);
-			if (errsz)
-				errbuf = malloc(errsz);
-			if (errbuf)
-				(void)regerror(regerr,
-					       &spec_arr[nspec].regex,
-					       errbuf, errsz);
+		if (rec->validating && compile_regex(data, &spec_arr[nspec], &errbuf)) {
 			COMPAT_LOG(SELINUX_WARNING,
-				    "%s:  line %d has invalid regex %s:  %s\n",
-				    path, lineno, anchored_regex,
-				    (errbuf ? errbuf : "out of memory"));
-			free(anchored_regex);
-			return 0;
+				   "%s:  line %d has invalid regex %s:  %s\n",
+				   path, lineno, regex,
+				   (errbuf ? errbuf : "out of memory"));
 		}
-		free(anchored_regex);
 
 		/* Convert the type string to a mode format */
 		spec_arr[nspec].type_str = type;
@@ -437,6 +458,7 @@
 	for (pass = 0; pass < 2; pass++) {
 		lineno = 0;
 		data->nspec = 0;
+		data->ncomp = 0;
 		while (getline(&line_buf, &line_len, fp) > 0
 		       && data->nspec < maxnspec) {
 			if (process_line(rec, path, prefix, line_buf,
@@ -549,6 +571,7 @@
 {
 	struct saved_data *data = (struct saved_data *)rec->data;
 	spec_t *spec_arr = data->spec_arr;
+	stem_t *stem_arr = data->stem_arr;
 	int i, rc, file_stem;
 	mode_t mode = (mode_t)type;
 	const char *buf = key;
@@ -574,6 +597,8 @@
 		     || spec_arr[i].stem_id == file_stem)
 		    && (!mode || !spec_arr[i].mode
 			|| mode == spec_arr[i].mode)) {
+			if (compile_regex(data, &spec_arr[i], NULL) < 0)
+				return NULL;
 			if (spec_arr[i].stem_id == -1)
 				rc = regexec(&spec_arr[i].regex, key, 0, 0, 0);
 			else



-- 
Stephen Smalley
National Security Agency


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@xxxxxxxxxxxxx with
the words "unsubscribe selinux" without quotes as the message.

[Index of Archives]     [Selinux Refpolicy]     [Linux SGX]     [Fedora Users]     [Fedora Desktop]     [Yosemite Photos]     [Yosemite Camping]     [Yosemite Campsites]     [KDE Users]     [Gnome Users]

  Powered by Linux