[PATCH 2/2] libselinux: performance optimization for duplicate detection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When semodule -i some.pp to install a module package, duplicate items are
detected for the module. The detection function is nodups_specs in
libselinux/src/label_file.c. The algorithm complexity of implementing
this function is O(N^2). In scenarios where N is very large, the efficiency
is very low.

To solve this problem, I propose to use the hash table to detect duplicates.
The algorithm complexity of new implementing is O(N). The execution efficiency
will be greatly improved.

Comparison between the execution time of the nodups_specs function.

Old double-layer loop implementation O(N^2):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.242s
nodups_specs end:   11785.588s
nodups_specs consumes:  0.346s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11804.280s
nodups_specs end:   11806.546s
nodups_specs consumes:  2.266s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11819.106s
nodups_specs end:   11830.892s
nodups_specs consumes: 11.786s

New hash table implementation O(N):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.588s
nodups_specs end:   11785.590s
nodups_specs consumes:  0.002s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11806.546s
nodups_specs end:   11806.552s
nodups_specs consumes:  0.006s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11830.892s
nodups_specs end:   11830.905s
nodups_specs consumes:  0.013s

Signed-off-by: wanghuizhao <wanghuizhao1@xxxxxxxxxx>
---
 libselinux/src/label_file.c | 112 ++++++++++++++++++++++++++++++++++----------
 libselinux/src/label_file.h |   5 ++
 2 files changed, 93 insertions(+), 24 deletions(-)

diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 74ae9b9f..e4a85043 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include "hashtab.h"
 #include "callbacks.h"
 #include "label_internal.h"
 #include "label_file.h"
@@ -57,40 +58,103 @@ static int find_stem_from_file(struct saved_data *data, const char *key)
 }
 
 /*
+ * hash calculation and key comparison of hash table
+ */
+
+static unsigned int symhash(hashtab_t h, const_hashtab_key_t key)
+{
+	const struct chkdups_key *k = (const struct chkdups_key *)key;
+	const char *p = NULL;
+	size_t size;
+	unsigned int val = 0;
+
+	size = strlen(k->regex);
+	for (p = k->regex; ((size_t) (p - k->regex)) < size; p++)
+		val =
+			(val << 4 | (val >> (8 * sizeof(unsigned int) - 4)) +
+			k->mode) ^ (*p);
+	return val % h->size;
+}
+
+static int symcmp(hashtab_t h
+		  __attribute__ ((unused)), const_hashtab_key_t key1,
+		  const_hashtab_key_t key2)
+{
+	const struct chkdups_key *a = (const struct chkdups_key *)key1;
+	const struct chkdups_key *b = (const struct chkdups_key *)key2;
+
+	return strcmp(a->regex, b->regex) || (a->mode && b->mode && a->mode != b->mode);
+}
+
+/*
  * Warn about duplicate specifications.
  */
 static int nodups_specs(struct saved_data *data, const char *path)
 {
-	int rc = 0;
-	unsigned int ii, jj;
+	int rc = 0, ret = 0;
+	unsigned int ii;
 	struct spec *curr_spec, *spec_arr = data->spec_arr;
+	struct chkdups_key *new = NULL;
+	unsigned int hashtab_len = (data->nspec / 10) ? data->nspec / 10 : 1;
+	hashtab_ptr_t cur, temp;
 
+	hashtab_t hash_table = hashtab_create(symhash, symcmp, data->nspec);
+	if (hash_table == NULL) {
+		rc = -1;
+		COMPAT_LOG(SELINUX_ERROR, "%s: hashtab create failed.\n", path);
+		return rc;
+	}
 	for (ii = 0; ii < data->nspec; ii++) {
-		curr_spec = &spec_arr[ii];
-		for (jj = ii + 1; jj < data->nspec; jj++) {
-			if ((!strcmp(spec_arr[jj].regex_str,
-				curr_spec->regex_str))
-			    && (!spec_arr[jj].mode || !curr_spec->mode
-				|| spec_arr[jj].mode == curr_spec->mode)) {
-				rc = -1;
-				errno = EINVAL;
-				if (strcmp(spec_arr[jj].lr.ctx_raw,
-					    curr_spec->lr.ctx_raw)) {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple different specifications for %s  (%s and %s).\n",
-						 path, curr_spec->regex_str,
-						 spec_arr[jj].lr.ctx_raw,
-						 curr_spec->lr.ctx_raw);
-				} else {
-					COMPAT_LOG
-						(SELINUX_ERROR,
-						 "%s: Multiple same specifications for %s.\n",
-						 path, curr_spec->regex_str);
-				}
+		new = (struct chkdups_key *)malloc(sizeof(struct chkdups_key));
+		new->regex = spec_arr[ii].regex_str;
+		new->mode = spec_arr[ii].mode;
+		ret = hashtab_insert(hash_table, (hashtab_key_t)new, &spec_arr[ii]);
+		if (ret == HASHTAB_SUCCESS)
+			continue;
+		if (ret == HASHTAB_PRESENT) {
+			curr_spec =
+				(struct spec *)hashtab_search(hash_table, (hashtab_key_t)new);
+			rc = -1;
+			errno = EINVAL;
+			if (strcmp(spec_arr[ii].lr.ctx_raw, curr_spec->lr.ctx_raw)) {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple different specifications for %s  (%s and %s).\n",
+					 path, curr_spec->regex_str,
+					 spec_arr[ii].lr.ctx_raw,
+					 curr_spec->lr.ctx_raw);
+			} else {
+				COMPAT_LOG
+					(SELINUX_ERROR,
+					 "%s: Multiple same specifications for %s.\n",
+					 path, curr_spec->regex_str);
 			}
 		}
+		if (ret == HASHTAB_OVERFLOW) {
+			rc = -1;
+			COMPAT_LOG
+				(SELINUX_ERROR,
+				"%s: hashtab happen memory error.\n",
+				path);
+			break;
+		}
+	}
+
+	for (ii = 0; ii < hashtab_len; ii++) {
+		cur = hash_table->htable[ii];
+		while (cur != NULL) {
+			temp = cur;
+			cur = cur->next;
+			free(temp->key);
+			free(temp);
+		}
+		hash_table->htable[ii] = NULL;
 	}
+
+	free(hash_table->htable);
+	hash_table->htable = NULL;
+	free(hash_table);
+
 	return rc;
 }
 
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 190bc175..ad79319e 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -35,6 +35,11 @@
 /* Required selinux_restorecon and selabel_get_digests_all_partial_matches() */
 #define RESTORECON_PARTIAL_MATCH_DIGEST  "security.sehash"
 
+struct chkdups_key {
+	char *regex;
+	unsigned int mode;
+};
+
 struct selabel_sub {
 	char *src;
 	int slen;
-- 
2.12.3




[Index of Archives]     [Selinux Refpolicy]     [Linux SGX]     [Fedora Users]     [Fedora Desktop]     [Yosemite Photos]     [Yosemite Camping]     [Yosemite Campsites]     [KDE Users]     [Gnome Users]

  Powered by Linux