Re: A filename to label translation daemon

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Aug 15, 2012 at 1:56 AM, Russell Coker <russell@xxxxxxxxxxxx> wrote:
> On Tue, 14 Aug 2012, Eric Paris <eparis@xxxxxxxxxxxxxx> wrote:
>> I have code that does just that.  Dan and I both wrote a version.
>> I'll attach it.  I didn't find the speedups we were hoping for and it
>> didn't work correctly/completely in the face of file context
>> equivalencies.  Although that is likely fixable.  I was just looking
>> at all of the stem code (and wondered who wrote it but it was
>> pre-git).  I'm surprised it made a big difference.  Wouldn't the regex
>> code be able to return extremely quickly if it didn't match?  Anyway.
>> I'm writing some test programs to look at all of the possibilities.
>
> At the time I wrote the code I didn't attempt to micro-benchmark it.  As the
> use case that mattered most was a full relabel of a root filesystem (between
> 40,000 and 120,000 files in the common case) and the number of regexes was
> smaller than it is today the win was fairly obvious.
>
> From memory the stem compression change typically improved performance by a
> factor of 3-5 depending on what you were doing.  That was on systems like a
> Pentium-166 or P2-400.  The performance benefits on faster systems (800MHz
> Athlon and better) were less important to me at the time.
>
> Prior to stem compression the relabel was entirely CPU bottlenecked on all
> systems.  After stem compression and some new CPU releases from Intel and AMD
> I wrote the code to have two processes doing disk IO (one stat'ing the files
> and the other applying labels) so that we could have some parallelism between
> regex checks and seeks on disk.
>
> A theoretical regex library wouldn't be much slower than the integer checks we
> are doing, it could just use a 32bit integer compare to check the first 4 bytes
> which would compare to the integer stem check we do.  Does one of the current
> regex libraries do such optimisations where you have a common case of a non-
> match on a relatively simple regex?

Couldn't say.  But I can say that pcre seems to kick the crap out of
glibc at every turn.  My test programs aren't doing any of the stem
stuff.  So I can't give a perfect comparison yet.  I'll pull that in
today to see if it seems reasonable to drop all of it and simplify the
code.  My current test runs a loop 100 times: load the db, look up one
semi-complex path (/var/www/html/cgi-bin/mail.pl), and free the db.
The timing for each method is as follows:

glibc - 16.16 sec
pcre - 2.08 sec
pcre-mmap - 0.23 sec

I made up my own mmap format instead of adding glib and GVariant as a
build requirement.  I'm not opposed to it.  Reusing code seems like
a very good idea, though it's kinda weird to have that circular
dependency thing.  I'll play with that today as well.

You'll find my current test programs attached.
-Eric
#include <regex.h>
#include <stdio.h>

#include "test-regex.h"

/* One file-context entry together with its compiled POSIX regex. */
struct spec {
	char *context;	/* context string parsed from the line; released by free_specs() */
	char *regex;	/* regex source text parsed from the line; released by free_specs() */
	mode_t mode;	/* value returned by string_to_mode(); 0 when the line has no type field */
	regex_t reg;	/* pattern compiled by regcomp() with REG_EXTENDED | REG_NOSUB */
};

/*
 * Parse context_file into a newly allocated array of specs.
 *
 * Every line is split into "regex [type] context"; the regex is compiled
 * with regcomp().  Lines that do not yield two or three fields are reported
 * on stderr and skipped, so trailing array entries may stay zero-filled.
 *
 * @context_file: stream positioned at the start of the file; rewound on success
 * @file_len:     number of array entries to allocate (the caller's line count)
 * @out_spec:     receives the array on success; the caller frees it
 *
 * Returns 0 on success and -1 on failure (regex compile error, or more
 * valid entries than file_len).  On failure everything allocated here is
 * released and *out_spec is left untouched.  Exits if the array cannot be
 * allocated.
 */
static int process_file(FILE *context_file, unsigned int file_len, struct spec **out_spec)
{
	struct spec *specs;
	unsigned int line_num = 0;
	unsigned int i;
	char *line_buf = NULL;
	size_t line_len = 0;

	specs = calloc(file_len, sizeof(*specs));
	if (!specs) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	while (getline(&line_buf, &line_len, context_file) != -1) {
		char *context = NULL;
		char *mode = NULL;
		char *regex = NULL;
		char errbuf[256];
		int items, rc;

		items = sscanf(line_buf, "%ms %ms %ms", &regex, &mode, &context);
		if (items < 2 || items > 3) {
			/* %ms already allocated the first field when only one parsed */
			if (items == 1)
				free(regex);
			fprintf(stderr, "invalid entry, skipping:%s", line_buf);
			continue;
		}

		if (items == 2) {
			context = mode;
			mode = NULL;
		}

		/* never write past the array the caller asked for */
		if (line_num >= file_len) {
			fprintf(stderr, "more entries than the %u lines allocated\n", file_len);
			free(context);
			free(mode);
			free(regex);
			goto fail;
		}

		specs[line_num].context = context;
		specs[line_num].mode = string_to_mode(mode);
		free(mode);
		specs[line_num].regex = regex;

		/* regcomp() reports failure with a positive, non-zero code */
		rc = regcomp(&specs[line_num].reg, regex, REG_EXTENDED | REG_NOSUB);
		if (rc != 0) {
			regerror(rc, &specs[line_num].reg, errbuf, sizeof(errbuf));
			fprintf(stderr, "regcomp failed for %s: %s\n", regex, errbuf);
			free(context);
			free(regex);
			goto fail;
		}
		line_num++;
	}

	free(line_buf);
	rewind(context_file);
	*out_spec = specs;
	return 0;

fail:
	/* entries below line_num are complete and own a compiled regex */
	for (i = 0; i < line_num; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		regfree(&specs[i].reg);
	}
	free(specs);
	free(line_buf);
	return -1;
}

static int free_specs(struct spec *specs, unsigned int num_specs)
{
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		regfree(&specs[i].reg);
	}

	free(specs);
	return 0;
}

/*
 * Run every compiled regex against TEST_PATH and count the matches.
 *
 * Returns 0 when exactly TEST_PATH_MATCHES specs match.  Returns -1 when
 * regexec() reports anything other than a match or REG_NOMATCH, or when the
 * count differs (the actual count is printed on stderr).
 */
static int test_match(struct spec *specs, unsigned int num_specs)
{
	unsigned int matches = 0;
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		int rc = regexec(&specs[i].reg, TEST_PATH, 0, NULL, 0);

		if (rc == 0)
			matches++;
		else if (rc != REG_NOMATCH)
			return -1;
	}

	if (matches != TEST_PATH_MATCHES) {
		/* matches is unsigned, so it must be printed with %u */
		fprintf(stderr, "Found %u matches for %s\n", matches, TEST_PATH);
		return -1;
	}
	return 0;
}

/*
 * Benchmark driver: time NUM_RUNS rounds of parsing the context file,
 * matching TEST_PATH against every spec and freeing the specs again.
 * Exits with EXIT_FAILURE when the file has no lines and returns the
 * negative step result as soon as any step fails.
 */
int main(void)
{
	FILE *context_file = get_context_file();
	unsigned int num_specs = lines_in_file(context_file);
	struct spec *specs;
	int run;
	int rc;

	if (num_specs == 0)
		exit(EXIT_FAILURE);

START;
	for (run = 0; run < NUM_RUNS; run++) {
		if ((rc = process_file(context_file, num_specs, &specs)) < 0)
			return rc;

		if ((rc = test_match(specs, num_specs)) < 0)
			return rc;

		if ((rc = free_specs(specs, num_specs)) < 0)
			return rc;
	}
STOP;
PRINTTIME;

	return 0;
}

Attachment: Makefile
Description: Binary data

#include <pcre.h>
#include <stdio.h>

#include "test-regex.h"

/* One file-context entry together with its compiled and studied PCRE pattern. */
struct spec {
	char *context;	/* context string parsed from the line; released by free_specs() */
	char *regex;	/* regex source text parsed from the line; released by free_specs() */
	mode_t mode;	/* value returned by string_to_mode(); 0 when the line has no type field */
	pcre *re;	/* pattern returned by pcre_compile(); released with pcre_free() */
	pcre_extra *sd;	/* result of pcre_study(); released with pcre_free_study() */
};

/*
 * Parse context_file into a newly allocated array of specs.
 *
 * Every line is split into "regex [type] context"; the regex is compiled
 * with pcre_compile() and then studied.  Lines that do not yield two or
 * three fields are reported on stderr and skipped, so trailing array
 * entries may stay zero-filled.
 *
 * @context_file: stream positioned at the start of the file; rewound on success
 * @file_len:     number of array entries to allocate (the caller's line count)
 * @out_spec:     receives the array on success; the caller frees it
 *
 * Returns 0 on success and -1 on failure (compile or study error, or more
 * valid entries than file_len).  On failure everything allocated here is
 * released and *out_spec is left untouched.  Exits if the array cannot be
 * allocated.
 */
static int process_file(FILE *context_file, unsigned int file_len, struct spec **out_spec)
{
	struct spec *specs;
	unsigned int line_num = 0;
	unsigned int i;
	char *line_buf = NULL;
	size_t line_len = 0;

	specs = calloc(file_len, sizeof(*specs));
	if (!specs) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	while (getline(&line_buf, &line_len, context_file) != -1) {
		struct spec *cur;
		char *context = NULL;
		char *mode = NULL;
		char *regex = NULL;
		const char *err = NULL;
		int items, erroff;

		items = sscanf(line_buf, "%ms %ms %ms", &regex, &mode, &context);
		if (items < 2 || items > 3) {
			/* %ms already allocated the first field when only one parsed */
			if (items == 1)
				free(regex);
			fprintf(stderr, "invalid entry, skipping:%s", line_buf);
			continue;
		}

		if (items == 2) {
			context = mode;
			mode = NULL;
		}

		/* never write past the array the caller asked for */
		if (line_num >= file_len) {
			fprintf(stderr, "more entries than the %u lines allocated\n", file_len);
			free(context);
			free(mode);
			free(regex);
			goto fail;
		}

		cur = &specs[line_num];
		cur->context = context;
		cur->mode = string_to_mode(mode);
		free(mode);	/* only the converted value is kept */
		cur->regex = regex;

		cur->re = pcre_compile(regex, 0, &err, &erroff, NULL);
		if (!cur->re) {
			fprintf(stderr, "PCRE compilation failed for %s at offset %d: %s\n", regex, erroff, err);
			goto fail;
		}

		/*
		 * pcre_study() may return NULL simply because it found nothing
		 * worth recording; only a non-NULL error string marks a failure.
		 * A NULL study block is accepted by pcre_exec() and
		 * pcre_free_study().
		 */
		err = NULL;
		cur->sd = pcre_study(cur->re, 0, &err);
		if (!cur->sd && err) {
			fprintf(stderr, "PCRE study failed for %s: %s\n", regex, err);
			goto fail;
		}

		line_num++;
	}

	free(line_buf);
	rewind(context_file);
	*out_spec = specs;
	return 0;

fail:
	/* entry line_num may be partly filled, so it is released as well */
	for (i = 0; i < file_len && i <= line_num; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		pcre_free_study(specs[i].sd);
	}
	free(specs);
	free(line_buf);
	return -1;
}

/*
 * Run every compiled pattern against TEST_PATH and count the matches.
 *
 * pcre_exec() is called without an output vector, so a result of 0 is
 * counted as a match and PCRE_ERROR_NOMATCH as a miss.
 *
 * Returns 0 when exactly TEST_PATH_MATCHES specs match.  Returns -1 on any
 * other pcre_exec() result or when the count differs (the actual count is
 * printed on stderr).
 */
static int test_match(struct spec *specs, unsigned int num_specs)
{
	/* the subject never changes, so measure it once */
	const int path_len = (int)strlen(TEST_PATH);
	unsigned int matches = 0;
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		int rc = pcre_exec(specs[i].re, specs[i].sd, TEST_PATH, path_len, 0, 0, NULL, 0);

		if (rc == 0)
			matches++;
		else if (rc != PCRE_ERROR_NOMATCH)
			return -1;
	}

	if (matches != TEST_PATH_MATCHES) {
		/* matches is unsigned, so it must be printed with %u */
		fprintf(stderr, "Found %u matches for %s\n", matches, TEST_PATH);
		return -1;
	}
	return 0;
}

static int free_specs(struct spec *specs, unsigned int num_specs)
{
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		pcre_free_study(specs[i].sd);
	}

	free(specs);
	return 0;
}

/*
 * Benchmark driver: time NUM_RUNS rounds of parsing the context file,
 * matching TEST_PATH against every spec and freeing the specs again.
 * Exits with EXIT_FAILURE when the file has no lines and returns the
 * negative step result as soon as any step fails.
 */
int main(void)
{
	FILE *context_file = get_context_file();
	unsigned int num_specs = lines_in_file(context_file);
	struct spec *specs;
	int run;
	int rc;

	if (num_specs == 0)
		exit(EXIT_FAILURE);

START;
	for (run = 0; run < NUM_RUNS; run++) {
		if ((rc = process_file(context_file, num_specs, &specs)) < 0)
			return rc;

		if ((rc = test_match(specs, num_specs)) < 0)
			return rc;

		if ((rc = free_specs(specs, num_specs)) < 0)
			return rc;
	}
STOP;
PRINTTIME;

	return 0;
}
#include <pcre.h>
#include <stdio.h>

#include "test-regex.h"

/* One file-context entry together with its compiled and studied PCRE pattern. */
struct spec {
	char *context;	/* context string parsed from the line; released by free_specs() */
	char *regex;	/* regex source text parsed from the line; released by free_specs() */
	mode_t mode;	/* value returned by string_to_mode(); 0 when the line has no type field */
	pcre *re;	/* pattern returned by pcre_compile(); its bytes are dumped by write_binary_file() */
	pcre_extra *sd;	/* result of pcre_study(); write_binary_file() dumps sd->study_data */
};

/*
 * Parse context_file into a newly allocated array of specs.
 *
 * Every line is split into "regex [type] context"; the regex is compiled
 * with pcre_compile() and then studied.  Lines that do not yield two or
 * three fields are reported on stderr and skipped, so trailing array
 * entries may stay zero-filled.
 *
 * @context_file: stream positioned at the start of the file; rewound on success
 * @file_len:     number of array entries to allocate (the caller's line count)
 * @out_spec:     receives the array on success; the caller frees it
 *
 * Returns 0 on success and -1 on failure (compile error, missing study
 * block, or more valid entries than file_len).  On failure everything
 * allocated here is released and *out_spec is left untouched.  Exits if the
 * array cannot be allocated.
 */
static int process_file(FILE *context_file, unsigned int file_len, struct spec **out_spec)
{
	struct spec *specs;
	unsigned int line_num = 0;
	unsigned int i;
	char *line_buf = NULL;
	size_t line_len = 0;

	specs = calloc(file_len, sizeof(*specs));
	if (!specs) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	while (getline(&line_buf, &line_len, context_file) != -1) {
		struct spec *cur;
		char *context = NULL;
		char *mode = NULL;
		char *regex = NULL;
		const char *err = NULL;
		int items, erroff;

		items = sscanf(line_buf, "%ms %ms %ms", &regex, &mode, &context);
		if (items < 2 || items > 3) {
			/* %ms already allocated the first field when only one parsed */
			if (items == 1)
				free(regex);
			fprintf(stderr, "invalid entry, skipping:%s", line_buf);
			continue;
		}

		if (items == 2) {
			context = mode;
			mode = NULL;
		}

		/* never write past the array the caller asked for */
		if (line_num >= file_len) {
			fprintf(stderr, "more entries than the %u lines allocated\n", file_len);
			free(context);
			free(mode);
			free(regex);
			goto fail;
		}

		cur = &specs[line_num];
		cur->context = context;
		cur->mode = string_to_mode(mode);
		free(mode);	/* only the converted value is kept */
		cur->regex = regex;

		cur->re = pcre_compile(regex, 0, &err, &erroff, NULL);
		if (!cur->re) {
			fprintf(stderr, "PCRE compilation failed for %s at offset %d: %s\n", regex, erroff, err);
			goto fail;
		}

		/*
		 * A study block is mandatory in this program because
		 * write_binary_file() dereferences sd->study_data for every
		 * entry.  pcre_study() can return NULL without an error string,
		 * so never hand a NULL pointer to %s.
		 */
		err = NULL;
		cur->sd = pcre_study(cur->re, 0, &err);
		if (!cur->sd) {
			fprintf(stderr, "PCRE study failed for %s: %s\n", regex,
				err ? err : "no study data produced");
			goto fail;
		}

		line_num++;
	}

	free(line_buf);
	rewind(context_file);
	*out_spec = specs;
	return 0;

fail:
	/* entry line_num may be partly filled, so it is released as well */
	for (i = 0; i < file_len && i <= line_num; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		pcre_free_study(specs[i].sd);
	}
	free(specs);
	free(line_buf);
	return -1;
}

static int write_binary_file(struct spec *specs, unsigned int num_specs)
{
	FILE *bin_file;
	size_t len;
	unsigned int magic = 0xdeadbeef;
	unsigned int i;

	bin_file = fopen(BIN_FILE_PATH, "w");
	if (!bin_file) {
		perror("fopen binfile");
		exit(EXIT_FAILURE);
	}

	len = fwrite(&magic, sizeof(magic), 1, bin_file);
	if (len != 1)
		return -1;

	len = fwrite(&num_specs, sizeof(num_specs), 1, bin_file);
	if (len != 1)
		return -1;

	for (i = 0; i < num_specs; i++) {
		char *context = specs[i].context;
		char *regex = specs[i].regex;
		mode_t mode = specs[i].mode;
		pcre *re = specs[i].re;
		pcre_extra *sd = specs[i].sd;
		size_t to_write, size;
		int rc;

		to_write = strlen(context) + 1;
		len = fwrite(&to_write, sizeof(to_write), 1, bin_file);
		if (len != 1)
			return -1;

		len = fwrite(context, sizeof(*context), to_write, bin_file);
		if (len != to_write)
			return -1;

		to_write = strlen(regex) + 1;
		len = fwrite(&to_write, sizeof(to_write), 1, bin_file);
		if (len != 1)
			return -1;

		len = fwrite(regex, sizeof(*regex), to_write, bin_file);
		if (len != to_write)
			return -1;

		len = fwrite(&mode, sizeof(mode), 1, bin_file);
		if (len != 1)
			return -1;

		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
		if (rc < 0)
			return -1;

		to_write = size;
		len = fwrite(&to_write, sizeof(to_write), 1, bin_file);
		if (len != 1)
			return -1;

		len = fwrite(re, 1, to_write, bin_file);
		if (len != to_write)
			return -1;

		rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
		if (rc < 0)
			return -1;

		to_write = size;
		len = fwrite(&to_write, sizeof(to_write), 1, bin_file);
		if (len != 1)
			return -1;

		len = fwrite(sd->study_data, 1, to_write, bin_file);
		if (len != to_write)
			return -1;
	}
	return 0;
}

static int free_specs(struct spec *specs, unsigned int num_specs)
{
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		pcre_free_study(specs[i].sd);
	}

	free(specs);
	return 0;
}

/*
 * Benchmark driver: time NUM_RUNS rounds of parsing the context file,
 * writing the compiled specs to the binary file and freeing them again.
 * Exits with EXIT_FAILURE when the file has no lines and returns the
 * negative step result as soon as any step fails.
 */
int main(void)
{
	FILE *context_file = get_context_file();
	unsigned int num_specs = lines_in_file(context_file);
	struct spec *specs;
	int run;
	int rc;

	if (num_specs == 0)
		exit(EXIT_FAILURE);

START;
	for (run = 0; run < NUM_RUNS; run++) {
		if ((rc = process_file(context_file, num_specs, &specs)) < 0)
			return rc;

		if ((rc = write_binary_file(specs, num_specs)) < 0)
			return rc;

		if ((rc = free_specs(specs, num_specs)) < 0)
			return rc;
	}
STOP;
PRINTTIME;

	return 0;
}
#include <fcntl.h>
#include <pcre.h>
#include <stdio.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "test-regex.h"

/* Describes the mmap()ed region that backs a loaded spec array. */
struct spec_data {
	void *addr;	/* start of the mapping; handed to munmap() by free_specs() */
	size_t len;	/* mapped length: the file size rounded up to a page multiple */
};

/*
 * One entry of the binary file.  read_binary_file() points every field
 * into the mapping instead of copying, so nothing here is freed per entry.
 */
struct spec {
	char *context;	/* context string inside the mapping */
	char *regex;	/* regex source text inside the mapping */
	mode_t *mode;	/* address of the stored mode_t inside the mapping */
	pcre *re;	/* compiled pattern bytes inside the mapping */
	pcre_extra sd;	/* study block; study_data points into the mapping */
};

/*
 * Claim n bytes at *pos and advance the cursor.
 * Returns the start of the claimed bytes, or NULL when fewer than n remain.
 */
static char *bin_take(char **pos, size_t *left, size_t n)
{
	char *start = *pos;

	if (n > *left)
		return NULL;
	*pos += n;
	*left -= n;
	return start;
}

/*
 * Claim a size_t length prefix plus the blob it announces.
 * Returns the blob start (its length in *out_len) or NULL on truncation.
 */
static char *bin_take_sized(char **pos, size_t *left, size_t *out_len)
{
	size_t n;
	char *prefix = bin_take(pos, left, sizeof(n));

	if (!prefix)
		return NULL;
	/* prefixes follow variable-length strings, so they may be unaligned */
	memcpy(&n, prefix, sizeof(n));
	*out_len = n;
	return bin_take(pos, left, n);
}

/* Claim a length-prefixed blob that must be a non-empty, NUL-terminated string. */
static char *bin_take_string(char **pos, size_t *left)
{
	size_t n;
	char *s = bin_take_sized(pos, left, &n);

	if (!s || n == 0 || s[n - 1] != '\0')
		return NULL;
	return s;
}

/*
 * Map BIN_FILE_PATH read-only and build a spec array whose fields point
 * straight into the mapping.
 *
 * Expected layout: unsigned int magic (0xdeadbeef), unsigned int count, then
 * per entry size_t + context, size_t + regex, mode_t, size_t + compiled
 * pattern, size_t + study data.  Every field is checked against the file
 * size before it is used.
 *
 * @out_specs:     receives the calloc()ed array (release with free())
 * @out_num_specs: receives the number of entries
 * @data:          receives the mapping address and length for munmap()
 *
 * Returns 0 on success.  Returns -1 on any failure, in which case the
 * descriptor, the mapping and the array are all released and the output
 * parameters are left untouched.
 */
static int read_binary_file(struct spec **out_specs, unsigned int *out_num_specs, struct spec_data *data)
{
	const long page = sysconf(_SC_PAGE_SIZE);
	struct spec *specs = NULL;
	struct stat st;
	unsigned int magic, num_specs, i;
	size_t map_len, left, blob_len;
	char *base, *pos, *field;
	int fd;

	fd = open(BIN_FILE_PATH, O_RDONLY);
	if (fd < 0)
		return -1;

	if (fstat(fd, &st) < 0) {
		close(fd);
		return -1;
	}

	/* round the length up to a whole number of pages */
	map_len = st.st_size;
	map_len += page - 1;
	map_len &= ~((size_t)page - 1);

	base = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return -1;
	}
	/* the mapping stays valid after the descriptor is closed */
	close(fd);

	pos = base;
	left = (size_t)st.st_size;

	field = bin_take(&pos, &left, sizeof(magic));
	if (!field)
		goto fail;
	memcpy(&magic, field, sizeof(magic));
	if (magic != 0xdeadbeef)
		goto fail;

	field = bin_take(&pos, &left, sizeof(num_specs));
	if (!field)
		goto fail;
	memcpy(&num_specs, field, sizeof(num_specs));

	specs = calloc(num_specs, sizeof(*specs));
	if (!specs)
		goto fail;

	for (i = 0; i < num_specs; i++) {
		specs[i].context = bin_take_string(&pos, &left);
		if (!specs[i].context)
			goto fail;

		specs[i].regex = bin_take_string(&pos, &left);
		if (!specs[i].regex)
			goto fail;

		/* NOTE(review): this address may be unaligned for mode_t */
		field = bin_take(&pos, &left, sizeof(*specs[i].mode));
		if (!field)
			goto fail;
		specs[i].mode = (mode_t *)field;

		field = bin_take_sized(&pos, &left, &blob_len);
		if (!field)
			goto fail;
		specs[i].re = (pcre *)field;

		field = bin_take_sized(&pos, &left, &blob_len);
		if (!field)
			goto fail;
		specs[i].sd.study_data = (void *)field;
		specs[i].sd.flags |= PCRE_EXTRA_STUDY_DATA;
	}

	data->addr = base;
	data->len = map_len;
	*out_num_specs = num_specs;
	*out_specs = specs;
	return 0;

fail:
	free(specs);
	munmap(base, map_len);
	return -1;
}

/*
 * Run every mapped pattern against TEST_PATH and count the matches.
 *
 * pcre_exec() is called without an output vector, so a result of 0 is
 * counted as a match and PCRE_ERROR_NOMATCH as a miss.
 *
 * Returns 0 when exactly TEST_PATH_MATCHES specs match.  Returns -1 on any
 * other pcre_exec() result or when the count differs (the actual count is
 * printed on stderr).
 */
static int test_match(struct spec *specs, unsigned int num_specs)
{
	/* the subject never changes, so measure it once */
	const int path_len = (int)strlen(TEST_PATH);
	unsigned int matches = 0;
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		int rc = pcre_exec(specs[i].re, &specs[i].sd, TEST_PATH, path_len, 0, 0, NULL, 0);

		if (rc == 0)
			matches++;
		else if (rc != PCRE_ERROR_NOMATCH)
			return -1;
	}

	if (matches != TEST_PATH_MATCHES) {
		/* matches is unsigned, so it must be printed with %u */
		fprintf(stderr, "Found %u matches for %s\n", matches, TEST_PATH);
		return -1;
	}
	return 0;
}

/*
 * Release what read_binary_file() handed out: the spec array and the file
 * mapping described by data.  The entries themselves only point into the
 * mapping, so there is nothing to free per entry.
 *
 * Returns 0 on success, or -1 (after reporting on stderr) when munmap()
 * fails.
 */
static int free_specs(struct spec *specs, struct spec_data *data)
{
	int rc;

	free(specs);

	rc = munmap(data->addr, data->len);
	if (rc < 0)
		perror("munmap");

	return rc;
}

/*
 * Benchmark driver: time NUM_RUNS rounds of mapping the binary file,
 * matching TEST_PATH against every spec and releasing the mapping again.
 * Returns the negative step result as soon as any step fails.
 */
int main(void)
{
	struct spec_data data;
	struct spec *specs;
	unsigned int num_specs;
	int run;
	int rc;

START;
	for (run = 0; run < NUM_RUNS; run++) {
		if ((rc = read_binary_file(&specs, &num_specs, &data)) < 0)
			return rc;

		if ((rc = test_match(specs, num_specs)) < 0)
			return rc;

		if ((rc = free_specs(specs, &data)) < 0)
			return rc;
	}
STOP;
PRINTTIME;

	return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/stat.h>

/* CPU-time samples taken by START and STOP and reported by PRINTTIME. */
clock_t startm, stopm;
/*
 * START and STOP each expand to a complete `if` statement that stores
 * clock() and exits with status 1 when it returns -1.  PRINTTIME expands to
 * a printf() call that already ends in ';' and prints the elapsed processor
 * time in seconds.
 */
#define START if ( (startm = clock()) == -1) {printf("Error calling clock");exit(1);}
#define STOP if ( (stopm = clock()) == -1) {printf("Error calling clock");exit(1);}
#define PRINTTIME printf( "%6.3f seconds used by the processor.\n", ((double)stopm-startm)/CLOCKS_PER_SEC);

/* File opened by get_context_file(). */
#define CONTEXT_PATH "/etc/selinux/targeted/contexts/files/file_contexts"
/* Relative path of the binary spec file produced by the writer and mapped by the reader. */
#define BIN_FILE_PATH "bin_file"
/* Number of iterations each program's timed loop performs. */
#define NUM_RUNS 100

/* Path every test_match() looks up, and the number of specs expected to match it. */
#define TEST_PATH "/var/www/html/cgi-bin/mail.pl"
#define TEST_PATH_MATCHES 7

/*
 * Print a one-line usage message naming prog on stderr and terminate the
 * process with EXIT_FAILURE.  Does not return.
 */
static inline void usage(char *prog)
{
	fprintf(stderr, "usage: %s\n", prog);
	exit(EXIT_FAILURE);
}

/*
 * Open CONTEXT_PATH for reading and return the stream.
 * Reports the failure with perror() and exits with EXIT_FAILURE when the
 * file cannot be opened, so the result is never NULL.
 */
static inline FILE *get_context_file(void)
{
	FILE *stream = fopen(CONTEXT_PATH, "r");

	if (stream)
		return stream;

	perror("fopen");
	exit(EXIT_FAILURE);
}

/*
 * Count the lines in f and rewind it.
 *
 * A line is a run of characters ended by '\n'; a final run that is not
 * newline-terminated counts as a line too, because getline() still returns
 * it and callers size their arrays from this count.  An empty file has 0
 * lines.
 *
 * @f: readable stream, read from its current position to EOF
 *
 * Returns the number of lines; f is positioned at its start afterwards.
 */
static inline unsigned int lines_in_file(FILE *f)
{
	unsigned int lines = 0;
	int prev = '\n';	/* makes an empty file count as zero lines */
	int ch;

	while ((ch = fgetc(f)) != EOF) {
		if (ch == '\n')
			lines++;
		prev = ch;
	}

	/* account for a last line that lacks its trailing newline */
	if (prev != '\n')
		lines++;

	rewind(f);
	return lines;
}

/*
 * Translate a file_contexts type field ("-b", "-c", "-d", "-p", "-l",
 * "-s" or "--") into the matching S_IF* constant.
 *
 * @smode: the type field, or NULL when the entry has none
 *
 * Returns 0 for NULL, the S_IF* value for a recognised field, and
 * (mode_t)-1 after printing "Illegal file type" on stderr for anything
 * else.
 */
static inline mode_t string_to_mode(char *smode)
{
	static const struct {
		char flag;
		mode_t type;
	} file_types[] = {
		{ 'b', S_IFBLK },
		{ 'c', S_IFCHR },
		{ 'd', S_IFDIR },
		{ 'p', S_IFIFO },
		{ 'l', S_IFLNK },
		{ 's', S_IFSOCK },
		{ '-', S_IFREG },
	};
	size_t i;

	if (smode == NULL)
		return 0;

	/* a valid field is exactly two characters long and starts with '-' */
	if (smode[0] == '-' && smode[1] != '\0' && smode[2] == '\0') {
		for (i = 0; i < sizeof(file_types) / sizeof(file_types[0]); i++) {
			if (file_types[i].flag == smode[1])
				return file_types[i].type;
		}
	}

	fprintf(stderr, "Illegal file type %s\n", smode);
	return -1;
}

[Index of Archives]     [Selinux Refpolicy]     [Linux SGX]     [Fedora Users]     [Fedora Desktop]     [Yosemite Photos]     [Yosemite Camping]     [Yosemite Campsites]     [KDE Users]     [Gnome Users]

  Powered by Linux