On Wed, Aug 15, 2012 at 1:56 AM, Russell Coker <russell@xxxxxxxxxxxx> wrote: > On Tue, 14 Aug 2012, Eric Paris <eparis@xxxxxxxxxxxxxx> wrote: >> I have code that does just that. Dan and I both wrote a version. >> I'll attach it. I didn't find the speedups we were hoping for and it >> didn't work correctly/completely in the face of file context >> equivalencies. Although that is likely fixable. I was just looking >> at all of the stem code (and wondered who wrote it but it was >> pre-git). I'm surprised it made a big difference. Wouldn't the regex >> code be able to return extremely quickly if it didn't match? Anyway. >> I'm writing some test programs to look at all of the possibilities. > > At the time I wrote the code I didn't attempt to micro-benchmark it. As the > use case that mattered most was a full relabel of a root filesystem (between > 40,000 and 120,000 files in the common case) and the number of regexes was > smaller than it is today the win was fairly obvious. > > From memory the stem compression change typically improved performance by a > factor of 3-5 depending on what you were doing. That was on systems like a > Pentium-166 or P2-400. The performance benefits on faster systems (800MHz > Athlon and better) were less important to me at the time. > > Prior to stem compression the relabel was entirely CPU bottlenecked on all > systems. After stem compression and some new CPU releases from Intel and AMD > I wrote the code to have two processes doing disk IO (one stat'ing the files > and the other applying labels) so that we could have some parallelism between > regex checks and seeks on disk. > > A theoretical regex library wouldn't be much slower than the integer checks we > are doing, it could just use a 32bit integer compare to check the first 4 bytes > which would compare to the integer stem check we do. Does one of the current > regex libraries do such optimisations where you have a common case of a non- > match on a relatively simple regex? 
Couldn't say. But I can say that pcre seems to kick the crap out of glibc at every turn. My test programs aren't doing any of the stem stuff, so I can't give a perfect comparison yet. I'll pull that in today to see if it seems reasonable to drop all of it and simplify the code. My current test runs a loop 100 times: load the db, look up one semi-complex path (/var/www/html/cgi-bin/mail.pl), and free the db. The timing for each method is as follows: glibc - 16.16 sec; pcre - 2.08 sec; pcre-mmap - 0.23 sec. I made up my own mmap format instead of adding glib and GVariant as a build requirement. I'm not opposed to it. Reusing code seems like a very good idea, though it is kinda weird to have that circular dependency. I'll play with that today as well. You'll find my current test programs attached. -Eric
#include <regex.h> #include <stdio.h> #include "test-regex.h" struct spec { char *context; char *regex; mode_t mode; regex_t reg; }; static int process_file(FILE *context_file, unsigned int file_len, struct spec **out_spec) { struct spec *specs; unsigned int line_num; char *line_buf = NULL; size_t line_len; ssize_t len; specs = calloc(file_len, sizeof(*specs)); if (!specs) { perror("calloc"); exit(EXIT_FAILURE); } line_num = 0; while ((len = getline(&line_buf, &line_len, context_file)) != -1) { char *context; char *mode; char *regex; int items, rc; items = sscanf(line_buf, "%ms %ms %ms", ®ex, &mode, &context); if (items < 2 || items > 3) { fprintf(stderr, "invalid entry, skipping:%s", line_buf); continue; } if (items == 2) { context = mode; mode = NULL; } specs[line_num].context = context; specs[line_num].mode = string_to_mode(mode); free(mode); specs[line_num].regex = regex; rc = regcomp(&specs[line_num].reg, regex, REG_EXTENDED | REG_NOSUB); if (rc < 0) return rc; line_num++; } rewind(context_file); *out_spec = specs; return 0; } static int free_specs(struct spec *specs, unsigned int num_specs) { unsigned int i; for (i = 0; i < num_specs; i++) { free(specs[i].context); free(specs[i].regex); regfree(&specs[i].reg); } free(specs); return 0; } static int test_match(struct spec *specs, unsigned int num_specs) { unsigned int i; unsigned int matches = 0; int rc; for (i = 0; i < num_specs; i++) { rc = regexec(&specs[i].reg, TEST_PATH, 0, NULL, 0); if (rc == 0) matches++; else if (rc == REG_NOMATCH) continue; else return -1; } if (matches != TEST_PATH_MATCHES) { fprintf(stderr, "Found %d matches for %s\n", matches, TEST_PATH); return -1; } return 0; } int main(void) { FILE *context_file; unsigned int num_specs; struct spec *specs; int rc, i; context_file = get_context_file(); num_specs = lines_in_file(context_file); if (num_specs <= 0) exit(EXIT_FAILURE); START; for (i = 0; i < NUM_RUNS; i++) { rc = process_file(context_file, num_specs, &specs); if (rc < 0) return rc; rc 
= test_match(specs, num_specs); if (rc < 0) return rc; rc = free_specs(specs, num_specs); if (rc < 0) return rc; } STOP; PRINTTIME; return 0; }
Attachment:
Makefile
Description: Binary data
/*
 * Benchmark for PCRE.
 *
 * Each run parses the file_contexts file, compiles and studies every entry,
 * matches TEST_PATH against all compiled entries and frees everything again.
 * The cycle is repeated NUM_RUNS times and the CPU time used is printed.
 */
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "test-regex.h"

/* One parsed entry of the context file. */
struct spec {
	char *context;	/* allocated by sscanf("%ms") */
	char *regex;	/* allocated by sscanf("%ms") */
	mode_t mode;	/* result of string_to_mode() */
	pcre *re;	/* compiled pattern; NULL marks an unused slot */
	pcre_extra *sd;	/* study data; may be NULL when study found nothing */
};

/*
 * Release every populated slot of specs and the array itself.
 * Always returns 0.
 */
static int free_specs(struct spec *specs, unsigned int num_specs)
{
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		if (specs[i].sd)
			pcre_free_study(specs[i].sd);
	}
	free(specs);

	return 0;
}

/*
 * Parse context_file into a freshly allocated array of file_len slots.
 *
 * Lines that do not contain two or three whitespace-separated fields are
 * reported and skipped, so fewer than file_len slots may end up populated.
 * On success the stream is rewound, *out_spec receives the array (release it
 * with free_specs(specs, file_len)) and 0 is returned.  On failure everything
 * allocated here is released and -1 is returned.
 */
static int process_file(FILE *context_file, unsigned int file_len,
			struct spec **out_spec)
{
	struct spec *specs;
	unsigned int line_num = 0;
	char *line_buf = NULL;
	size_t line_len = 0;

	specs = calloc(file_len, sizeof(*specs));
	if (!specs) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	while (getline(&line_buf, &line_len, context_file) != -1) {
		char *regex = NULL;
		char *mode = NULL;
		char *context = NULL;
		pcre *re = NULL;
		pcre_extra *sd;
		const char *err = NULL;
		int items, erroff;

		items = sscanf(line_buf, "%ms %ms %ms", &regex, &mode, &context);
		if (items < 2 || items > 3) {
			fprintf(stderr, "invalid entry, skipping:%s", line_buf);
			/* a one-field line still allocated the first field */
			free(regex);
			free(mode);
			continue;
		}

		/* two fields: the optional file type is absent */
		if (items == 2) {
			context = mode;
			mode = NULL;
		}

		/* never write past the array sized from the line count */
		if (line_num >= file_len) {
			fprintf(stderr, "more entries than the %u lines counted\n",
				file_len);
			goto err_entry;
		}

		re = pcre_compile(regex, 0, &err, &erroff, NULL);
		if (!re) {
			fprintf(stderr, "PCRE compilation failed for %s at offset %d: %s\n",
				regex, erroff, err);
			goto err_entry;
		}

		/*
		 * pcre_study() may return NULL without having failed; only a
		 * non-NULL error message indicates an actual error.
		 */
		err = NULL;
		sd = pcre_study(re, 0, &err);
		if (err) {
			fprintf(stderr, "PCRE study failed for %s: %s\n", regex, err);
			goto err_entry;
		}

		specs[line_num].context = context;
		specs[line_num].mode = string_to_mode(mode);
		specs[line_num].regex = regex;
		specs[line_num].re = re;
		specs[line_num].sd = sd;
		free(mode);
		line_num++;
		continue;

err_entry:
		pcre_free(re);
		free(regex);
		free(mode);
		free(context);
		free(line_buf);
		free_specs(specs, file_len);
		return -1;
	}

	free(line_buf);
	rewind(context_file);
	*out_spec = specs;

	return 0;
}

/*
 * Match TEST_PATH against every populated slot.
 *
 * No output vector is passed to pcre_exec(), so a successful match is
 * reported as 0.  Returns 0 when exactly TEST_PATH_MATCHES entries matched,
 * -1 when the count differs or pcre_exec() reports an error.
 */
static int test_match(struct spec *specs, unsigned int num_specs)
{
	const int path_len = (int)strlen(TEST_PATH);
	unsigned int i;
	unsigned int matches = 0;

	for (i = 0; i < num_specs; i++) {
		int rc;

		if (!specs[i].re)
			continue;	/* slot left empty by a skipped line */

		rc = pcre_exec(specs[i].re, specs[i].sd, TEST_PATH, path_len,
			       0, 0, NULL, 0);
		if (rc == 0)
			matches++;
		else if (rc != PCRE_ERROR_NOMATCH)
			return -1;
	}

	if (matches != TEST_PATH_MATCHES) {
		fprintf(stderr, "Found %u matches for %s\n", matches, TEST_PATH);
		return -1;
	}

	return 0;
}

/* Run the load/match/free cycle NUM_RUNS times and print the CPU time. */
int main(void)
{
	FILE *context_file;
	unsigned int num_specs;
	struct spec *specs;
	int failed = 0;
	int i;

	context_file = get_context_file();
	num_specs = lines_in_file(context_file);
	if (num_specs == 0)
		exit(EXIT_FAILURE);

	START;
	for (i = 0; i < NUM_RUNS; i++) {
		if (process_file(context_file, num_specs, &specs) < 0) {
			failed = 1;
			break;
		}
		if (test_match(specs, num_specs) < 0)
			failed = 1;
		free_specs(specs, num_specs);
		if (failed)
			break;
	}
	STOP;

	fclose(context_file);
	if (failed)
		return EXIT_FAILURE;

	PRINTTIME;
	return 0;
}
/*
 * Generator for the pre-compiled PCRE database.
 *
 * Each run parses the file_contexts file, compiles and studies every entry
 * and writes the result to BIN_FILE_PATH.  The cycle is repeated NUM_RUNS
 * times and the CPU time used is printed.
 *
 * Layout of the output file, all fields in host byte order and unpadded:
 *   unsigned int magic (0xdeadbeef)
 *   unsigned int number of entries
 *   per entry:
 *     size_t len, context string including its NUL
 *     size_t len, regex string including its NUL
 *     mode_t mode
 *     size_t len, compiled pattern
 *     size_t len, study data
 */
#include <pcre.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "test-regex.h"

/* One parsed entry of the context file. */
struct spec {
	char *context;	/* allocated by sscanf("%ms") */
	char *regex;	/* allocated by sscanf("%ms") */
	mode_t mode;	/* result of string_to_mode() */
	pcre *re;	/* compiled pattern; NULL marks an unused slot */
	pcre_extra *sd;	/* study data; non-NULL for every populated slot */
};

/*
 * Release every populated slot of specs and the array itself.
 * Always returns 0.
 */
static int free_specs(struct spec *specs, unsigned int num_specs)
{
	unsigned int i;

	for (i = 0; i < num_specs; i++) {
		free(specs[i].context);
		free(specs[i].regex);
		pcre_free(specs[i].re);
		if (specs[i].sd)
			pcre_free_study(specs[i].sd);
	}
	free(specs);

	return 0;
}

/*
 * Parse context_file into a freshly allocated array of file_len slots.
 *
 * Lines that do not contain two or three whitespace-separated fields are
 * reported and skipped, so fewer than file_len slots may end up populated.
 * Study data is mandatory here because the output format stores it for
 * every entry.  On success the stream is rewound, *out_spec receives the
 * array (release it with free_specs(specs, file_len)) and 0 is returned.
 * On failure everything allocated here is released and -1 is returned.
 */
static int process_file(FILE *context_file, unsigned int file_len,
			struct spec **out_spec)
{
	struct spec *specs;
	unsigned int line_num = 0;
	char *line_buf = NULL;
	size_t line_len = 0;

	specs = calloc(file_len, sizeof(*specs));
	if (!specs) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	while (getline(&line_buf, &line_len, context_file) != -1) {
		char *regex = NULL;
		char *mode = NULL;
		char *context = NULL;
		pcre *re = NULL;
		pcre_extra *sd;
		const char *err = NULL;
		int items, erroff;

		items = sscanf(line_buf, "%ms %ms %ms", &regex, &mode, &context);
		if (items < 2 || items > 3) {
			fprintf(stderr, "invalid entry, skipping:%s", line_buf);
			/* a one-field line still allocated the first field */
			free(regex);
			free(mode);
			continue;
		}

		/* two fields: the optional file type is absent */
		if (items == 2) {
			context = mode;
			mode = NULL;
		}

		/* never write past the array sized from the line count */
		if (line_num >= file_len) {
			fprintf(stderr, "more entries than the %u lines counted\n",
				file_len);
			goto err_entry;
		}

		re = pcre_compile(regex, 0, &err, &erroff, NULL);
		if (!re) {
			fprintf(stderr, "PCRE compilation failed for %s at offset %d: %s\n",
				regex, erroff, err);
			goto err_entry;
		}

		/*
		 * pcre_study() can return NULL with no error message when it
		 * has nothing to report; never hand a NULL string to "%s".
		 */
		err = NULL;
		sd = pcre_study(re, 0, &err);
		if (!sd) {
			fprintf(stderr, "PCRE study failed for %s: %s\n", regex,
				err ? err : "no study data produced");
			goto err_entry;
		}

		specs[line_num].context = context;
		specs[line_num].mode = string_to_mode(mode);
		specs[line_num].regex = regex;
		specs[line_num].re = re;
		specs[line_num].sd = sd;
		free(mode);
		line_num++;
		continue;

err_entry:
		pcre_free(re);
		free(regex);
		free(mode);
		free(context);
		free(line_buf);
		free_specs(specs, file_len);
		return -1;
	}

	free(line_buf);
	rewind(context_file);
	*out_spec = specs;

	return 0;
}

/* Write nbytes raw bytes; returns 0 on success, -1 on a short write. */
static int put_bytes(FILE *out, const void *data, size_t nbytes)
{
	if (nbytes == 0)
		return 0;
	return fwrite(data, 1, nbytes, out) == nbytes ? 0 : -1;
}

/* Write a size_t length followed by that many bytes; 0 on success, -1 on error. */
static int put_blob(FILE *out, const void *data, size_t nbytes)
{
	if (put_bytes(out, &nbytes, sizeof(nbytes)) < 0)
		return -1;
	return put_bytes(out, data, nbytes);
}

/*
 * Serialise every populated slot of specs to BIN_FILE_PATH.
 *
 * The entry count stored in the file is the number of populated slots, not
 * num_specs, so slots left empty by skipped lines are not written.  The
 * stream is closed on every path; a failing fclose() counts as a write
 * error.  Exits if the file cannot be opened; otherwise returns 0 on
 * success and -1 on failure.
 */
static int write_binary_file(struct spec *specs, unsigned int num_specs)
{
	FILE *bin_file;
	unsigned int magic = 0xdeadbeef;
	unsigned int used = 0;
	unsigned int i;
	int ret = -1;

	for (i = 0; i < num_specs; i++)
		if (specs[i].re)
			used++;

	bin_file = fopen(BIN_FILE_PATH, "w");
	if (!bin_file) {
		perror("fopen binfile");
		exit(EXIT_FAILURE);
	}

	if (put_bytes(bin_file, &magic, sizeof(magic)) < 0 ||
	    put_bytes(bin_file, &used, sizeof(used)) < 0)
		goto out;

	for (i = 0; i < num_specs; i++) {
		const struct spec *s = &specs[i];
		size_t size = 0;

		if (!s->re)
			continue;

		if (put_blob(bin_file, s->context, strlen(s->context) + 1) < 0 ||
		    put_blob(bin_file, s->regex, strlen(s->regex) + 1) < 0 ||
		    put_bytes(bin_file, &s->mode, sizeof(s->mode)) < 0)
			goto out;

		if (pcre_fullinfo(s->re, NULL, PCRE_INFO_SIZE, &size) < 0 ||
		    put_blob(bin_file, s->re, size) < 0)
			goto out;

		if (pcre_fullinfo(s->re, s->sd, PCRE_INFO_STUDYSIZE, &size) < 0 ||
		    put_blob(bin_file, s->sd->study_data, size) < 0)
			goto out;
	}
	ret = 0;

out:
	/* fclose() flushes; a failure there means the file is incomplete */
	if (fclose(bin_file) != 0)
		ret = -1;
	return ret;
}

/* Run the load/write/free cycle NUM_RUNS times and print the CPU time. */
int main(void)
{
	FILE *context_file;
	unsigned int num_specs;
	struct spec *specs;
	int failed = 0;
	int i;

	context_file = get_context_file();
	num_specs = lines_in_file(context_file);
	if (num_specs == 0)
		exit(EXIT_FAILURE);

	START;
	for (i = 0; i < NUM_RUNS; i++) {
		if (process_file(context_file, num_specs, &specs) < 0) {
			failed = 1;
			break;
		}
		if (write_binary_file(specs, num_specs) < 0)
			failed = 1;
		free_specs(specs, num_specs);
		if (failed)
			break;
	}
	STOP;

	fclose(context_file);
	if (failed)
		return EXIT_FAILURE;

	PRINTTIME;
	return 0;
}
#include <fcntl.h> #include <pcre.h> #include <stdio.h> #include <unistd.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> #include "test-regex.h" struct spec_data { void *addr; size_t len; }; struct spec { char *context; char *regex; mode_t *mode; pcre *re; pcre_extra sd; }; static int read_binary_file(struct spec **out_specs, unsigned int *out_num_specs, struct spec_data *data) { int fd, rc; size_t len; size_t *plen; unsigned int *magic; unsigned int i; unsigned int *num_specs; struct spec *specs; struct stat stat; char *addr; fd = open(BIN_FILE_PATH, O_RDONLY); if (fd < 0) return -1; rc = fstat(fd, &stat); if (rc < 0) return -1; len = stat.st_size; len += (sysconf(_SC_PAGE_SIZE) - 1); len &= ~(sysconf(_SC_PAGE_SIZE) - 1); addr = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); return -1; } data->addr = addr; data->len = len; close(fd); magic = (unsigned int *)addr; if (*magic != 0xdeadbeef) return -1; addr += sizeof(*magic); num_specs = (unsigned int *)addr; addr += sizeof(*num_specs); specs = calloc(*num_specs, sizeof(*specs)); if (!specs) return -1; for (i = 0; i < *num_specs; i++) { plen = (size_t *)addr; addr += sizeof(*plen); specs[i].context = (char *)addr; addr += *plen; plen = (size_t *)addr; addr += sizeof(*plen); specs[i].regex = (char *)addr; addr += *plen; specs[i].mode = (mode_t *)addr; addr += sizeof(*specs[i].mode); plen = (size_t *)addr; addr += sizeof(*plen); specs[i].re = (pcre *)addr; addr += *plen; plen = (size_t *)addr; addr += sizeof(*plen); specs[i].sd.study_data = (void *)addr; specs[i].sd.flags |= PCRE_EXTRA_STUDY_DATA; addr += *plen; } *out_num_specs = *num_specs; *out_specs = specs; return 0; } static int test_match(struct spec *specs, unsigned int num_specs) { unsigned int i; unsigned int matches = 0; int rc; for (i = 0; i < num_specs; i++) { pcre *re = specs[i].re; pcre_extra *sd = &specs[i].sd; rc = pcre_exec(re, sd, TEST_PATH, strlen(TEST_PATH), 0, 0, NULL, 0); if (rc == 
0) matches++; else if (rc == PCRE_ERROR_NOMATCH) continue; else return -1; } if (matches != TEST_PATH_MATCHES) { fprintf(stderr, "Found %d matches for %s\n", matches, TEST_PATH); return -1; } return 0; } static int free_specs(struct spec *specs, struct spec_data *data) { free(specs); munmap(data->addr, data->len); return 0; } int main(void) { unsigned int num_specs; struct spec *specs; struct spec_data data; int rc, i; START; for (i = 0; i < NUM_RUNS; i++) { rc = read_binary_file(&specs, &num_specs, &data); if (rc < 0) return rc; rc = test_match(specs, num_specs); if (rc < 0) return rc; rc = free_specs(specs, &data); if (rc < 0) return rc; } STOP; PRINTTIME; return 0; }
/*
 * Helpers shared by the regex benchmark programs: timing macros, the paths
 * and constants of the test, and small parsing utilities.
 */
#ifndef TEST_REGEX_H
#define TEST_REGEX_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>

/* CPU-time stamps taken by START and STOP; private to each translation unit. */
static clock_t startm, stopm;

/* Record the starting CPU time; exits if clock() is unavailable. */
#define START do {							\
		startm = clock();					\
		if (startm == (clock_t)-1) {				\
			printf("Error calling clock");			\
			exit(1);					\
		}							\
	} while (0)

/* Record the ending CPU time; exits if clock() is unavailable. */
#define STOP do {							\
		stopm = clock();					\
		if (stopm == (clock_t)-1) {				\
			printf("Error calling clock");			\
			exit(1);					\
		}							\
	} while (0)

/* Print the CPU time elapsed between START and STOP. */
#define PRINTTIME printf("%6.3f seconds used by the processor.\n",	\
			 ((double)stopm - startm) / CLOCKS_PER_SEC)

#define CONTEXT_PATH "/etc/selinux/targeted/contexts/files/file_contexts"
#define BIN_FILE_PATH "bin_file"
#define NUM_RUNS 100
#define TEST_PATH "/var/www/html/cgi-bin/mail.pl"
#define TEST_PATH_MATCHES 7

/* Print a usage line to stderr and exit with failure. */
static inline void usage(char *prog)
{
	fprintf(stderr, "usage: %s\n", prog);
	exit(EXIT_FAILURE);
}

/* Open CONTEXT_PATH for reading; exits with a message if that fails. */
static inline FILE *get_context_file(void)
{
	FILE *f = fopen(CONTEXT_PATH, "r");

	if (!f) {
		perror("fopen");
		exit(EXIT_FAILURE);
	}
	return f;
}

/*
 * Count the lines in f and rewind it.
 *
 * A final line that is not terminated by '\n' is counted as well, so the
 * result is never smaller than the number of lines getline() will return.
 */
static inline unsigned int lines_in_file(FILE *f)
{
	unsigned int lines = 0;
	int last = '\n';	/* so that an empty file counts zero lines */
	int ch;

	while ((ch = fgetc(f)) != EOF) {
		if (ch == '\n')
			lines++;
		last = ch;
	}
	if (last != '\n')
		lines++;

	rewind(f);
	return lines;
}

/*
 * Translate a file type field ("-b", "-c", "-d", "-p", "-l", "-s" or "--")
 * into the matching S_IF* constant.
 *
 * Returns 0 when smode is NULL and (mode_t)-1, after printing a message,
 * when the string is not one of the recognised forms.
 */
static inline mode_t string_to_mode(char *smode)
{
	if (!smode)
		return 0;

	if (strlen(smode) != 2 || smode[0] != '-') {
		fprintf(stderr, "Illegal file type %s\n", smode);
		return (mode_t)-1;
	}

	switch (smode[1]) {
	case 'b':
		return S_IFBLK;
	case 'c':
		return S_IFCHR;
	case 'd':
		return S_IFDIR;
	case 'p':
		return S_IFIFO;
	case 'l':
		return S_IFLNK;
	case 's':
		return S_IFSOCK;
	case '-':
		return S_IFREG;
	default:
		fprintf(stderr, "Illegal file type %s\n", smode);
		return (mode_t)-1;
	}
}

#endif /* TEST_REGEX_H */