On Fri, Jan 17, 2020 at 8:11 PM Stephen Smalley <sds@xxxxxxxxxxxxx> wrote: > On 1/16/20 4:39 PM, Lucas Stach wrote: > > The current hash is too small for current usages in, e.g. the Fedora standard > > policy. On file creates a considerable amount of CPU time is spent walking the > > the hash chains. Increasing the number of hash buckets somewhat mitigates the > > issue, but doesn't completely get rid of the long hash chains. > > > > This patch does take the bit more invasive route by converting the filename > > trans hash to a rhashtable to allow this hash to scale with load. > > > > fs_mark create benchmark on a SSD device, no ramdisk: > > Count Size Files/sec App Overhead > > before: > > 10000 512 512.3 147715 > > after: > > 10000 512 572.3 75141 > > > > filenametr_cmp(), which was the topmost function in the CPU cycle trace before > > at ~5% of the overall CPU time, is now down in the noise. > > Thank you for working on this. IMHO, Fedora overuses name-based type > transitions but that's another topic. I haven't yet investigated the > root cause but with your patch applied, I see some test failures related > to name-based transitions: > > [...] > # Failed test at overlay/test line 439. > overlay/test ................ 114/119 # Looks like you failed 1 test of 119. > [...] > filesystem/test ............. 3/70 File context error, expected: > test_filesystem_filenametranscon1_t > got: > test_filesystem_filetranscon_t > > # Failed test at filesystem/test line 279. > File context error, expected: > test_filesystem_filenametranscon2_t > got: > test_filesystem_filetranscon_t > > # Failed test at filesystem/test line 286. > filesystem/test ............. 68/70 # Looks like you failed 2 tests of 70. > > You can reproduce by cloning the selinux-testsuite from > https://github.com/SELinuxProject/selinux-testsuite, applying the > filesystem tests patch from > https://patchwork.kernel.org/patch/11337659/, > and following the README.md instructions. I think I figured out what's wrong - see below. > > > > > Signed-off-by: Lucas Stach <dev@xxxxxxxxxx> > > --- > > security/selinux/ss/policydb.c | 140 +++++++++++++++++++++++---------- > > security/selinux/ss/policydb.h | 14 ++-- > > security/selinux/ss/services.c | 31 +------- > > 3 files changed, 109 insertions(+), 76 deletions(-) > > > > diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c > > index e20624a68f5d..d059317c15b6 100644 > > --- a/security/selinux/ss/policydb.c > > +++ b/security/selinux/ss/policydb.c > > @@ -34,6 +34,7 @@ > > #include <linux/string.h> > > #include <linux/errno.h> > > #include <linux/audit.h> > > +#include <linux/rhashtable.h> > > #include "security.h" > > > > #include "policydb.h" > > @@ -334,15 +335,13 @@ static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) = > > cat_destroy, > > }; > > > > -static int filenametr_destroy(void *key, void *datum, void *p) > > +static void filenametr_destroy(void *ptr, void *arg) > > { > > - struct filename_trans *ft = key; > > + struct filename_trans *ft = ptr; > > > > kfree(ft->name); > > - kfree(key); > > - kfree(datum); > > + kfree(ft); > > cond_resched(); > > - return 0; > > } > > > > static int range_tr_destroy(void *key, void *datum, void *p) > > @@ -404,9 +403,9 @@ static int roles_init(struct policydb *p) > > return rc; > > } > > > > -static u32 filenametr_hash(struct hashtab *h, const void *k) > > +static u32 filenametr_hash(const void *data, u32 len, u32 seed) > > { > > - const struct filename_trans *ft = k; > > + const struct filename_trans *ft = data; > > unsigned long hash; > > unsigned int byte_num; > > unsigned char focus; > > @@ -416,13 +415,15 @@ static u32 filenametr_hash(struct hashtab *h, const void *k) > > byte_num = 0; > > while ((focus = ft->name[byte_num++])) > > hash = partial_name_hash(focus, hash); > > - return hash & (h->size - 1); > > + > > + return end_name_hash(hash); > > } > > > > -static int filenametr_cmp(struct hashtab *h, const void *k1, const void *k2) > > +static int filenametr_cmp(struct rhashtable_compare_arg *arg, > > + const void *obj) > > { > > - const struct filename_trans *ft1 = k1; > > - const struct filename_trans *ft2 = k2; > > + const struct filename_trans *ft1 = arg->key; > > + const struct filename_trans *ft2 = obj; > > int v; > > > > v = ft1->stype - ft2->stype; > > @@ -441,6 +442,39 @@ static int filenametr_cmp(struct hashtab *h, const void *k1, const void *k2) > > > > } > > > > +static const struct rhashtable_params filename_trans_hashparams = { > > + .nelem_hint = 1024, > > + .head_offset = offsetof(struct filename_trans, hash_head), You need to add: +.hashfn = filenametr_hash, here so that the key is correctly hashed on lookup. After applying this fix, the selinux-testuite passes for me with this patch. > > + .obj_hashfn = filenametr_hash, > > + .obj_cmpfn = filenametr_cmp, > > +}; > > + > > +void policydb_filename_compute_type(struct policydb *policydb, > > + struct context *newcontext, > > + u32 stype, u32 ttype, u16 tclass, > > + const char *objname) > > +{ > > + struct filename_trans key, *ft; > > + > > + /* > > + * Most filename trans rules are going to live in specific directories > > + * like /dev or /var/run. This bitmap will quickly skip rule searches > > + * if the ttype does not contain any rules. > > + */ > > + if (!ebitmap_get_bit(&policydb->filename_trans_ttypes, ttype)) > > + return; > > + > > + key.stype = stype; > > + key.ttype = ttype; > > + key.tclass = tclass; > > + key.name = objname; > > + > > + ft = rhashtable_lookup(&policydb->filename_trans, &key, > > + filename_trans_hashparams); > > + if (ft) > > + newcontext->type = ft->otype; > > +} > > + > > static u32 rangetr_hash(struct hashtab *h, const void *k) > > { > > const struct range_trans *key = k; > > @@ -494,12 +528,7 @@ static int policydb_init(struct policydb *p) > > if (rc) > > goto out; > > > > - p->filename_trans = hashtab_create(filenametr_hash, filenametr_cmp, > > - (1 << 10)); > > - if (!p->filename_trans) { > > - rc = -ENOMEM; > > - goto out; > > - } > > + rhashtable_init(&p->filename_trans, &filename_trans_hashparams); > > > > p->range_tr = hashtab_create(rangetr_hash, rangetr_cmp, 256); > > if (!p->range_tr) { > > @@ -513,7 +542,7 @@ static int policydb_init(struct policydb *p) > > > > return 0; > > out: > > - hashtab_destroy(p->filename_trans); > > + rhashtable_destroy(&p->filename_trans); > > hashtab_destroy(p->range_tr); > > for (i = 0; i < SYM_NUM; i++) { > > hashtab_map(p->symtab[i].table, destroy_f[i], NULL); > > @@ -831,8 +860,8 @@ void policydb_destroy(struct policydb *p) > > } > > kfree(lra); > > > > - hashtab_map(p->filename_trans, filenametr_destroy, NULL); > > - hashtab_destroy(p->filename_trans); > > + rhashtable_free_and_destroy(&p->filename_trans, filenametr_destroy, > > + NULL); > > > > hashtab_map(p->range_tr, range_tr_destroy, NULL); > > hashtab_destroy(p->range_tr); > > @@ -1878,7 +1907,6 @@ static int range_read(struct policydb *p, void *fp) > > static int filename_trans_read(struct policydb *p, void *fp) > > { > > struct filename_trans *ft; > > - struct filename_trans_datum *otype; > > char *name; > > u32 nel, len; > > __le32 buf[4]; > > @@ -1893,7 +1921,6 @@ static int filename_trans_read(struct policydb *p, void *fp) > > nel = le32_to_cpu(buf[0]); > > > > for (i = 0; i < nel; i++) { > > - otype = NULL; > > name = NULL; > > > > rc = -ENOMEM; > > @@ -1901,11 +1928,6 @@ static int filename_trans_read(struct policydb *p, void *fp) > > if (!ft) > > goto out; > > > > - rc = -ENOMEM; > > - otype = kmalloc(sizeof(*otype), GFP_KERNEL); > > - if (!otype) > > - goto out; > > - > > /* length of the path component string */ > > rc = next_entry(buf, fp, sizeof(u32)); > > if (rc) > > @@ -1926,14 +1948,14 @@ static int filename_trans_read(struct policydb *p, void *fp) > > ft->stype = le32_to_cpu(buf[0]); > > ft->ttype = le32_to_cpu(buf[1]); > > ft->tclass = le32_to_cpu(buf[2]); > > - > > - otype->otype = le32_to_cpu(buf[3]); > > + ft->otype = le32_to_cpu(buf[3]); > > > > rc = ebitmap_set_bit(&p->filename_trans_ttypes, ft->ttype, 1); > > if (rc) > > goto out; > > > > - rc = hashtab_insert(p->filename_trans, ft, otype); > > + rc = rhashtable_insert_fast(&p->filename_trans, &ft->hash_head, > > + filename_trans_hashparams); > > if (rc) { > > /* > > * Do not return -EEXIST to the caller, or the system > > @@ -1944,15 +1966,12 @@ static int filename_trans_read(struct policydb *p, void *fp) > > /* But free memory to avoid memory leak. */ > > kfree(ft); > > kfree(name); > > - kfree(otype); > > } > > } > > - hash_eval(p->filename_trans, "filenametr"); > > return 0; > > out: > > kfree(ft); > > kfree(name); > > - kfree(otype); > > > > return rc; > > } > > @@ -3323,12 +3342,10 @@ static int range_write(struct policydb *p, void *fp) > > return 0; > > } > > > > -static int filename_write_helper(void *key, void *data, void *ptr) > > +static int filename_write_helper(struct filename_trans *ft, > > + struct policy_file *fp) > > { > > __le32 buf[4]; > > - struct filename_trans *ft = key; > > - struct filename_trans_datum *otype = data; > > - void *fp = ptr; > > int rc; > > u32 len; > > > > @@ -3345,7 +3362,7 @@ static int filename_write_helper(void *key, void *data, void *ptr) > > buf[0] = cpu_to_le32(ft->stype); > > buf[1] = cpu_to_le32(ft->ttype); > > buf[2] = cpu_to_le32(ft->tclass); > > - buf[3] = cpu_to_le32(otype->otype); > > + buf[3] = cpu_to_le32(ft->otype); > > > > rc = put_entry(buf, sizeof(u32), 4, fp); > > if (rc) > > @@ -3356,15 +3373,34 @@ static int filename_write_helper(void *key, void *data, void *ptr) > > > > static int filename_trans_write(struct policydb *p, void *fp) > > { > > - u32 nel; > > + u32 nel = 0; > > __le32 buf[1]; > > - int rc; > > + int rc = 0; > > + struct rhashtable_iter iter; > > > > if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS) > > return 0; > > > > - nel = 0; > > - rc = hashtab_map(p->filename_trans, hashtab_cnt, &nel); > > + rhashtable_walk_enter(&p->filename_trans, &iter); > > + rhashtable_walk_start(&iter); > > + for(;;) { > > + struct filename_trans *trans; > > + > > + trans = rhashtable_walk_next(&iter); > > + if (!trans) > > + break; > > + if (IS_ERR(trans)) { > > + if (PTR_ERR(trans) == -EAGAIN) { > > + nel = 0; > > + continue; > > + } > > + rc = PTR_ERR(trans); > > + break; > > + } > > + nel++; > > + }; > > + rhashtable_walk_stop(&iter); > > + rhashtable_walk_exit(&iter); > > if (rc) > > return rc; > > > > @@ -3373,7 +3409,25 @@ static int filename_trans_write(struct policydb *p, void *fp) > > if (rc) > > return rc; > > > > - rc = hashtab_map(p->filename_trans, filename_write_helper, fp); > > + rhashtable_walk_enter(&p->filename_trans, &iter); > > + rhashtable_walk_start(&iter); > > + for(;;) { > > + struct filename_trans *trans; > > + > > + trans = rhashtable_walk_next(&iter); > > + if (!trans) > > + break; > > + if (IS_ERR(trans)) { > > + if (PTR_ERR(trans) == -EAGAIN) { > > + continue; > > + } > > + rc = PTR_ERR(trans); > > + break; > > + } > > + filename_write_helper(trans, fp); > > + }; > > + rhashtable_walk_stop(&iter); > > + rhashtable_walk_exit(&iter); > > if (rc) > > return rc; > > > > diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h > > index bc56b14e2216..f1d2f5166af2 100644 > > --- a/security/selinux/ss/policydb.h > > +++ b/security/selinux/ss/policydb.h > > @@ -29,6 +29,7 @@ > > #include "mls_types.h" > > #include "context.h" > > #include "constraint.h" > > +#include <linux/rhashtable.h> > > > > /* > > * A datum type is defined for each kind of symbol > > @@ -90,16 +91,14 @@ struct role_trans { > > }; > > > > struct filename_trans { > > + struct rhash_head hash_head; > > + u32 otype; > > u32 stype; /* current process */ > > u32 ttype; /* parent dir context */ > > u16 tclass; /* class of new object */ > > const char *name; /* last path component */ > > }; > > > > -struct filename_trans_datum { > > - u32 otype; /* expected of new object */ > > -}; > > - > > struct role_allow { > > u32 role; /* current role */ > > u32 new_role; /* new role */ > > @@ -266,7 +265,7 @@ struct policydb { > > /* quickly exclude lookups when parent ttype has no rules */ > > struct ebitmap filename_trans_ttypes; > > /* actual set of filename_trans rules */ > > - struct hashtab *filename_trans; > > + struct rhashtable filename_trans; > > > > /* bools indexed by (value - 1) */ > > struct cond_bool_datum **bool_val_to_struct; > > @@ -318,6 +317,11 @@ extern int policydb_role_isvalid(struct policydb *p, unsigned int role); > > extern int policydb_read(struct policydb *p, void *fp); > > extern int policydb_write(struct policydb *p, void *fp); > > > > +void policydb_filename_compute_type(struct policydb *policydb, > > + struct context *newcontext, > > + u32 stype, u32 ttype, u16 tclass, > > + const char *objname); > > + > > #define PERM_SYMTAB_SIZE 32 > > > > #define POLICYDB_CONFIG_MLS 1 > > diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c > > index a5813c7629c1..60a98d900dd3 100644 > > --- a/security/selinux/ss/services.c > > +++ b/security/selinux/ss/services.c > > @@ -1607,32 +1607,6 @@ static int compute_sid_handle_invalid_context( > > return -EACCES; > > } > > > > -static void filename_compute_type(struct policydb *policydb, > > - struct context *newcontext, > > - u32 stype, u32 ttype, u16 tclass, > > - const char *objname) > > -{ > > - struct filename_trans ft; > > - struct filename_trans_datum *otype; > > - > > - /* > > - * Most filename trans rules are going to live in specific directories > > - * like /dev or /var/run. This bitmap will quickly skip rule searches > > - * if the ttype does not contain any rules. > > - */ > > - if (!ebitmap_get_bit(&policydb->filename_trans_ttypes, ttype)) > > - return; > > - > > - ft.stype = stype; > > - ft.ttype = ttype; > > - ft.tclass = tclass; > > - ft.name = objname; > > - > > - otype = hashtab_search(policydb->filename_trans, &ft); > > - if (otype) > > - newcontext->type = otype->otype; > > -} > > - > > static int security_compute_sid(struct selinux_state *state, > > u32 ssid, > > u32 tsid, > > @@ -1770,8 +1744,9 @@ static int security_compute_sid(struct selinux_state *state, > > > > /* if we have a objname this is a file trans check so check those rules */ > > if (objname) > > - filename_compute_type(policydb, &newcontext, scontext->type, > > - tcontext->type, tclass, objname); > > + policydb_filename_compute_type(policydb, &newcontext, > > + scontext->type, tcontext->type, > > + tclass, objname); > > > > /* Check for class-specific changes. */ > > if (specified & AVTAB_TRANSITION) { > > > -- Ondrej Mosnacek <omosnace at redhat dot com> Software Engineer, Security Technologies Red Hat, Inc.