On Fri, Jan 27, 2012 at 6:51 AM, Eric W. Biederman <ebiederm@xxxxxxxxxxxx> wrote: > Simplify the code and the sysctl semantics by autogenerating > sysctl directories when a sysctl table is registered that needs > the directories and autodeleting the directories when there are > no more sysctl tables registered that need them. > > Autogenerating directories keeps sysctl tables from depending > on each other, removing all of the arcane register/unregister > ordering constraints and makes it impossible to get the order > wrong when reigsering and unregistering sysctl tables. > > Autogenerating directories yields one unique entity that dentries > can point to, retaining the current effective use of the dcache. > > Add struct ctl_dir as the type of these new autogenerated > directories. > > The attached_by and attached_to fields in ctl_table_header are > removed as they are no longer needed. > > The child field in ctl_table is no longer needed by the core of > the sysctl code. ctl_table.child can be removed once all of the > existing users have been updated. > > Benchmark before: > make-dummies 0 999 -> 0.7s > rmmod dummy -> 0.07s > make-dummies 0 9999 -> 1m10s > rmmod dummy -> 0.4s > > Benchmark after: > make-dummies 0 999 -> 0.44s > rmmod dummy -> 0.065s > make-dummies 0 9999 -> 1m36s > rmmod dummy -> 0.4s > > Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> > --- > fs/proc/proc_sysctl.c | 342 ++++++++++++++++++++---------------------------- > include/linux/sysctl.h | 10 +- > 2 files changed, 150 insertions(+), 202 deletions(-) > > diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c > index 65c13dd..3c0767d 100644 > --- a/fs/proc/proc_sysctl.c > +++ b/fs/proc/proc_sysctl.c > @@ -28,28 +28,31 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) > static struct ctl_table root_table[] = { > { > .procname = "", > - .mode = S_IRUGO|S_IXUGO, > - .child = &root_table[1], > + .mode = S_IFDIR|S_IRUGO|S_IXUGO, > }, > { } > }; > static struct ctl_table_root sysctl_table_root; > -static struct ctl_table_header root_table_header = { > - {{.count = 1, > - .nreg = 1, > - .ctl_table = root_table, > - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, > - .root = &sysctl_table_root, > - .set = &sysctl_table_root.default_set, > +static struct ctl_dir sysctl_root_dir = { > + .header = { > + {{.count = 1, > + .nreg = 1, > + .ctl_table = root_table, > + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, > + .root = &sysctl_table_root, > + .set = &sysctl_table_root.default_set, > + }, > }; > static struct ctl_table_root sysctl_table_root = { > .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), > - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), > + .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), > .default_set.root = &sysctl_table_root, > }; > > static DEFINE_SPINLOCK(sysctl_lock); > > +static void drop_sysctl_table(struct ctl_table_header *header); > + > static int namecmp(const char *name1, int len1, const char *name2, int len2) > { > int minlen; > @@ -66,29 +69,18 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) > } > > static struct ctl_table *find_entry(struct ctl_table_header **phead, > - struct ctl_table_set *set, > - struct ctl_table_header *dir_head, struct ctl_table *dir, > + struct ctl_table_set *set, struct ctl_dir *dir, > const char *name, int namelen) > { > struct ctl_table_header *head; > struct ctl_table *entry; > > - if (dir_head->set == set) { > - for (entry = dir; entry->procname; entry++) { > - const char *procname = entry->procname; > - if (namecmp(procname, strlen(procname), name, namelen) == 0) { > - *phead = dir_head; > - return entry; > - } > - } > - } > - > list_for_each_entry(head, &set->list, ctl_entry) { > if (head->unregistering) > continue; > - if (head->attached_to != dir) > + if (head->parent != dir) > continue; > - for (entry = head->attached_by; entry->procname; entry++) { > + for (entry = head->ctl_table; entry->procname; entry++) { > const char *procname = entry->procname; > if (namecmp(procname, strlen(procname), name, namelen) == 0) { > *phead = head; > @@ -103,6 +95,7 @@ static void init_header(struct ctl_table_header *head, > struct ctl_table_root *root, struct ctl_table_set *set, > struct ctl_table *table) > { > + head->ctl_table = table; > head->ctl_table_arg = table; > INIT_LIST_HEAD(&head->ctl_entry); > head->used = 0; > @@ -119,9 +112,10 @@ static void erase_header(struct ctl_table_header *head) > list_del_init(&head->ctl_entry); > } > > -static void insert_header(struct ctl_table_header *header) > +static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) > { > - header->parent->count++; > + header->parent = dir; > + header->parent->header.nreg++; > list_add_tail(&header->ctl_entry, &header->set->list); > } > > @@ -219,8 +213,7 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) > } > > static struct ctl_table *lookup_entry(struct ctl_table_header **phead, > - struct ctl_table_header *dir_head, > - struct ctl_table *dir, > + struct ctl_dir *dir, > const char *name, int namelen) > { > struct ctl_table_header *head; > @@ -232,7 +225,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, > root = &sysctl_table_root; > do { > set = lookup_header_set(root, current->nsproxy); > - entry = find_entry(&head, set, dir_head, dir, name, namelen); > + entry = find_entry(&head, set, dir, name, namelen); > if (entry && use_table(head)) > *phead = head; > else > @@ -244,7 +237,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, > return entry; > } > > -static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, > +static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, > struct ctl_table_root *root, struct list_head *tmp) > { > struct nsproxy *namespaces = current->nsproxy; > @@ -256,8 +249,8 @@ static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, > head = list_entry(tmp, struct ctl_table_header, ctl_entry); > root = head->root; > > - if (head->attached_to != dir || > - !head->attached_by->procname || > + if (head->parent != dir || > + !head->ctl_table->procname || > !use_table(head)) > goto next; > > @@ -281,47 +274,35 @@ out: > return NULL; > } > > -static void first_entry( > - struct ctl_table_header *dir_head, struct ctl_table *dir, > +static void first_entry(struct ctl_dir *dir, > struct ctl_table_header **phead, struct ctl_table **pentry) > { > - struct ctl_table_header *head = dir_head; > - struct ctl_table *entry = dir; > + struct ctl_table_header *head; > + struct ctl_table *entry = NULL; > > spin_lock(&sysctl_lock); > - if (entry->procname) { > - use_table(head); > - } else { > - head = next_usable_entry(dir, &sysctl_table_root, > - &sysctl_table_root.default_set.list); > - if (head) > - entry = head->attached_by; > - } > + head = next_usable_entry(dir, &sysctl_table_root, > + &sysctl_table_root.default_set.list); > spin_unlock(&sysctl_lock); > + if (head) > + entry = head->ctl_table; > *phead = head; > *pentry = entry; > } > > -static void next_entry(struct ctl_table *dir, > - struct ctl_table_header **phead, struct ctl_table **pentry) > +static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) > { > struct ctl_table_header *head = *phead; > struct ctl_table *entry = *pentry; > > entry++; > if (!entry->procname) { > - struct ctl_table_root *root = head->root; > - struct list_head *tmp = &head->ctl_entry; > - if (head->attached_to != dir) { > - root = &sysctl_table_root; > - tmp = &sysctl_table_root.default_set.list; > - } > spin_lock(&sysctl_lock); > unuse_table(head); > - head = next_usable_entry(dir, root, tmp); > + head = next_usable_entry(head->parent, head->root, &head->ctl_entry); > spin_unlock(&sysctl_lock); > if (head) > - entry = head->attached_by; > + entry = head->ctl_table; > } > *phead = head; > *pentry = entry; > @@ -381,7 +362,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, > > inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; > inode->i_mode = table->mode; > - if (!table->child) { > + if (!S_ISDIR(table->mode)) { > inode->i_mode |= S_IFREG; > inode->i_op = &proc_sys_inode_operations; > inode->i_fop = &proc_sys_file_operations; > @@ -398,7 +379,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) > { > struct ctl_table_header *head = PROC_I(inode)->sysctl; > if (!head) > - head = &root_table_header; > + head = &sysctl_root_dir.header; > return sysctl_head_grab(head); > } > > @@ -406,24 +387,19 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, > struct nameidata *nd) > { > struct ctl_table_header *head = grab_header(dir); > - struct ctl_table *table = PROC_I(dir)->sysctl_entry; > struct ctl_table_header *h = NULL; > struct qstr *name = &dentry->d_name; > struct ctl_table *p; > struct inode *inode; > struct dentry *err = ERR_PTR(-ENOENT); > + struct ctl_dir *ctl_dir; > > if (IS_ERR(head)) > return ERR_CAST(head); > > - if (table && !table->child) { > - WARN_ON(1); > - goto out; > - } > + ctl_dir = container_of(head, struct ctl_dir, header); > > - table = table ? table->child : &head->ctl_table[1]; > - > - p = lookup_entry(&h, head, table, name->name, name->len); > + p = lookup_entry(&h, ctl_dir, name->name, name->len); > if (!p) > goto out; > > @@ -586,21 +562,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) > struct dentry *dentry = filp->f_path.dentry; > struct inode *inode = dentry->d_inode; > struct ctl_table_header *head = grab_header(inode); > - struct ctl_table *table = PROC_I(inode)->sysctl_entry; > struct ctl_table_header *h = NULL; > struct ctl_table *entry; > + struct ctl_dir *ctl_dir; > unsigned long pos; > int ret = -EINVAL; > > if (IS_ERR(head)) > return PTR_ERR(head); > > - if (table && !table->child) { > - WARN_ON(1); > - goto out; > - } > - > - table = table ? table->child : &head->ctl_table[1]; > + ctl_dir = container_of(head, struct ctl_dir, header); > > ret = 0; > /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ > @@ -618,7 +589,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) > } > pos = 2; > > - for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { > + for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { > ret = scan(h, entry, &pos, filp, dirent, filldir); > if (ret) { > sysctl_head_finish(h); > @@ -779,52 +750,86 @@ static const struct dentry_operations proc_sys_dentry_operations = { > .d_compare = proc_sys_compare, > }; > > -static struct ctl_table *is_branch_in(struct ctl_table *branch, > - struct ctl_table *table) > +static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, > + const char *name, int namelen) > { > - struct ctl_table *p; > - const char *s = branch->procname; > + struct ctl_table_header *head; > + struct ctl_table *entry; > > - /* branch should have named subdirectory as its first element */ > - if (!s || !branch->child) > - return NULL; > + entry = find_entry(&head, set, dir, name, namelen); > + if (!entry) > + return ERR_PTR(-ENOENT); > + if (S_ISDIR(entry->mode)) > + return container_of(head, struct ctl_dir, header); > + return ERR_PTR(-ENOTDIR); > +} I find this easier to read: entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (!S_ISDIR(entry->mode)) return ERR_PTR(-ENOTDIR); return container_of(head, struct ctl_dir, header); > + > +static struct ctl_dir *new_dir(struct ctl_table_set *set, > + const char *name, int namelen) > +{ > + struct ctl_table *table; > + struct ctl_dir *new; > + char *new_name; > > - /* ... and nothing else */ > - if (branch[1].procname) > + new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + > + namelen + 1, GFP_KERNEL); > + if (!new) > return NULL; > > - /* table should contain subdirectory with the same name */ > - for (p = table; p->procname; p++) { > - if (!p->child) > - continue; > - if (p->procname && strcmp(p->procname, s) == 0) > - return p; > - } > - return NULL; > + table = (struct ctl_table *)(new + 1); > + new_name = (char *)(table + 2); > + memcpy(new_name, name, namelen); > + new_name[namelen] = '\0'; > + table[0].procname = new_name; > + table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; > + init_header(&new->header, set->root, set, table); > + > + return new; > } > > -/* see if attaching q to p would be an improvement */ > -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) > { > - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; > - struct ctl_table *next; > - int is_better = 0; > - int not_in_parent = !p->attached_by; > - > - while ((next = is_branch_in(by, to)) != NULL) { > - if (by == q->attached_by) > - is_better = 1; > - if (to == p->attached_by) > - not_in_parent = 1; > - by = by->child; > - to = next->child; > - } > + struct ctl_dir *subdir, *new = NULL; > > - if (is_better && not_in_parent) { > - q->attached_by = by; > - q->attached_to = to; > - q->parent = p; This one's hard to understand :) I would add these comments: /* get_subdir - find and take a reference of a given subdir * - if no entry with the same name found, create one * - set - the set in which to look for the subdir (after looking in the parent dir's set) * - dir - the parent dir. NOTE: before calling this function * make sure to have a increased the count on the parent dir's nreg * as this function will drop a reference! */ > +static struct ctl_dir *get_subdir(struct ctl_table_set *set, > + struct ctl_dir *dir, const char *name, int namelen) > + spin_lock(&sysctl_lock); /* first look in the parent dir's set */ > + subdir = find_subdir(dir->header.set, dir, name, namelen); > + if (!IS_ERR(subdir)) > + goto found; > + if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) /* then look in the requested set if it's not the same as the parent's set */ > + subdir = find_subdir(set, dir, name, namelen); > + if (!IS_ERR(subdir)) > + goto found; > + if (PTR_ERR(subdir) != -ENOENT) > + goto failed; > + > + spin_unlock(&sysctl_lock); /* not found, let's make a new dir and we'll add it later */ > + new = new_dir(set, name, namelen); > + spin_lock(&sysctl_lock); > + subdir = ERR_PTR(-ENOMEM); > + if (!new) > + goto failed; > + /* check in the requested set: maybe someone added it while we temporarily gave up the sysctl_lock to create the new dir. */ > + subdir = find_subdir(set, dir, name, namelen); > + if (!IS_ERR(subdir)) > + goto found; > + if (PTR_ERR(subdir) != -ENOENT) > + goto failed; > + /* nope, no one created an entry with this name while we gave up the spin lock. We can add the new dir. */ > + insert_header(dir, &new->header); > + subdir = new; > +found: > + subdir->header.nreg++; > +failed: > + if (unlikely(IS_ERR(subdir))) { > + printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", > + namelen, namelen, name, PTR_ERR(subdir)); > } /* drop a reference from the parent dir, as we dive into the subdir */ > + drop_sysctl_table(&dir->header); > + if (new) > + drop_sysctl_table(&new->header); > + spin_unlock(&sysctl_lock); > + return subdir; > } > Why do you only check the given @set after creating the new dir. Why not the parent dir too? > static int sysctl_check_table_dups(const char *path, struct ctl_table *old, > @@ -846,24 +851,14 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, > } > > static int sysctl_check_dups(struct nsproxy *namespaces, > - struct ctl_table_header *header, > + struct ctl_dir *dir, > const char *path, struct ctl_table *table) > { > struct ctl_table_root *root; > struct ctl_table_set *set; > - struct ctl_table_header *dir_head, *head; > - struct ctl_table *dir_table; > + struct ctl_table_header *head; > int error = 0; > > - /* No dups if we are the only member of our directory */ > - if (header->attached_by != table) > - return 0; > - > - dir_head = header->parent; > - dir_table = header->attached_to; > - > - error = sysctl_check_table_dups(path, dir_table, table); > - > root = &sysctl_table_root; > do { > set = lookup_header_set(root, namespaces); > @@ -871,9 +866,9 @@ static int sysctl_check_dups(struct nsproxy *namespaces, > list_for_each_entry(head, &set->list, ctl_entry) { > if (head->unregistering) > continue; > - if (head->attached_to != dir_table) > + if (head->parent != dir) > continue; > - error = sysctl_check_table_dups(path, head->attached_by, > + error = sysctl_check_table_dups(path, head->ctl_table, > table); > } > root = list_entry(root->root_list.next, > @@ -977,47 +972,25 @@ struct ctl_table_header *__register_sysctl_table( > const char *path, struct ctl_table *table) > { > struct ctl_table_header *header; > - struct ctl_table *new, **prevp; > const char *name, *nextname; > - unsigned int npath = 0; > struct ctl_table_set *set; > - size_t path_bytes = 0; > - char *new_name; > - > - /* Count the path components */ > - for (name = path; name; name = nextname) { > - int namelen; > - nextname = strchr(name, '/'); > - if (nextname) { > - namelen = nextname - name; > - nextname++; > - } else { > - namelen = strlen(name); > - } > - if (namelen == 0) > - continue; > - path_bytes += namelen + 1; > - npath++; > - } > + struct ctl_dir *dir; > > - /* > - * For each path component, allocate a 2-element ctl_table array. > - * The first array element will be filled with the sysctl entry > - * for this, the second will be the sentinel (procname == 0). > - * > - * We allocate everything in one go so that we don't have to > - * worry about freeing additional memory in unregister_sysctl_table. > - */ > - header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + > - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); > + header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); > if (!header) > return NULL; > > - new = (struct ctl_table *) (header + 1); > - new_name = (char *)(new + (2 * npath)); > + init_header(header, root, NULL, table); > + if (sysctl_check_table(path, table)) > + goto fail; > + > + spin_lock(&sysctl_lock); > + header->set = set = lookup_header_set(root, namespaces); > + dir = &sysctl_root_dir; /* get_subdir will drop a reference to this dir as it dives into the subdir */ > + dir->header.nreg++; > + spin_unlock(&sysctl_lock); > > - /* Now connect the dots */ > - prevp = &header->ctl_table; > + /* Find the directory for the ctl_table */ > for (name = path; name; name = nextname) { > int namelen; > nextname = strchr(name, '/'); > @@ -1029,51 +1002,21 @@ struct ctl_table_header *__register_sysctl_table( > } > if (namelen == 0) > continue; > - memcpy(new_name, name, namelen); > - new_name[namelen] = '\0'; > - > - new->procname = new_name; > - new->mode = 0555; > - > - *prevp = new; > - prevp = &new->child; > > - new += 2; > - new_name += namelen + 1; > + dir = get_subdir(set, dir, name, namelen); > + if (IS_ERR(dir)) > + goto fail; > } > - *prevp = table; > - > - init_header(header, root, NULL, table); > - if (sysctl_check_table(path, table)) > - goto fail; > - > spin_lock(&sysctl_lock); > - header->set = lookup_header_set(root, namespaces); > - header->attached_by = header->ctl_table; > - header->attached_to = &root_table[1]; > - header->parent = &root_table_header; > - set = header->set; > - root = header->root; > - for (;;) { > - struct ctl_table_header *p; > - list_for_each_entry(p, &set->list, ctl_entry) { > - if (p->unregistering) > - continue; > - try_attach(p, header); > - } > - if (root == &sysctl_table_root) > - break; > - root = list_entry(root->root_list.prev, > - struct ctl_table_root, root_list); > - set = lookup_header_set(root, namespaces); > - } > - if (sysctl_check_dups(namespaces, header, path, table)) > - goto fail_locked; > - insert_header(header); > + if (sysctl_check_dups(namespaces, dir, path, table)) > + goto fail_put_dir_locked; > + insert_header(dir, header); > + drop_sysctl_table(&dir->header); > spin_unlock(&sysctl_lock); > > return header; > -fail_locked: > +fail_put_dir_locked: > + drop_sysctl_table(&dir->header); > spin_unlock(&sysctl_lock); > fail: > kfree(header); > @@ -1299,16 +1242,17 @@ EXPORT_SYMBOL(register_sysctl_table); > > static void drop_sysctl_table(struct ctl_table_header *header) > { > + struct ctl_dir *parent = header->parent; > + > if (--header->nreg) > return; > > start_unregistering(header); > - if (!--header->parent->count) { > - WARN_ON(1); > - kfree_rcu(header->parent, rcu); > - } > if (!--header->count) > kfree_rcu(header, rcu); > + > + if (parent) > + drop_sysctl_table(&parent->header); > } > > /** > diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h > index e73ba33..3084b62 100644 > --- a/include/linux/sysctl.h > +++ b/include/linux/sysctl.h > @@ -938,6 +938,7 @@ struct ctl_table; > struct nsproxy; > struct ctl_table_root; > struct ctl_table_header; > +struct ctl_dir; > > typedef struct ctl_table ctl_table; > > @@ -1040,9 +1041,12 @@ struct ctl_table_header > struct ctl_table *ctl_table_arg; > struct ctl_table_root *root; > struct ctl_table_set *set; > - struct ctl_table *attached_by; > - struct ctl_table *attached_to; > - struct ctl_table_header *parent; > + struct ctl_dir *parent; > +}; > + > +struct ctl_dir { > + /* Header must be at the start of ctl_dir */ > + struct ctl_table_header header; > }; > > struct ctl_table_set { > -- > 1.7.2.5 > -- . ..: Lucian -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html