On Wed, Jan 31, 2024 at 01:49:22PM -0500, Steven Rostedt wrote: > @@ -329,32 +320,29 @@ static struct dentry *create_file(const char *name, umode_t mode, > > ti = get_tracefs(inode); > ti->flags |= TRACEFS_EVENT_INODE; > - d_instantiate(dentry, inode); > + > + d_add(dentry, inode); > fsnotify_create(dentry->d_parent->d_inode, dentry); Seriously? stat(2), have it go away from dcache on memory pressure, lather, rinse, repeat... Won't *snotify get confused by the stream of creations of the same thing, with not a removal in sight? > - return eventfs_end_creating(dentry); > + return dentry; > }; > > /** > - * create_dir - create a dir in the tracefs filesystem > + * lookup_dir_entry - look up a dir in the tracefs filesystem > + * @dentry: the directory to look up > * @ei: the eventfs_inode that represents the directory to create > - * @parent: parent dentry for this file. > * > - * This function will create a dentry for a directory represented by > + * This function will look up a dentry for a directory represented by > * a eventfs_inode. > */ > -static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) > +static struct dentry *lookup_dir_entry(struct dentry *dentry, > + struct eventfs_inode *pei, struct eventfs_inode *ei) > { > struct tracefs_inode *ti; > - struct dentry *dentry; > struct inode *inode; > > - dentry = eventfs_start_creating(ei->name, parent); > - if (IS_ERR(dentry)) > - return dentry; > - > inode = tracefs_get_inode(dentry->d_sb); > if (unlikely(!inode)) > - return eventfs_failed_creating(dentry); > + return ERR_PTR(-ENOMEM); > > /* If the user updated the directory's attributes, use them */ > update_inode_attr(dentry, inode, &ei->attr, > @@ -371,11 +359,14 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent > /* Only directories have ti->private set to an ei, not files */ > ti->private = ei; > > + dentry->d_fsdata = ei; > + ei->dentry = dentry; // Remove me! > + > inc_nlink(inode); > - d_instantiate(dentry, inode); > + d_add(dentry, inode); > inc_nlink(dentry->d_parent->d_inode); What will happen when that thing gets evicted from dcache, gets looked up again, and again, and...? > fsnotify_mkdir(dentry->d_parent->d_inode, dentry); Same re snotify confusion... > - return eventfs_end_creating(dentry); > + return dentry; > } > > static void free_ei(struct eventfs_inode *ei) > @@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) > } > > /** > - * create_file_dentry - create a dentry for a file of an eventfs_inode > + * lookup_file_dentry - create a dentry for a file of an eventfs_inode > * @ei: the eventfs_inode that the file will be created under > * @idx: the index into the d_children[] of the @ei > * @parent: The parent dentry of the created file. > @@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) > * address located at @e_dentry. > */ > static struct dentry * > -create_file_dentry(struct eventfs_inode *ei, int idx, > - struct dentry *parent, const char *name, umode_t mode, void *data, > +lookup_file_dentry(struct dentry *dentry, > + struct eventfs_inode *ei, int idx, > + umode_t mode, void *data, > const struct file_operations *fops) > { > struct eventfs_attr *attr = NULL; > struct dentry **e_dentry = &ei->d_children[idx]; > - struct dentry *dentry; > - > - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); > > - mutex_lock(&eventfs_mutex); > - if (ei->is_freed) { > - mutex_unlock(&eventfs_mutex); > - return NULL; > - } > - /* If the e_dentry already has a dentry, use it */ > - if (*e_dentry) { > - dget(*e_dentry); > - mutex_unlock(&eventfs_mutex); > - return *e_dentry; > - } > - > - /* ei->entry_attrs are protected by SRCU */ > if (ei->entry_attrs) > attr = &ei->entry_attrs[idx]; > > - mutex_unlock(&eventfs_mutex); > - > - dentry = create_file(name, mode, attr, parent, data, fops); > - > - mutex_lock(&eventfs_mutex); > - > - if (IS_ERR_OR_NULL(dentry)) { > - /* > - * When the mutex was released, something else could have > - * created the dentry for this e_dentry. In which case > - * use that one. > - * > - * If ei->is_freed is set, the e_dentry is currently on its > - * way to being freed, don't return it. If e_dentry is NULL > - * it means it was already freed. > - */ > - if (ei->is_freed) { > - dentry = NULL; > - } else { > - dentry = *e_dentry; > - dget(dentry); > - } > - mutex_unlock(&eventfs_mutex); > - return dentry; > - } > - > - if (!*e_dentry && !ei->is_freed) { > - *e_dentry = dentry; > - dentry->d_fsdata = ei; > - } else { > - /* > - * Should never happen unless we get here due to being freed. > - * Otherwise it means two dentries exist with the same name. > - */ > - WARN_ON_ONCE(!ei->is_freed); > - dentry = NULL; > - } > - mutex_unlock(&eventfs_mutex); > - > - return dentry; > -} > - > -/** > - * eventfs_post_create_dir - post create dir routine > - * @ei: eventfs_inode of recently created dir > - * > - * Map the meta-data of files within an eventfs dir to their parent dentry > - */ > -static void eventfs_post_create_dir(struct eventfs_inode *ei) > -{ > - struct eventfs_inode *ei_child; > - > - lockdep_assert_held(&eventfs_mutex); > - > - /* srcu lock already held */ > - /* fill parent-child relation */ > - list_for_each_entry_srcu(ei_child, &ei->children, list, > - srcu_read_lock_held(&eventfs_srcu)) { > - ei_child->d_parent = ei->dentry; > - } > -} > - > -/** > - * create_dir_dentry - Create a directory dentry for the eventfs_inode > - * @pei: The eventfs_inode parent of ei. > - * @ei: The eventfs_inode to create the directory for > - * @parent: The dentry of the parent of this directory > - * > - * This creates and attaches a directory dentry to the eventfs_inode @ei. > - */ > -static struct dentry * > -create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, > - struct dentry *parent) > -{ > - struct dentry *dentry = NULL; > - > - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); > - > - mutex_lock(&eventfs_mutex); > - if (pei->is_freed || ei->is_freed) { > - mutex_unlock(&eventfs_mutex); > - return NULL; > - } > - if (ei->dentry) { > - /* If the eventfs_inode already has a dentry, use it */ > - dentry = ei->dentry; > - dget(dentry); > - mutex_unlock(&eventfs_mutex); > - return dentry; > - } > - mutex_unlock(&eventfs_mutex); > + dentry->d_fsdata = ei; // NOTE: ei of _parent_ > + lookup_file(dentry, mode, attr, data, fops); > > - dentry = create_dir(ei, parent); > - > - mutex_lock(&eventfs_mutex); > - > - if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { > - /* > - * When the mutex was released, something else could have > - * created the dentry for this e_dentry. In which case > - * use that one. > - * > - * If ei->is_freed is set, the e_dentry is currently on its > - * way to being freed. > - */ > - dentry = ei->dentry; > - if (dentry) > - dget(dentry); > - mutex_unlock(&eventfs_mutex); > - return dentry; > - } > - > - if (!ei->dentry && !ei->is_freed) { > - ei->dentry = dentry; > - eventfs_post_create_dir(ei); > - dentry->d_fsdata = ei; > - } else { > - /* > - * Should never happen unless we get here due to being freed. > - * Otherwise it means two dentries exist with the same name. > - */ > - WARN_ON_ONCE(!ei->is_freed); > - dentry = NULL; > - } > - mutex_unlock(&eventfs_mutex); > + *e_dentry = dentry; // Remove me > > return dentry; > } > @@ -607,79 +462,55 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, > struct dentry *dentry, > unsigned int flags) > { > - const struct file_operations *fops; > - const struct eventfs_entry *entry; > struct eventfs_inode *ei_child; > struct tracefs_inode *ti; > struct eventfs_inode *ei; > - struct dentry *ei_dentry = NULL; > - struct dentry *ret = NULL; > - struct dentry *d; > const char *name = dentry->d_name.name; > - umode_t mode; > - void *data; > - int idx; > - int i; > - int r; > + struct dentry *result = NULL; > > ti = get_tracefs(dir); > if (!(ti->flags & TRACEFS_EVENT_INODE)) Can that ever happen? I mean, why set ->i_op to something that has this for ->lookup() on a directory without TRACEFS_EVENT_INODE in its inode? It's not as if you ever removed that flag... > - return NULL; > - > - /* Grab srcu to prevent the ei from going away */ > - idx = srcu_read_lock(&eventfs_srcu); > + return ERR_PTR(-EIO); > > - /* > - * Grab the eventfs_mutex to consistent value from ti->private. > - * This s > - */ > mutex_lock(&eventfs_mutex); > - ei = READ_ONCE(ti->private); > - if (ei && !ei->is_freed) > - ei_dentry = READ_ONCE(ei->dentry); > - mutex_unlock(&eventfs_mutex); > - > - if (!ei || !ei_dentry) > - goto out; > > - data = ei->data; > + ei = ti->private; > + if (!ei || ei->is_freed) > + goto enoent; > > - list_for_each_entry_srcu(ei_child, &ei->children, list, > - srcu_read_lock_held(&eventfs_srcu)) { > + list_for_each_entry(ei_child, &ei->children, list) { > if (strcmp(ei_child->name, name) != 0) > continue; > - ret = simple_lookup(dir, dentry, flags); > - if (IS_ERR(ret)) > - goto out; > - d = create_dir_dentry(ei, ei_child, ei_dentry); > - dput(d); > + if (ei_child->is_freed) > + goto enoent; Out of curiosity - can that happen now? You've got exclusion with eventfs_remove_rec(), so you shouldn't be able to catch the moment between setting ->is_freed and removal from the list... > + lookup_dir_entry(dentry, ei, ei_child); > goto out; > } > > - for (i = 0; i < ei->nr_entries; i++) { > - entry = &ei->entries[i]; > - if (strcmp(name, entry->name) == 0) { > - void *cdata = data; > - mutex_lock(&eventfs_mutex); > - /* If ei->is_freed, then the event itself may be too */ > - if (!ei->is_freed) > - r = entry->callback(name, &mode, &cdata, &fops); > - else > - r = -1; > - mutex_unlock(&eventfs_mutex); > - if (r <= 0) > - continue; > - ret = simple_lookup(dir, dentry, flags); > - if (IS_ERR(ret)) > - goto out; > - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); > - dput(d); > - break; > - } > + for (int i = 0; i < ei->nr_entries; i++) { > + void *data; > + umode_t mode; > + const struct file_operations *fops; > + const struct eventfs_entry *entry = &ei->entries[i]; > + > + if (strcmp(name, entry->name) != 0) > + continue; > + > + data = ei->data; > + if (entry->callback(name, &mode, &data, &fops) <= 0) > + goto enoent; > + > + lookup_file_dentry(dentry, ei, i, mode, data, fops); > + goto out; > } > + > + enoent: > + /* Don't cache negative lookups, just return an error */ > + result = ERR_PTR(-ENOENT); Huh? Just return NULL and be done with that - you'll get an unhashed negative dentry and let the caller turn that into -ENOENT... > out: > - srcu_read_unlock(&eventfs_srcu, idx); > - return ret; > + mutex_unlock(&eventfs_mutex); > + return result; > } > > /*