On Thu, 1 Feb 2024 00:27:19 +0000 Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote: > On Wed, Jan 31, 2024 at 01:49:22PM -0500, Steven Rostedt wrote: > > > @@ -329,32 +320,29 @@ static struct dentry *create_file(const char *name, umode_t mode, > > > > ti = get_tracefs(inode); > > ti->flags |= TRACEFS_EVENT_INODE; > > - d_instantiate(dentry, inode); > > + > > + d_add(dentry, inode); > > fsnotify_create(dentry->d_parent->d_inode, dentry); > > Seriously? stat(2), have it go away from dcache on memory pressure, > lather, rinse, repeat... Won't *snotify get confused by the stream > of creations of the same thing, with not a removal in sight? > That looks to be cut and paste from the old create in tracefs. I don't know of a real use case for that. I think we could possibly delete it without anyone noticing. > > - return eventfs_end_creating(dentry); > > + return dentry; > > }; > > > > @@ -371,11 +359,14 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent > > /* Only directories have ti->private set to an ei, not files */ > > ti->private = ei; > > > > + dentry->d_fsdata = ei; > > + ei->dentry = dentry; // Remove me! > > + > > inc_nlink(inode); > > - d_instantiate(dentry, inode); > > + d_add(dentry, inode); > > inc_nlink(dentry->d_parent->d_inode); > > What will happen when that thing gets evicted from dcache, > gets looked up again, and again, and...? > > > fsnotify_mkdir(dentry->d_parent->d_inode, dentry); > > Same re snotify confusion... Yeah, again, I think it's useless. Doing that is more useless than taring the tracefs directory ;-) > > > - return eventfs_end_creating(dentry); > > + return dentry; > > } > > > > static void free_ei(struct eventfs_inode *ei) > > @@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) > > } > > > > @@ -607,79 +462,55 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, > > struct dentry *dentry, > > unsigned int flags) > > { > > - const struct file_operations *fops; > > - const struct eventfs_entry *entry; > > struct eventfs_inode *ei_child; > > struct tracefs_inode *ti; > > struct eventfs_inode *ei; > > - struct dentry *ei_dentry = NULL; > > - struct dentry *ret = NULL; > > - struct dentry *d; > > const char *name = dentry->d_name.name; > > - umode_t mode; > > - void *data; > > - int idx; > > - int i; > > - int r; > > + struct dentry *result = NULL; > > > > ti = get_tracefs(dir); > > if (!(ti->flags & TRACEFS_EVENT_INODE)) > > Can that ever happen? I mean, why set ->i_op to something that > has this for ->lookup() on a directory without TRACEFS_EVENT_INODE in > its inode? It's not as if you ever removed that flag... That's been there mostly as paranoia. Should probably be switched to: if (WARN_ON_ONCE(!(ti->flags & TRACEFS_EVENT_INODE))) > > > - return NULL; > > - > > - /* Grab srcu to prevent the ei from going away */ > > - idx = srcu_read_lock(&eventfs_srcu); > > + return ERR_PTR(-EIO); > > > > - /* > > - * Grab the eventfs_mutex to consistent value from ti->private. > > - * This s > > - */ > > mutex_lock(&eventfs_mutex); > > - ei = READ_ONCE(ti->private); > > - if (ei && !ei->is_freed) > > - ei_dentry = READ_ONCE(ei->dentry); > > - mutex_unlock(&eventfs_mutex); > > - > > - if (!ei || !ei_dentry) > > - goto out; > > > > - data = ei->data; > > + ei = ti->private; > > + if (!ei || ei->is_freed) > > + goto enoent; > > > > - list_for_each_entry_srcu(ei_child, &ei->children, list, > > - srcu_read_lock_held(&eventfs_srcu)) { > > + list_for_each_entry(ei_child, &ei->children, list) { > > if (strcmp(ei_child->name, name) != 0) > > continue; > > - ret = simple_lookup(dir, dentry, flags); > > - if (IS_ERR(ret)) > > - goto out; > > - d = create_dir_dentry(ei, ei_child, ei_dentry); > > - dput(d); > > + if (ei_child->is_freed) > > + goto enoent; > > Out of curiosity - can that happen now? You've got exclusion with > eventfs_remove_rec(), so you shouldn't be able to catch the moment > between setting ->is_freed and removal from the list... Yeah, that's from when we just used SRCU. If anything, it too should just add a WARN_ON_ONCE() to it. > > > + lookup_dir_entry(dentry, ei, ei_child); > > goto out; > > } > > > > - for (i = 0; i < ei->nr_entries; i++) { > > - entry = &ei->entries[i]; > > - if (strcmp(name, entry->name) == 0) { > > - void *cdata = data; > > - mutex_lock(&eventfs_mutex); > > - /* If ei->is_freed, then the event itself may be too */ > > - if (!ei->is_freed) > > - r = entry->callback(name, &mode, &cdata, &fops); > > - else > > - r = -1; > > - mutex_unlock(&eventfs_mutex); > > - if (r <= 0) > > - continue; > > - ret = simple_lookup(dir, dentry, flags); > > - if (IS_ERR(ret)) > > - goto out; > > - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); > > - dput(d); > > - break; > > - } > > + for (int i = 0; i < ei->nr_entries; i++) { > > + void *data; > > + umode_t mode; > > + const struct file_operations *fops; > > + const struct eventfs_entry *entry = &ei->entries[i]; > > + > > + if (strcmp(name, entry->name) != 0) > > + continue; > > + > > + data = ei->data; > > + if (entry->callback(name, &mode, &data, &fops) <= 0) > > + goto enoent; > > + > > + lookup_file_dentry(dentry, ei, i, mode, data, fops); > > + goto out; > > } > > + > > + enoent: > > + /* Don't cache negative lookups, just return an error */ > > + result = ERR_PTR(-ENOENT); > > Huh? Just return NULL and be done with that - you'll get an > unhashed negative dentry and let the caller turn that into > -ENOENT... We had a problem here with just returning NULL. It leaves the negative dentry around and doesn't get refreshed. I did this: # cd /sys/kernel/tracing # ls events/kprobes/sched/ ls: cannot access 'events/kprobes/sched/': No such file or directory # echo 'p:sched schedule' >> kprobe_events # ls events/kprobes/sched/ ls: cannot access 'events/kprobes/sched/': No such file or directory When it should have been: # ls events/kprobes/sched/ enable filter format hist hist_debug id inject trigger Leaving the negative dentry there will have it fail when the directory exists the next time. -- Steve > > > out: > > - srcu_read_unlock(&eventfs_srcu, idx); > > - return ret; > > + mutex_unlock(&eventfs_mutex); > > + return result; > > } > > > > /*