On Mon, Jul 4, 2022 at 4:53 AM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote: > > On Sat, Jul 02, 2022 at 10:23:16AM -0700, Linus Torvalds wrote: > > > Al - can you please take a quick look? > > FWIW, trying to write a coherent documentation had its usual effect... > The thing is, we don't really need to fetch the inode that early. > All we really care about is that in RCU mode ->d_seq gets sampled > before we fetch ->d_inode *and* we don't treat "it looks negative" > as hard -ENOENT in case of ->d_seq mismatch. > > Which can be bloody well left to step_into(). So we don't need > to pass it inode argument at all - just dentry and seq. Makes > a bunch of functions simpler as well... > > It does *not* deal with the "uninitialized" seq argument in > !RCU case; I'll handle that in the followup, but that's a separate > story, IMO (and very clearly a false positive). I can confirm that your patch fixes KMSAN reports on inode, yet the following reports still persist: ===================================================== BUG: KMSAN: uninit-value in walk_component+0x5e7/0x6c0 fs/namei.c:1996 walk_component+0x5e7/0x6c0 fs/namei.c:1996 lookup_last fs/namei.c:2445 path_lookupat+0x27d/0x6f0 fs/namei.c:2468 filename_lookup+0x24c/0x800 fs/namei.c:2497 kern_path+0x79/0x3a0 fs/namei.c:2587 init_stat+0x72/0x13f fs/init.c:132 clean_path+0x74/0x24c init/initramfs.c:339 do_name+0x12d/0xc17 init/initramfs.c:371 write_buffer init/initramfs.c:457 unpack_to_rootfs+0x49a/0xd9e init/initramfs.c:510 do_populate_rootfs+0x57/0x40f init/initramfs.c:699 async_run_entry_fn+0x8f/0x400 kernel/async.c:127 process_one_work+0xb27/0x13e0 kernel/workqueue.c:2289 worker_thread+0x1076/0x1d60 kernel/workqueue.c:2436 kthread+0x31b/0x430 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 ??:? 
Local variable seq created at: walk_component+0x46/0x6c0 fs/namei.c:1981 lookup_last fs/namei.c:2445 path_lookupat+0x27d/0x6f0 fs/namei.c:2468 CPU: 0 PID: 10 Comm: kworker/u9:0 Tainted: G B 5.19.0-rc4-00059-gcf2d25715943-dirty #103 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: events_unbound async_run_entry_fn ===================================================== What makes you think they are false positives? Is the scenario I described above impossible? Quoting it again: """ In particular, if the call to lookup_fast() in walk_component() returns NULL, and lookup_slow() returns a valid dentry, then the `seq` and `inode` will remain uninitialized until the call to step_into() """ > Cumulative diff follows; splitup is in #work.namei. Comments? > > diff --git a/fs/namei.c b/fs/namei.c > index 1f28d3f463c3..7f4f61ade9e3 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -1467,7 +1467,7 @@ EXPORT_SYMBOL(follow_down); > * we meet a managed dentry that would need blocking. > */ > static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, > - struct inode **inode, unsigned *seqp) > + unsigned *seqp) > { > struct dentry *dentry = path->dentry; > unsigned int flags = dentry->d_flags; > @@ -1497,13 +1497,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, > dentry = path->dentry = mounted->mnt.mnt_root; > nd->state |= ND_JUMPED; > *seqp = read_seqcount_begin(&dentry->d_seq); > - *inode = dentry->d_inode; > - /* > - * We don't need to re-check ->d_seq after this > - * ->d_inode read - there will be an RCU delay > - * between mount hash removal and ->mnt_root > - * becoming unpinned. 
> - */ > flags = dentry->d_flags; > continue; > } > @@ -1515,8 +1508,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, > } > > static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, > - struct path *path, struct inode **inode, > - unsigned int *seqp) > + struct path *path, unsigned int *seqp) > { > bool jumped; > int ret; > @@ -1525,9 +1517,7 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, > path->dentry = dentry; > if (nd->flags & LOOKUP_RCU) { > unsigned int seq = *seqp; > - if (unlikely(!*inode)) > - return -ENOENT; > - if (likely(__follow_mount_rcu(nd, path, inode, seqp))) > + if (likely(__follow_mount_rcu(nd, path, seqp))) > return 0; > if (!try_to_unlazy_next(nd, dentry, seq)) > return -ECHILD; > @@ -1547,7 +1537,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, > if (path->mnt != nd->path.mnt) > mntput(path->mnt); > } else { > - *inode = d_backing_inode(path->dentry); > *seqp = 0; /* out of RCU mode, so the value doesn't matter */ > } > return ret; > @@ -1607,9 +1596,7 @@ static struct dentry *__lookup_hash(const struct qstr *name, > return dentry; > } > > -static struct dentry *lookup_fast(struct nameidata *nd, > - struct inode **inode, > - unsigned *seqp) > +static struct dentry *lookup_fast(struct nameidata *nd, unsigned *seqp) > { > struct dentry *dentry, *parent = nd->path.dentry; > int status = 1; > @@ -1628,22 +1615,11 @@ static struct dentry *lookup_fast(struct nameidata *nd, > return NULL; > } > > - /* > - * This sequence count validates that the inode matches > - * the dentry name information from lookup. > - */ > - *inode = d_backing_inode(dentry); > - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) > - return ERR_PTR(-ECHILD); > - > - /* > + /* > * This sequence count validates that the parent had no > * changes while we did the lookup of the dentry above. 
> - * > - * The memory barrier in read_seqcount_begin of child is > - * enough, we can use __read_seqcount_retry here. > */ > - if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq))) > + if (unlikely(read_seqcount_retry(&parent->d_seq, nd->seq))) > return ERR_PTR(-ECHILD); > > *seqp = seq; > @@ -1838,13 +1814,21 @@ static const char *pick_link(struct nameidata *nd, struct path *link, > * for the common case. > */ > static const char *step_into(struct nameidata *nd, int flags, > - struct dentry *dentry, struct inode *inode, unsigned seq) > + struct dentry *dentry, unsigned seq) > { > struct path path; > - int err = handle_mounts(nd, dentry, &path, &inode, &seq); > + struct inode *inode; > + int err = handle_mounts(nd, dentry, &path, &seq); > > if (err < 0) > return ERR_PTR(err); > + inode = path.dentry->d_inode; > + if (unlikely(!inode)) { > + if ((nd->flags & LOOKUP_RCU) && > + read_seqcount_retry(&path.dentry->d_seq, seq)) > + return ERR_PTR(-ECHILD); > + return ERR_PTR(-ENOENT); > + } > if (likely(!d_is_symlink(path.dentry)) || > ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) || > (flags & WALK_NOFOLLOW)) { > @@ -1870,9 +1854,7 @@ static const char *step_into(struct nameidata *nd, int flags, > return pick_link(nd, &path, inode, seq, flags); > } > > -static struct dentry *follow_dotdot_rcu(struct nameidata *nd, > - struct inode **inodep, > - unsigned *seqp) > +static struct dentry *follow_dotdot_rcu(struct nameidata *nd, unsigned *seqp) > { > struct dentry *parent, *old; > > @@ -1895,7 +1877,6 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd, > } > old = nd->path.dentry; > parent = old->d_parent; > - *inodep = parent->d_inode; > *seqp = read_seqcount_begin(&parent->d_seq); > if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq))) > return ERR_PTR(-ECHILD); > @@ -1910,9 +1891,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd, > return NULL; > } > > -static struct dentry *follow_dotdot(struct nameidata *nd, > - 
struct inode **inodep, > - unsigned *seqp) > +static struct dentry *follow_dotdot(struct nameidata *nd, unsigned *seqp) > { > struct dentry *parent; > > @@ -1937,7 +1916,6 @@ static struct dentry *follow_dotdot(struct nameidata *nd, > return ERR_PTR(-ENOENT); > } > *seqp = 0; > - *inodep = parent->d_inode; > return parent; > > in_root: > @@ -1952,7 +1930,6 @@ static const char *handle_dots(struct nameidata *nd, int type) > if (type == LAST_DOTDOT) { > const char *error = NULL; > struct dentry *parent; > - struct inode *inode; > unsigned seq; > > if (!nd->root.mnt) { > @@ -1961,17 +1938,17 @@ static const char *handle_dots(struct nameidata *nd, int type) > return error; > } > if (nd->flags & LOOKUP_RCU) > - parent = follow_dotdot_rcu(nd, &inode, &seq); > + parent = follow_dotdot_rcu(nd, &seq); > else > - parent = follow_dotdot(nd, &inode, &seq); > + parent = follow_dotdot(nd, &seq); > if (IS_ERR(parent)) > return ERR_CAST(parent); > if (unlikely(!parent)) > error = step_into(nd, WALK_NOFOLLOW, > - nd->path.dentry, nd->inode, nd->seq); > + nd->path.dentry, nd->seq); > else > error = step_into(nd, WALK_NOFOLLOW, > - parent, inode, seq); > + parent, seq); > if (unlikely(error)) > return error; > > @@ -1995,7 +1972,6 @@ static const char *handle_dots(struct nameidata *nd, int type) > static const char *walk_component(struct nameidata *nd, int flags) > { > struct dentry *dentry; > - struct inode *inode; > unsigned seq; > /* > * "." and ".." are special - ".." 
especially so because it has > @@ -2007,7 +1983,7 @@ static const char *walk_component(struct nameidata *nd, int flags) > put_link(nd); > return handle_dots(nd, nd->last_type); > } > - dentry = lookup_fast(nd, &inode, &seq); > + dentry = lookup_fast(nd, &seq); > if (IS_ERR(dentry)) > return ERR_CAST(dentry); > if (unlikely(!dentry)) { > @@ -2017,7 +1993,7 @@ static const char *walk_component(struct nameidata *nd, int flags) > } > if (!(flags & WALK_MORE) && nd->depth) > put_link(nd); > - return step_into(nd, flags, dentry, inode, seq); > + return step_into(nd, flags, dentry, seq); > } > > /* > @@ -2473,8 +2449,7 @@ static int handle_lookup_down(struct nameidata *nd) > { > if (!(nd->flags & LOOKUP_RCU)) > dget(nd->path.dentry); > - return PTR_ERR(step_into(nd, WALK_NOFOLLOW, > - nd->path.dentry, nd->inode, nd->seq)); > + return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry, nd->seq)); > } > > /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ > @@ -3394,7 +3369,6 @@ static const char *open_last_lookups(struct nameidata *nd, > int open_flag = op->open_flag; > bool got_write = false; > unsigned seq; > - struct inode *inode; > struct dentry *dentry; > const char *res; > > @@ -3410,7 +3384,7 @@ static const char *open_last_lookups(struct nameidata *nd, > if (nd->last.name[nd->last.len]) > nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; > /* we _can_ be in RCU mode here */ > - dentry = lookup_fast(nd, &inode, &seq); > + dentry = lookup_fast(nd, &seq); > if (IS_ERR(dentry)) > return ERR_CAST(dentry); > if (likely(dentry)) > @@ -3464,7 +3438,7 @@ static const char *open_last_lookups(struct nameidata *nd, > finish_lookup: > if (nd->depth) > put_link(nd); > - res = step_into(nd, WALK_TRAILING, dentry, inode, seq); > + res = step_into(nd, WALK_TRAILING, dentry, seq); > if (unlikely(res)) > nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); > return res; -- Alexander Potapenko Software Engineer Google Germany GmbH Erika-Mann-Straße, 33 80636 
München Geschäftsführer: Paul Manicle, Liana Sebastian Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Diese E-Mail ist vertraulich. Falls Sie diese fälschlicherweise erhalten haben sollten, leiten Sie diese bitte nicht an jemand anderes weiter, löschen Sie alle Kopien und Anhänge davon und lassen Sie mich bitte wissen, dass die E-Mail an die falsche Person gesendet wurde. This e-mail is confidential. If you received this communication by mistake, please don't forward it to anyone else, please erase all copies and attachments, and please let me know that it has gone to the wrong person.