On Mon, 2024-01-22 at 21:51 +0200, Amir Goldstein wrote: > An opaque directory cannot have xwhiteouts, so instead of marking an > xwhiteouts directory with a new xattr, overload overlay.opaque xattr > for marking both opaque dir ('y') and xwhiteouts dir ('x'). > > This is more efficient as the overlay.opaque xattr is checked during > lookup of directory anyway. > > This also prevents unnecessary checking the xattr when reading a > directory without xwhiteouts, i.e. most of the time. > > Note that the xwhiteouts marker is not checked on the upper layer and > on the last layer in lowerstack, where xwhiteouts are not expected. > > Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout") > Cc: <stable@xxxxxxxxxxxxxxx> # v6.7 > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > > Miklos, > > This v4 is a combination of your v2 and my v3 patches to optimize > xwhiteouts readdir for the intersection of a dentry with xwhiteouts > on any layer and a layer with any xwhiteouts on any directory. > > Alex, > > Please re-review/test. Looks good to me. The only thing I worry about is the atomicity of ovl_layer_set_xwhiteouts(). Doesn't that need a barrier or something? Anyway: Reviewed-by: Alexander Larsson <alexl@xxxxxxxxxx> Tested-by: Alexander Larsson <alexl@xxxxxxxxxx> > Thanks, > Amir. > > Changes since v3: > - Lazy set of per-layer xwhiteouts flag > - Check both per-layer and per-dir flags for readdir > - Update overlayfs.rst > > Documentation/filesystems/overlayfs.rst | 16 +++++++-- > fs/overlayfs/namei.c | 43 ++++++++++++++--------- > fs/overlayfs/overlayfs.h | 23 +++++++++---- > fs/overlayfs/ovl_entry.h | 2 ++ > fs/overlayfs/readdir.c | 7 ++-- > fs/overlayfs/super.c | 15 ++++++++ > fs/overlayfs/util.c | 46 +++++++++++++---------- > -- > 7 files changed, 102 insertions(+), 50 deletions(-) > > diff --git a/Documentation/filesystems/overlayfs.rst > b/Documentation/filesystems/overlayfs.rst > index 1c244866041a..165514401441 100644 > --- a/Documentation/filesystems/overlayfs.rst > +++ b/Documentation/filesystems/overlayfs.rst > @@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs to record > in the upper filesystem > that files have been removed. This is done using whiteouts and > opaque > directories (non-directories are always opaque). > > -A whiteout is created as a character device with 0/0 device number. > +A whiteout is created as a character device with 0/0 device number > or > +as a zero-size regular file with the xattr > "trusted.overlay.whiteout". > + > When a whiteout is found in the upper level of a merged directory, > any > matching name in the lower level is ignored, and the whiteout itself > is also hidden. > @@ -154,6 +156,13 @@ A directory is made opaque by setting the xattr > "trusted.overlay.opaque" > to "y". Where the upper filesystem contains an opaque directory, > any > directory in the lower filesystem with the same name is ignored. > > +An opaque directory should not conntain any whiteouts, because they > do not > +serve any purpose. A merge directory containing regular files with > the xattr > +"trusted.overlay.whiteout", should be additionally marked by setting > the xattr > +"trusted.overlay.opaque" to "x" on the merge directory itself. > +This is needed to avoid the overhead of checking the > "trusted.overlay.whiteout" > +on all entries during readdir in the common case. > + > readdir > ------- > > @@ -534,8 +543,9 @@ A lower dir with a regular whiteout will always > be handled by the overlayfs > mount, so to support storing an effective whiteout file in an > overlayfs mount an > alternative form of whiteout is supported. This form is a regular, > zero-size > file with the "overlay.whiteout" xattr set, inside a directory with > the > -"overlay.whiteouts" xattr set. Such whiteouts are never created by > overlayfs, > -but can be used by userspace tools (like containers) that generate > lower layers. > +"overlay.opaque" xattr set to "x" (see `whiteouts and opaque > directories`_). > +These alternative whiteouts are never created by overlayfs, but can > be used by > +userspace tools (like containers) that generate lower layers. > These alternative whiteouts can be escaped using the standard xattr > escape > mechanism in order to properly nest to any depth. > > diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c > index 984ffdaeed6c..5764f91d283e 100644 > --- a/fs/overlayfs/namei.c > +++ b/fs/overlayfs/namei.c > @@ -18,10 +18,11 @@ > > struct ovl_lookup_data { > struct super_block *sb; > - struct vfsmount *mnt; > + const struct ovl_layer *layer; > struct qstr name; > bool is_dir; > bool opaque; > + bool xwhiteouts; > bool stop; > bool last; > char *redirect; > @@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs > *ofs, struct ovl_fh *fh, > return real; > } > > -static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path > *path) > -{ > - return ovl_path_check_dir_xattr(ofs, path, > OVL_XATTR_OPAQUE); > -} > - > static struct dentry *ovl_lookup_positive_unlocked(struct > ovl_lookup_data *d, > const char *name, > struct dentry > *base, int len, > bool > drop_negative) > { > - struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), > name, base, len); > + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer- > >mnt), name, > + base, len); > > if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret- > >d_flags))) { > if (drop_negative && ret->d_lockref.count == 1) { > @@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dentry > *base, struct ovl_lookup_data *d, > size_t prelen, const char *post, > struct dentry **ret, bool > drop_negative) > { > + struct ovl_fs *ofs = OVL_FS(d->sb); > struct dentry *this; > struct path path; > int err; > bool last_element = !post[0]; > + bool is_upper = d->layer->idx == 0; > + char val; > > this = ovl_lookup_positive_unlocked(d, name, base, namelen, > drop_negative); > if (IS_ERR(this)) { > @@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dentry *base, > struct ovl_lookup_data *d, > } > > path.dentry = this; > - path.mnt = d->mnt; > - if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { > + path.mnt = d->layer->mnt; > + if (ovl_path_is_whiteout(ofs, &path)) { > d->stop = d->opaque = true; > goto put_and_out; > } > @@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dentry *base, > struct ovl_lookup_data *d, > d->stop = true; > goto put_and_out; > } > - err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, > NULL); > + err = ovl_check_metacopy_xattr(ofs, &path, NULL); > if (err < 0) > goto out_err; > > @@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dentry > *base, struct ovl_lookup_data *d, > if (d->last) > goto out; > > - if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { > + /* overlay.opaque=x means xwhiteouts directory */ > + val = ovl_get_opaquedir_val(ofs, &path); > + if (last_element && !is_upper && val == 'x') { > + d->xwhiteouts = true; > + ovl_layer_set_xwhiteouts(ofs, d->layer); > + } else if (val == 'y') { > d->stop = true; > if (last_element) > d->opaque = true; > @@ -863,7 +868,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs > *ofs, struct dentry *upper, > * Returns next layer in stack starting from top. > * Returns -1 if this is the last layer. > */ > -int ovl_path_next(int idx, struct dentry *dentry, struct path *path) > +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, > + const struct ovl_layer **layer) > { > struct ovl_entry *oe = OVL_E(dentry); > struct ovl_path *lowerstack = ovl_lowerstack(oe); > @@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry > *dentry, struct path *path) > BUG_ON(idx < 0); > if (idx == 0) { > ovl_path_upper(dentry, path); > - if (path->dentry) > + if (path->dentry) { > + *layer = &OVL_FS(dentry->d_sb)->layers[0]; > return ovl_numlower(oe) ? 1 : -1; > + } > idx++; > } > BUG_ON(idx > ovl_numlower(oe)); > path->dentry = lowerstack[idx - 1].dentry; > - path->mnt = lowerstack[idx - 1].layer->mnt; > + *layer = lowerstack[idx - 1].layer; > + path->mnt = (*layer)->mnt; > > return (idx < ovl_numlower(oe)) ? idx + 1 : -1; > } > @@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode *dir, > struct dentry *dentry, > old_cred = ovl_override_creds(dentry->d_sb); > upperdir = ovl_dentry_upper(dentry->d_parent); > if (upperdir) { > - d.mnt = ovl_upper_mnt(ofs); > + d.layer = &ofs->layers[0]; > err = ovl_lookup_layer(upperdir, &d, &upperdentry, > true); > if (err) > goto out; > @@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode *dir, > struct dentry *dentry, > else if (d.is_dir || !ofs->numdatalayer) > d.last = lower.layer->idx == > ovl_numlower(roe); > > - d.mnt = lower.layer->mnt; > + d.layer = lower.layer; > err = ovl_lookup_layer(lower.dentry, &d, &this, > false); > if (err) > goto out_put; > @@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode *dir, > struct dentry *dentry, > > if (upperopaque) > ovl_dentry_set_opaque(dentry); > + if (d.xwhiteouts) > + ovl_dentry_set_xwhiteouts(dentry); > > if (upperdentry) > ovl_dentry_set_upper_alias(dentry); > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 5ba11eb43767..ee949f3e7c77 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -50,7 +50,6 @@ enum ovl_xattr { > OVL_XATTR_METACOPY, > OVL_XATTR_PROTATTR, > OVL_XATTR_XWHITEOUT, > - OVL_XATTR_XWHITEOUTS, > }; > > enum ovl_inode_flag { > @@ -70,6 +69,8 @@ enum ovl_entry_flag { > OVL_E_UPPER_ALIAS, > OVL_E_OPAQUE, > OVL_E_CONNECTED, > + /* Lower stack may contain xwhiteout entries */ > + OVL_E_XWHITEOUTS, > }; > > enum { > @@ -477,6 +478,10 @@ bool ovl_dentry_test_flag(unsigned long flag, > struct dentry *dentry); > bool ovl_dentry_is_opaque(struct dentry *dentry); > bool ovl_dentry_is_whiteout(struct dentry *dentry); > void ovl_dentry_set_opaque(struct dentry *dentry); > +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); > +void ovl_dentry_set_xwhiteouts(struct dentry *dentry); > +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, > + const struct ovl_layer *layer); > bool ovl_dentry_has_upper_alias(struct dentry *dentry); > void ovl_dentry_set_upper_alias(struct dentry *dentry); > bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int > flags); > @@ -494,11 +499,10 @@ struct file *ovl_path_open(const struct path > *path, int flags); > int ovl_copy_up_start(struct dentry *dentry, int flags); > void ovl_copy_up_end(struct dentry *dentry); > bool ovl_already_copied_up(struct dentry *dentry, int flags); > -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path > *path, > - enum ovl_xattr ox); > +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path > *path, > + enum ovl_xattr ox); > bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct > path *path); > bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct > path *path); > -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const > struct path *path); > bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, > const struct path *upperpath); > > @@ -573,7 +577,13 @@ static inline bool ovl_is_impuredir(struct > super_block *sb, > .mnt = ovl_upper_mnt(ofs), > }; > > - return ovl_path_check_dir_xattr(ofs, &upperpath, > OVL_XATTR_IMPURE); > + return ovl_get_dir_xattr_val(ofs, &upperpath, > OVL_XATTR_IMPURE) == 'y'; > +} > + > +static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, > + const struct path *path) > +{ > + return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); > } > > static inline bool ovl_redirect_follow(struct ovl_fs *ofs) > @@ -680,7 +690,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct > dentry *origin, > struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh > *fh); > struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry > *upper, > struct dentry *origin, bool verify); > -int ovl_path_next(int idx, struct dentry *dentry, struct path > *path); > +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, > + const struct ovl_layer **layer); > int ovl_verify_lowerdata(struct dentry *dentry); > struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > unsigned int flags); > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h > index 5fa9c58af65f..b26d1824bf87 100644 > --- a/fs/overlayfs/ovl_entry.h > +++ b/fs/overlayfs/ovl_entry.h > @@ -40,6 +40,8 @@ struct ovl_layer { > int idx; > /* One fsid per unique underlying sb (upper fsid == 0) */ > int fsid; > + /* xwhiteouts were found on this layer */ > + bool has_xwhiteouts; > }; > > struct ovl_path { > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c > index e71156baa7bc..0ca8af060b0c 100644 > --- a/fs/overlayfs/readdir.c > +++ b/fs/overlayfs/readdir.c > @@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path > *realpath, > if (IS_ERR(realfile)) > return PTR_ERR(realfile); > > - rdd->in_xwhiteouts_dir = rdd->dentry && > - ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry- > >d_sb), realpath); > rdd->first_maybe_whiteout = NULL; > rdd->ctx.pos = 0; > do { > @@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct dentry > *dentry, struct list_head *list, > .is_lowest = false, > }; > int idx, next; > + const struct ovl_layer *layer; > > for (idx = 0; idx != -1; idx = next) { > - next = ovl_path_next(idx, dentry, &realpath); > + next = ovl_path_next(idx, dentry, &realpath, > &layer); > rdd.is_upper = ovl_dentry_upper(dentry) == > realpath.dentry; > + rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && > + ovl_dentry_has_xwhiteouts(de > ntry); > > if (next != -1) { > err = ovl_dir_read(&realpath, &rdd); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 4ab66e3d4cff..2eef6c70b2ae 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -1249,6 +1249,7 @@ static struct dentry *ovl_get_root(struct > super_block *sb, > struct ovl_entry *oe) > { > struct dentry *root; > + struct ovl_fs *ofs = OVL_FS(sb); > struct ovl_path *lowerpath = ovl_lowerstack(oe); > unsigned long ino = d_inode(lowerpath->dentry)->i_ino; > int fsid = lowerpath->layer->fsid; > @@ -1270,6 +1271,20 @@ static struct dentry *ovl_get_root(struct > super_block *sb, > ovl_set_flag(OVL_IMPURE, d_inode(root)); > } > > + /* Look for xwhiteouts marker except in the lowermost layer > */ > + for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) > { > + struct path path = { > + .mnt = lowerpath->layer->mnt, > + .dentry = lowerpath->dentry, > + }; > + > + /* overlay.opaque=x means xwhiteouts directory */ > + if (ovl_get_opaquedir_val(ofs, &path) == 'x') { > + ovl_layer_set_xwhiteouts(ofs, lowerpath- > >layer); > + ovl_dentry_set_xwhiteouts(root); > + } > + } > + > /* Root is always merge -> can have whiteouts */ > ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); > ovl_dentry_set_flag(OVL_E_CONNECTED, root); > diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c > index 0217094c23ea..5667f21d0b73 100644 > --- a/fs/overlayfs/util.c > +++ b/fs/overlayfs/util.c > @@ -461,6 +461,26 @@ void ovl_dentry_set_opaque(struct dentry > *dentry) > ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); > } > > +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) > +{ > + return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); > +} > + > +void ovl_dentry_set_xwhiteouts(struct dentry *dentry) > +{ > + ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); > +} > + > +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, > + const struct ovl_layer *layer) > +{ > + if (layer->has_xwhiteouts) > + return; > + > + /* Write once to read-mostly layer properties */ > + ((struct ovl_layer *)layer)->has_xwhiteouts = true; > +} > + > /* > * For hard links and decoded file handles, it's possible for > ovl_dentry_upper() > * to return positive, while there's no actual upper alias for the > inode. > @@ -739,19 +759,6 @@ bool ovl_path_check_xwhiteout_xattr(struct > ovl_fs *ofs, const struct path *path) > return res >= 0; > } > > -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const > struct path *path) > -{ > - struct dentry *dentry = path->dentry; > - int res; > - > - /* xattr.whiteouts must be a directory */ > - if (!d_is_dir(dentry)) > - return false; > - > - res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, > NULL, 0); > - return res >= 0; > -} > - > /* > * Load persistent uuid from xattr into s_uuid if found, or store a > new > * random generated value in s_uuid and in xattr. > @@ -811,20 +818,17 @@ bool ovl_init_uuid_xattr(struct super_block > *sb, struct ovl_fs *ofs, > return false; > } > > -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path > *path, > - enum ovl_xattr ox) > +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path > *path, > + enum ovl_xattr ox) > { > int res; > char val; > > if (!d_is_dir(path->dentry)) > - return false; > + return 0; > > res = ovl_path_getxattr(ofs, path, ox, &val, 1); > - if (res == 1 && val == 'y') > - return true; > - > - return false; > + return res == 1 ? val : 0; > } > > #define OVL_XATTR_OPAQUE_POSTFIX "opaque" > @@ -837,7 +841,6 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, > const struct path *path, > #define OVL_XATTR_METACOPY_POSTFIX "metacopy" > #define OVL_XATTR_PROTATTR_POSTFIX "protattr" > #define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" > -#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" > > #define OVL_XATTR_TAB_ENTRY(x) \ > [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ > @@ -854,7 +857,6 @@ const char *const ovl_xattr_table[][2] = { > OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), > OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), > OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), > - OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), > }; > > int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry > *upperdentry, -- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- =-=-= Alexander Larsson Red Hat, Inc alexl@xxxxxxxxxx alexander.larsson@xxxxxxxxx He's a fiendish overambitious jungle king with a robot buddy named Sparky. She's a radical Bolivian hooker who inherited a spooky stately manor from her late maiden aunt. They fight crime!