Overlayfs does not cache ACL's (to avoid double caching). Instead it just calls the underlying filesystem's i_op->get_acl(), which will return the cached value, if possible. In rcu path walk, however, get_cached_acl_rcu() is employed to get the value from the cache, which will fail on overlayfs resulting in dropping out of rcu walk mode. This can result in a big performance hit in certain situations. Fix by calling ->get_acl() with LOOKUP_RCU flag in case of ACL_DONT_CACHE (which indicates pass-through) Reported-by: garyhuang <zjh.20052005@xxxxxxx> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> --- fs/overlayfs/inode.c | 7 ++++--- fs/posix_acl.c | 8 +++++++- include/linux/fs.h | 5 +++++ include/linux/posix_acl.h | 3 ++- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 727154a1d3ce..6a55231b262a 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -13,6 +13,7 @@ #include <linux/fiemap.h> #include <linux/fileattr.h> #include <linux/security.h> +#include <linux/namei.h> #include "overlayfs.h" @@ -454,12 +455,12 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, int flags) const struct cred *old_cred; struct posix_acl *acl; - if (flags) - return ERR_PTR(-EINVAL); - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; + if (flags & LOOKUP_RCU) + return get_cached_acl_rcu(realinode, type); + old_cred = ovl_override_creds(inode->i_sb); acl = get_acl(realinode, type); revert_creds(old_cred); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 6b7f793e2b6f..4d1c6c266cf0 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -22,6 +22,7 @@ #include <linux/xattr.h> #include <linux/export.h> #include <linux/user_namespace.h> +#include <linux/namei.h> static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -56,7 +57,12 @@ EXPORT_SYMBOL(get_cached_acl); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - return rcu_dereference(*acl_by_type(inode, type)); + struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); + + if (acl == ACL_DONT_CACHE) + acl = inode->i_op->get_acl(inode, type, LOOKUP_RCU); + + return acl; } EXPORT_SYMBOL(get_cached_acl_rcu); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c56d4fc4efe..20b7db2d0a85 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -581,6 +581,11 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +/* + * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to + * cache the ACL. This also means that ->get_acl() can be called in RCU mode + * with the LOOKUP_RCU flag. + */ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 307094ebb88c..b65c877d92b8 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -72,6 +72,8 @@ extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct user_namespace *, struct inode *, int, struct posix_acl *); +struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); + #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, @@ -84,7 +86,6 @@ extern int simple_set_acl(struct user_namespace *, struct inode *, extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); -struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode); -- 2.31.1