On Mon, 2021-11-29 at 08:53 -0500, Stefan Berger wrote: > On 11/29/21 07:50, James Bottomley wrote: > > On Sun, 2021-11-28 at 22:58 -0600, Serge E. Hallyn wrote: > > > On Sat, Nov 27, 2021 at 04:45:49PM +0000, James Bottomley wrote: > > > > Currently we get one entry in the IMA log per unique file > > > > event. So, if you have a measurement policy and it measures a > > > > particular binary it will not get measured again if it is > > > > subsequently executed. For Namespaced IMA, the correct > > > > behaviour > > > > seems to be to log once per inode per namespace (so every > > > > unique > > > > execution in a namespace gets a separate log entry). Since > > > > logging > > > > once per inode per namespace is > > > I suspect I'll need to do a more in depth reading of the existing > > > code, but I'll ask the lazy question anyway (since you say "the > > > correct behavior seems to be") - is it actually important that > > > files which were appraised under a parent namespace's policy > > > already > > > should be logged again? > > I think so. For a couple of reasons, assuming the namespace > > eventually > > gets its own log entries, which the next incremental patch proposed > > to > > do by virtualizing the securityfs entries. If you don't do this: > > To avoid duplicate efforts, an implementation of a virtualized > securityfs is in this series here: > > https://github.com/stefanberger/linux-ima-namespaces/commits/v5.15%2Bimans.20211119.v3 > > It starts with 'securityfs: Prefix global variables with secruityfs_' That's quite a big patch series. I already actually implemented this as part of the RFC for getting the per namespace measurement log. The attached is basically what I did. Most of the time we don't require namespacing the actual virtualfs file, because it's world readable. IMA has a special requirement in this regard because the IMA files should be readable (and writeable when we get around to policy updates) by the admin of the namespace but their protection is 0640 or 0440. I thought the simplest solution would be to make an additional flag that coped with the permissions and a per-inode flag way of making the file as "accessible by userns admin". Doing something simple like this gives a much smaller diffstat: fs/stat.c | 8 ++++++++ include/linux/fs.h | 1 + include/linux/security.h | 2 +- kernel/capability.c | 6 ++++-- security/inode.c | 6 ++++-- 5 files changed, 18 insertions(+), 5 deletions(-) The full diff is below so you can see how I did it. There's a sort of serendipity bit where it turns out the next i_flag entry is at bit 17 meaning it can be overloaded on the mode which is a u16, but other than that I think it's pretty clean. James --- diff --git a/fs/stat.c b/fs/stat.c index 28d2020ba1f4..2025dfea6720 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -49,6 +49,14 @@ void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode, stat->nlink = inode->i_nlink; stat->uid = i_uid_into_mnt(mnt_userns, inode); stat->gid = i_gid_into_mnt(mnt_userns, inode); + if (inode->i_flags & S_NSUSER) { + struct user_namespace *ns = current_user_ns(); + + if (uid_eq(stat->uid, GLOBAL_ROOT_UID)) + stat->uid = ns->owner; + if (gid_eq(stat->gid, GLOBAL_ROOT_GID)) + stat->gid = ns->group; + } stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1cb616fc1105..8bebac8463ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2249,6 +2249,7 @@ struct super_operations { #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ +#define S_NSUSER (1 << 17) /* uid/gid changes with user_ns */ /* * Note that nosuid etc flags are inode-specific: setting some file-system diff --git a/include/linux/security.h b/include/linux/security.h index bbf44a466832..1cfe8832f5e0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1919,7 +1919,7 @@ static inline void security_audit_rule_free(void *lsmrule) #ifdef CONFIG_SECURITYFS -extern struct dentry *securityfs_create_file(const char *name, umode_t mode, +extern struct dentry *securityfs_create_file(const char *name, unsigned int mode, struct dentry *parent, void *data, const struct file_operations *fops); extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); diff --git a/kernel/capability.c b/kernel/capability.c index 46a361dde042..54907ffa4947 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -506,8 +506,10 @@ bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns, { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && - privileged_wrt_inode_uidgid(ns, mnt_userns, inode); + if (ns_capable(ns, cap) && + privileged_wrt_inode_uidgid(ns, mnt_userns, inode)) + return true; + return (inode->i_flags & S_NSUSER) && ns_capable(ns, cap); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); diff --git a/security/inode.c b/security/inode.c index 6c326939750d..917514ddfce4 100644 --- a/security/inode.c +++ b/security/inode.c @@ -104,7 +104,7 @@ static struct file_system_type fs_type = { * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ -static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, +static struct dentry *securityfs_create_dentry(const char *name, unsigned int mode, struct dentry *parent, void *data, const struct file_operations *fops, const struct inode_operations *iops) @@ -112,6 +112,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, struct dentry *dentry; struct inode *dir, *inode; int error; + unsigned int flags = mode & 0xffff0000; if (!(mode & S_IFMT)) mode = (mode & S_IALLUGO) | S_IFREG; @@ -147,6 +148,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_private = data; + inode->i_flags = flags; if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -197,7 +199,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ -struct dentry *securityfs_create_file(const char *name, umode_t mode, +struct dentry *securityfs_create_file(const char *name, unsigned int mode, struct dentry *parent, void *data, const struct file_operations *fops) {