This is a proof of concept interface for replacing the contentious FS_IOC_[GS]ETFLAGS interface with one that presents itself as the xattr 'system.iflags'. Instead of using integer inode flags, this interface uses a comma-separated string of words, such as "extents,immutable" to describe the inode flags. This ought to enable all filesystems to introduce arbitrary flags, with any sort of backend they want, while also taking advantage of generic fs flags. The initial conversion is for ext4, though given the similarities of most filesystems' implementations, converting the rest shouldn't be difficult. How we get everyone to agree on common flag names is anyone's guess. Includes some helper methods to handle the string<->int conversion, and a tweak to the generic xattr code to allow setting iflags on an immutable file. Usage: # setfattr -n system.iflags -v 'extents,append' /path/file # getfattr -n system.iflags /path/file system.iflags="secrm,unrm,sync,immutable,nodump,noatime,journal_data,extents" Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/ext4/Makefile | 2 fs/ext4/ext4.h | 3 + fs/ext4/ioctl.c | 198 ++++++++++++++++++++++++-------------------- fs/ext4/xattr.c | 1 fs/ext4/xattr.h | 1 fs/ext4/xattr_iflags.c | 90 ++++++++++++++++++++ fs/xattr.c | 9 ++ include/linux/strflags.h | 27 ++++++ include/uapi/linux/xattr.h | 2 lib/Makefile | 2 lib/strflags.c | 117 ++++++++++++++++++++++++++ 11 files changed, 358 insertions(+), 94 deletions(-) create mode 100644 fs/ext4/xattr_iflags.c create mode 100644 include/linux/strflags.h create mode 100644 lib/strflags.c diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 0310fec..7126958 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -8,7 +8,7 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o + xattr_trusted.o inline.o xattr_iflags.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf6c5cd..24837ab 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2660,6 +2660,9 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); +/* ioctl.c */ +extern int ext4_set_iflags(struct inode *inode, int flags); + /* namei.c */ extern const struct inode_operations ext4_dir_inode_operations; extern const struct inode_operations ext4_special_inode_operations; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 60589b6..c6f21c9 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -213,12 +213,115 @@ swap_boot_out: return err; } +/* + * Set inode flags. Assume we already have mnt_want_write_file and i_mutex. + */ +int ext4_set_iflags(struct inode *inode, int flags) +{ + handle_t *handle = NULL; + int err, migrate = 0; + struct ext4_iloc iloc; + unsigned int oldflags, mask, i; + unsigned int jflag; + struct ext4_inode_info *ei = EXT4_I(inode); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + flags = ext4_mask_flags(inode->i_mode, flags); + + err = -EPERM; + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + goto flags_out; + + oldflags = ei->i_flags; + + /* The JOURNAL_DATA flag is modifiable only by root */ + jflag = flags & EXT4_JOURNAL_DATA_FL; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; + } + + /* + * The JOURNAL_DATA flag can only be changed by + * the relevant capability. + */ + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if (!capable(CAP_SYS_RESOURCE)) + goto flags_out; + } + if ((flags ^ oldflags) & EXT4_EXTENTS_FL) + migrate = 1; + + if (flags & EXT4_EOFBLOCKS_FL) { + /* we don't support adding EOFBLOCKS flag */ + if (!(oldflags & EXT4_EOFBLOCKS_FL)) { + err = -EOPNOTSUPP; + goto flags_out; + } + } else if (oldflags & EXT4_EOFBLOCKS_FL) + ext4_truncate(inode); + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto flags_out; + } + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { + if (!(mask & EXT4_FL_USER_MODIFIABLE)) + continue; + if (mask & flags) + ext4_set_inode_flag(inode, i); + else + ext4_clear_inode_flag(inode, i); + } + + ext4_set_inode_flags(inode); + inode->i_ctime = ext4_current_time(inode); + + err = ext4_mark_iloc_dirty(handle, inode, &iloc); +flags_err: + ext4_journal_stop(handle); + if (err) + goto flags_out; + + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) + err = ext4_change_inode_journal_flag(inode, jflag); + if (err) + goto flags_out; + if (migrate) { + if (flags & EXT4_EXTENTS_FL) + err = ext4_ext_migrate(inode); + else + err = ext4_ind_migrate(inode); + } + +flags_out: + return err; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; + int err; ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); @@ -227,13 +330,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case EXT4_IOC_SETFLAGS: { - handle_t *handle = NULL; - int err, migrate = 0; - struct ext4_iloc iloc; - unsigned int oldflags, mask, i; - unsigned int jflag; - + case EXT4_IOC_SETFLAGS: if (!inode_owner_or_capable(inode)) return -EACCES; @@ -244,95 +341,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) return err; - flags = ext4_mask_flags(inode->i_mode, flags); - - err = -EPERM; mutex_lock(&inode->i_mutex); - /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) - goto flags_out; - - oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT4_JOURNAL_DATA_FL; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto flags_out; - } - - /* - * The JOURNAL_DATA flag can only be changed by - * the relevant capability. - */ - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) - goto flags_out; - } - if ((flags ^ oldflags) & EXT4_EXTENTS_FL) - migrate = 1; - - if (flags & EXT4_EOFBLOCKS_FL) { - /* we don't support adding EOFBLOCKS flag */ - if (!(oldflags & EXT4_EOFBLOCKS_FL)) { - err = -EOPNOTSUPP; - goto flags_out; - } - } else if (oldflags & EXT4_EOFBLOCKS_FL) - ext4_truncate(inode); - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto flags_out; - } - if (IS_SYNC(inode)) - ext4_handle_sync(handle); - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto flags_err; - - for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { - if (!(mask & EXT4_FL_USER_MODIFIABLE)) - continue; - if (mask & flags) - ext4_set_inode_flag(inode, i); - else - ext4_clear_inode_flag(inode, i); - } - - ext4_set_inode_flags(inode); - inode->i_ctime = ext4_current_time(inode); - - err = ext4_mark_iloc_dirty(handle, inode, &iloc); -flags_err: - ext4_journal_stop(handle); - if (err) - goto flags_out; - - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) - err = ext4_change_inode_journal_flag(inode, jflag); - if (err) - goto flags_out; - if (migrate) { - if (flags & EXT4_EXTENTS_FL) - err = ext4_ext_migrate(inode); - else - err = ext4_ind_migrate(inode); - } - -flags_out: + err = ext4_set_iflags(inode, flags); mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); return err; - } case EXT4_IOC_GETVERSION: case EXT4_IOC_GETVERSION_OLD: return put_user(inode->i_generation, (int __user *) arg); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1423c48..64963e4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -105,6 +105,7 @@ static const struct xattr_handler *ext4_xattr_handler_map[] = { }; const struct xattr_handler *ext4_xattr_handlers[] = { + &ext4_xattr_iflags_handler, &ext4_xattr_user_handler, &ext4_xattr_trusted_handler, #ifdef CONFIG_EXT4_FS_POSIX_ACL diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index c767dbd..2559583 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -99,6 +99,7 @@ extern const struct xattr_handler ext4_xattr_trusted_handler; extern const struct xattr_handler ext4_xattr_acl_access_handler; extern const struct xattr_handler ext4_xattr_acl_default_handler; extern const struct xattr_handler ext4_xattr_security_handler; +extern const struct xattr_handler ext4_xattr_iflags_handler; extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); diff --git a/fs/ext4/xattr_iflags.c b/fs/ext4/xattr_iflags.c new file mode 100644 index 0000000..e3c5472 --- /dev/null +++ b/fs/ext4/xattr_iflags.c @@ -0,0 +1,90 @@ +/* + * linux/fs/ext4/xattr_iflags.c + * Handler for using the extended attribute interface to store inode flags. + * + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@xxxxxxxxxx> + */ +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/security.h> +#include <linux/slab.h> +#include <linux/strflags.h> +#include "ext4_jbd2.h" +#include "ext4.h" +#include "xattr.h" + +static struct flag_db ext4_iflags = { + .flag_sz = sizeof(int), + .maps = { + {"secrm", EXT4_SECRM_FL}, + {"unrm", EXT4_UNRM_FL}, + {"compr", EXT4_COMPR_FL}, + {"sync", EXT4_SYNC_FL}, + {"immutable", EXT4_IMMUTABLE_FL}, + {"append", EXT4_APPEND_FL}, + {"nodump", EXT4_NODUMP_FL}, + {"noatime", EXT4_NOATIME_FL}, + {"htree_dir", EXT4_INDEX_FL}, + {"journal_data", EXT4_JOURNAL_DATA_FL}, + {"notail", EXT4_NOTAIL_FL}, + {"dirsync", EXT4_DIRSYNC_FL}, + {"topdir", EXT4_TOPDIR_FL}, + {"extents", EXT4_EXTENTS_FL}, + {NULL, 0}, + } +}; + +static size_t +ext4_xattr_iflags_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) +{ + const size_t prefix_len = sizeof(XATTR_NAME_IFLAGS)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_NAME_IFLAGS, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext4_xattr_iflags_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct ext4_inode_info *ei; + + ei = EXT4_I(dentry->d_inode); + return flags_to_string(&ext4_iflags, &ei->i_flags, buffer, size); +} + +static int +ext4_xattr_iflags_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + char *string = (char *)value; + int ret, moo; + + string = kmalloc(size + 1, GFP_KERNEL); + if (!string) + return -ENOMEM; + memcpy(string, value, size); + string[size] = 0; + + ret = string_to_flags(&ext4_iflags, string, size, &moo); + if (ret) + goto out; + + ret = ext4_set_iflags(dentry->d_inode, moo); +out: + kfree(string); + return ret; +} + +const struct xattr_handler ext4_xattr_iflags_handler = { + .prefix = XATTR_NAME_IFLAGS, + .list = ext4_xattr_iflags_list, + .get = ext4_xattr_iflags_get, + .set = ext4_xattr_iflags_set, +}; diff --git a/fs/xattr.c b/fs/xattr.c index 3377dff..bf0fe89 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -32,6 +32,15 @@ static int xattr_permission(struct inode *inode, const char *name, int mask) { /* + * Always allow read/write of inode flag attribute. + */ + if (!strcmp(name, XATTR_NAME_IFLAGS)) { + if (mask & MAY_WRITE) + return inode_owner_or_capable(inode) ? 0 : -EACCES; + return 0; + } + + /* * We can never set or remove an extended attribute on a read-only * filesystem or on an immutable / append-only inode. */ diff --git a/include/linux/strflags.h b/include/linux/strflags.h new file mode 100644 index 0000000..6c8067c --- /dev/null +++ b/include/linux/strflags.h @@ -0,0 +1,27 @@ +/* + * File: linux/strflags.h + * Convert flags to pretty strings. + * + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@xxxxxxxxxx> + */ +#ifndef _LINUX_STRFLAGS_H_ +#define _LINUX_STRFLAGS_H_ + +#include <linux/types.h> + +struct flag_map { + const char *string; + unsigned long flag; +}; + +struct flag_db { + size_t flag_sz; + struct flag_map maps[]; +}; + +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf, + size_t buf_len); +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buf_len, + void *flags); + +#endif /* _LINUX_STRFLAGS_H_ */ diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index e4629b9..8cd78ae 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -63,5 +63,7 @@ #define XATTR_POSIX_ACL_DEFAULT "posix_acl_default" #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT +#define XATTR_IFLAGS "iflags" +#define XATTR_NAME_IFLAGS XATTR_SYSTEM_PREFIX XATTR_IFLAGS #endif /* _UAPI_LINUX_XATTR_H */ diff --git a/lib/Makefile b/lib/Makefile index a459c31..f3a517d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o + percpu-refcount.o percpu_ida.o strflags.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/strflags.c b/lib/strflags.c new file mode 100644 index 0000000..d1a34e7 --- /dev/null +++ b/lib/strflags.c @@ -0,0 +1,117 @@ +/* + * File: lib/strflags.c + * Convert flags to pretty strings. + * + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@xxxxxxxxxx> + */ +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/strflags.h> +#include <linux/errno.h> + +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf, + size_t buf_len) +{ + struct flag_map *mapping; + char *b; + unsigned long fl; + int used, first; + size_t sz; + + switch (flagdb->flag_sz) { + case 1: + fl = *(u8 *)flags; + break; + case 2: + fl = *(u16 *)flags; + break; + case 4: + fl = *(u32 *)flags; + break; + case 8: + fl = *(u64 *)flags; + break; + default: + return -EINVAL; + } + + /* Calculating buffer size */ + sz = 0; + for (mapping = flagdb->maps; mapping->string; mapping++) { + if (mapping->flag & fl) { + sz += strlen(mapping->string); + sz++; + } + } + + if (buf == NULL) + return sz; + if (buf_len < sz) + return -ERANGE; + + /* Stringify the flags */ + b = buf; + first = 1; + for (mapping = flagdb->maps; mapping->string; mapping++) { + if (mapping->flag & fl) { + used = snprintf(b, buf_len, "%s%s", + (first ? "" : ","), + mapping->string); + first = 0; + buf_len -= used; + b += used; + } + } + + return b - buf; +} +EXPORT_SYMBOL_GPL(flags_to_string); + +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buflen, + void *flags) +{ + struct flag_map *mapping; + unsigned long fl = 0; + char *p, *q; + char c; + int found; + + for (p = buf; p < buf + buflen; p = q + 1) { + for (q = p; *q != 0 && *q != ','; q++) + ; /* empty */ + if (p == q) + continue; + + c = *q; *q = 0; + found = 0; + for (mapping = flagdb->maps; mapping->string; mapping++) + if (strcmp(p, mapping->string) == 0) { + fl |= mapping->flag; + found = 1; + } + if (!found) + return -EINVAL; + *q = c; + p = q + 1; + } + + switch (flagdb->flag_sz) { + case 1: + *(u8 *)flags = fl; + break; + case 2: + *(u16 *)flags = fl; + break; + case 4: + *(u32 *)flags = fl; + break; + case 8: + *(u64 *)flags = fl; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(string_to_flags); -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html