From: Tao Ma <boyu.mt@xxxxxxxxxx> Implement inline data with xattr. This idea is inspired by Andreas. So now we use "system.data" to store xattr. For inode_size = 256, currently we uses all the space between i_extra_isize and inode_size. For inode_size > 256, we use half of that space. Signed-off-by: Tao Ma <boyu.mt@xxxxxxxxxx> --- fs/ext4/Makefile | 2 +- fs/ext4/ext4.h | 5 + fs/ext4/inline.c | 459 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/inode.c | 4 +- fs/ext4/xattr.h | 62 ++++++++ 5 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 fs/ext4/inline.c diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 56fd8f8..e88b7a6 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -9,6 +9,6 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ mmp.o indirect.o -ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o inline.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b0e26a..3e560ba 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -869,6 +869,10 @@ struct ext4_inode_info { /* on-disk additional length */ __u16 i_extra_isize; + /* Indicate the inline data space. */ + u16 i_inline_off; + u16 i_inline_size; + #ifdef CONFIG_QUOTA /* quota space reservation, managed internally by quota code */ qsize_t i_reserved_quota; @@ -1297,6 +1301,7 @@ enum { EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ EXT4_STATE_NEWENTRY, /* File just added to dir */ EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ + EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ }; #define EXT4_INODE_BIT_FNS(name, field, offset) \ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c new file mode 100644 index 0000000..d3aa377 --- /dev/null +++ b/fs/ext4/inline.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2011 Taobao. + * Written by Tao Ma <boyu.mt@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "ext4_jbd2.h" +#include "ext4.h" +#include "xattr.h" + +#define EXT4_XATTR_SYSTEM_DATA_NAME "data" +#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) + +int ext4_get_inline_size(struct inode *inode) +{ + if (EXT4_I(inode)->i_inline_off) + return EXT4_I(inode)->i_inline_size; + + return 0; +} + +static int get_max_inline_xattr_value_size(struct inode *inode, + struct ext4_iloc *iloc) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + struct ext4_inode *raw_inode; + int free, min_offs; + + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) + return EXT4_XATTR_SIZE(EXT4_SB(inode->i_sb)->s_inode_size - + EXT4_GOOD_OLD_INODE_SIZE - + EXT4_I(inode)->i_extra_isize - + sizeof(struct ext4_xattr_ibody_header) - + EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA_NAME)) - + EXT4_XATTR_ROUND - sizeof(__u32)); + + down_read(&EXT4_I(inode)->xattr_sem); + raw_inode = ext4_raw_inode(iloc); + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + min_offs = EXT4_SB(inode->i_sb)->s_inode_size - + EXT4_GOOD_OLD_INODE_SIZE - + EXT4_I(inode)->i_extra_isize - + sizeof(struct ext4_xattr_ibody_header); + + /* Compute min_offs. */ + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { + if (!entry->e_value_block && entry->e_value_size) { + size_t offs = le16_to_cpu(entry->e_value_offs); + if (offs < min_offs) + min_offs = offs; + } + } + free = min_offs - + ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); + + if (EXT4_I(inode)->i_inline_off) { + entry = (struct ext4_xattr_entry *) + ((void *)raw_inode + EXT4_I(inode)->i_inline_off); + + free += le32_to_cpu(entry->e_value_size); + goto out; + } + + free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA_NAME)); + + if (free > EXT4_XATTR_ROUND) + free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); + else + free = 0; + +out: + up_read(&EXT4_I(inode)->xattr_sem); + return free; +} + +/* + * Get the maximum size we now can store in an inode. + * If we can't find the space for a xattr entry, don't use the space + * of the extents since we have no space to indicate the inline data. + */ +int ext4_get_max_inline_size(struct inode *inode) +{ + int error, max_inline_size; + struct ext4_iloc iloc; + + if (EXT4_I(inode)->i_extra_isize == 0) + return 0; + + error = ext4_get_inode_loc(inode, &iloc); + if (error) + return error; + + max_inline_size = get_max_inline_xattr_value_size(inode, &iloc); + + brelse(iloc.bh); + + if (!max_inline_size) + return 0; + + return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; +} + +int ext4_has_inline_data(struct inode *inode) +{ + return EXT4_I(inode)->i_inline_off; +} + +int ext4_find_inline_data(struct inode *inode) +{ + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM_DATA, + .name = EXT4_XATTR_SYSTEM_DATA_NAME, + }; + int error; + + if (EXT4_I(inode)->i_extra_isize == 0) + return 0; + + error = ext4_get_inode_loc(inode, &is.iloc); + if (error) + return error; + + error = ext4_xattr_ibody_find(inode, &i, &is); + if (error) + goto out; + + if (!is.s.not_found) { + EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - + (void *)ext4_raw_inode(&is.iloc)); + EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + + le32_to_cpu(is.s.here->e_value_size); + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + } +out: + brelse(is.iloc.bh); + return error; +} + +int ext4_read_inline_data(struct inode *inode, void *buffer, unsigned int len) +{ + struct ext4_xattr_entry *entry; + struct ext4_xattr_ibody_header *header; + int cp_len = 0, error; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; + + if (!len) + return 0; + + error = ext4_get_inode_loc(inode, &iloc); + if (error) + return error; + + down_read(&EXT4_I(inode)->xattr_sem); + if(!EXT4_I(inode)->i_inline_off) { + ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", + inode->i_ino); + goto out; + } + BUG_ON(len > EXT4_I(inode)->i_inline_size); + + + cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ? + len : EXT4_MIN_INLINE_DATA_SIZE; + + raw_inode = ext4_raw_inode(&iloc); + memcpy(buffer, (void *)(raw_inode->i_block), cp_len); + + len -= cp_len; + buffer += cp_len; + + if (!len) + goto out; + + header = IHDR(inode, raw_inode); + entry = (struct ext4_xattr_entry *)((void *)raw_inode + + EXT4_I(inode)->i_inline_off); + + memcpy(buffer, + (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); + cp_len += len; + +out: + up_read(&EXT4_I(inode)->xattr_sem); + brelse(iloc.bh); + return cp_len; +} + +/* + * write the buffer to the inline inode. + * If 'create' is set, we don't need to do the extra copy in the xattr + * value since it is already handled by ext4_xattr_ibody_set. That saves + * us one memcpy. + */ +void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, + void *buffer, loff_t pos, unsigned int len) +{ + struct ext4_xattr_entry *entry; + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; + int cp_len = 0; + + BUG_ON(!EXT4_I(inode)->i_inline_off); + BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); + + raw_inode = ext4_raw_inode(iloc); + buffer += pos; + + if (pos < EXT4_MIN_INLINE_DATA_SIZE) { + cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ? + EXT4_MIN_INLINE_DATA_SIZE - pos : len; + memcpy((void *)raw_inode->i_block + pos, buffer, cp_len); + + len -= cp_len; + buffer += cp_len; + pos += cp_len; + } + + if (!len) + return; + + pos -= EXT4_MIN_INLINE_DATA_SIZE; + header = IHDR(inode, raw_inode); + entry = (struct ext4_xattr_entry *)((void *)raw_inode + + EXT4_I(inode)->i_inline_off); + + memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos, + buffer, len); +} + +static int ext4_create_inline_data(handle_t *handle, + struct inode *inode, unsigned len) +{ + int error; + void *value = NULL; + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM_DATA, + .name = EXT4_XATTR_SYSTEM_DATA_NAME, + }; + + error = ext4_get_inode_loc(inode, &is.iloc); + if (error) + return error; + + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto out; + + if (len > EXT4_MIN_INLINE_DATA_SIZE) { + value = (void *)empty_zero_page; + len -= EXT4_MIN_INLINE_DATA_SIZE; + } else { + value = ""; + len = 0; + } + + /* Insert the the xttr entry. */ + i.value = value; + i.value_len = len; + + error = ext4_xattr_ibody_find(inode, &i, &is); + if (error) + goto out; + + BUG_ON(!is.s.not_found); + + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (error) { + if (error == -ENOSPC) + ext4_clear_inode_state(inode, + EXT4_STATE_MAY_INLINE_DATA); + goto out; + } + + memset((void *)ext4_raw_inode(&is.iloc)->i_block, + 0, EXT4_MIN_INLINE_DATA_SIZE); + + EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - + (void *)ext4_raw_inode(&is.iloc)); + EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + get_bh(is.iloc.bh); + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); + +out: + brelse(is.iloc.bh); + return error; +} + +static int ext4_update_inline_data(handle_t *handle, struct inode *inode, + unsigned int len) +{ + int error; + void *value = NULL; + struct ext4_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM_DATA, + .name = EXT4_XATTR_SYSTEM_DATA_NAME, + }; + + /* If the old space is ok, write the data directly. */ + if (len <= EXT4_I(inode)->i_inline_size) + return 0; + + error = ext4_get_inode_loc(inode, &is.iloc); + if (error) + return error; + + error = ext4_xattr_ibody_find(inode, &i, &is); + if (error) + goto out; + + BUG_ON(is.s.not_found); + + len -= EXT4_MIN_INLINE_DATA_SIZE; + value = kzalloc(len, GFP_NOFS); + if (!value) + goto out; + + error = ext4_xattr_ibody_get(inode, i.name_index, i.name, + value, len); + if (error == -ENODATA) + goto out; + + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto out; + + /* Update the xttr entry. */ + i.value = value; + i.value_len = len; + + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (error) + goto out; + + EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - + (void *)ext4_raw_inode(&is.iloc)); + EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + + le32_to_cpu(is.s.here->e_value_size); + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + get_bh(is.iloc.bh); + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); + +out: + brelse(is.iloc.bh); + return error; +} + +int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, + unsigned int len) +{ + int ret, size; + struct ext4_inode_info *ei = EXT4_I(inode); + + if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + return -ENOSPC; + + size = ext4_get_max_inline_size(inode); + if (size < len) + return -ENOSPC; + + down_write(&EXT4_I(inode)->xattr_sem); + + if (ei->i_inline_off) + ret = ext4_update_inline_data(handle, inode, len); + else + ret = ext4_create_inline_data(handle, inode, len); + + up_write(&EXT4_I(inode)->xattr_sem); + + return ret; +} + +static int ext4_destroy_inline_data_nolock(handle_t *handle, struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_xattr_ibody_find is = { + .s = { .not_found = 0, }, + }; + struct ext4_xattr_info i = { + .name_index = EXT4_XATTR_INDEX_SYSTEM_DATA, + .name = EXT4_XATTR_SYSTEM_DATA_NAME, + .value = NULL, + .value_len = 0, + }; + int error; + + if (!ei->i_inline_off) + return 0; + + error = ext4_get_inode_loc(inode, &is.iloc); + if (error) + return error; + + error = ext4_xattr_ibody_find(inode, &i, &is); + if (error) + goto out; + + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto out; + + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (error) + goto out; + + memset((void *)ext4_raw_inode(&is.iloc)->i_block, + 0, EXT4_MIN_INLINE_DATA_SIZE); + + if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (S_ISDIR(inode->i_mode) || + S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { + ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); + ext4_ext_tree_init(handle, inode); + } + } + + get_bh(is.iloc.bh); + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); + + EXT4_I(inode)->i_inline_off = 0; + EXT4_I(inode)->i_inline_size = 0; + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); +out: + brelse(is.iloc.bh); + if (error == -ENODATA) + error = 0; + return error; +} + +int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) +{ + int ret; + + down_write(&EXT4_I(inode)->xattr_sem); + ret = ext4_destroy_inline_data_nolock(handle, inode); + up_write(&EXT4_I(inode)->xattr_sem); + + return ret; +} diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 15d0460..9b1fbf5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3767,8 +3767,10 @@ static inline void ext4_iget_extra_inode(struct inode *inode, { __le32 *magic = (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) + if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); + ext4_find_inline_data(inode); + } } struct inode *ext4_iget(struct super_block *sb, unsigned long ino) diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index c4c015d..e436d33 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -21,6 +21,7 @@ #define EXT4_XATTR_INDEX_TRUSTED 4 #define EXT4_XATTR_INDEX_LUSTRE 5 #define EXT4_XATTR_INDEX_SECURITY 6 +#define EXT4_XATTR_INDEX_SYSTEM_DATA 7 struct ext4_xattr_header { __le32 h_magic; /* magic number for identification */ @@ -123,6 +124,21 @@ extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size); +extern int ext4_has_inline_data(struct inode *inode); +extern int ext4_get_inline_size(struct inode *inode); +extern int ext4_get_max_inline_size(struct inode *inode); +extern int ext4_find_inline_data(struct inode *inode); +extern int ext4_read_inline_data(struct inode *inode, + void *buffer, unsigned int len); +extern void ext4_write_inline_data(struct inode *inode, + struct ext4_iloc *iloc, + void *buffer, loff_t pos, + unsigned int len); +extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, + unsigned int len); +extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, + unsigned int len); +extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); # else /* CONFIG_EXT4_FS_XATTR */ static inline int @@ -198,6 +214,52 @@ extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, return -EOPNOTSUPP; } +static inline int ext4_find_inline_data(struct inode *inode) +{ + return 0; +} + +static inline int ext4_has_inline_data(struct inode *inode) +{ + return 0; +} + +static inline int ext4_get_inline_size(struct inode *inode) +{ + return 0; +} + +static inline int ext4_get_max_inline_size(struct inode *inode) +{ + return 0; +} + +static inline int ext4_read_inline_data(struct inode *inode, + void *buffer, unsigned int len) +{ + return 0; +} + +static inline void ext4_write_inline_data(struct inode *inode, + struct ext4_iloc *iloc, + void *buffer, loff_t pos, + unsigned int len) +{ + return; +} + +static inline int ext4_init_inline_data(handle_t *handle, + struct inode *inode, + unsigned int len) +{ + return 0; +} + +static inline int ext4_destroy_inline_data(handle_t *handle, + struct inode *inode) +{ + return 0; +} # endif /* CONFIG_EXT4_FS_XATTR */ #ifdef CONFIG_EXT4_FS_SECURITY -- 1.7.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html