A file handle that userspace provides to open_by_handle_at() can legitimately contain an outdated inode number that has since been reused for another purpose - that's why the file handle also contains a generation number. But if the inode number has been reused for an ea_inode, check_igot_inode() will notice, __ext4_iget() will go through ext4_error_inode(), and if the inode was newly created, it will also be marked as bad by iget_failed(). This all happens before the point where the inode generation is checked. ext4_error_inode() is supposed to only be used on filesystem corruption; it should not be used when userspace just got unlucky with a stale file handle. So when this happens, let __ext4_iget() just return an error. Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()") Signed-off-by: Jann Horn <jannh@xxxxxxxxxx> --- I'm sending this as an RFC patch because the patch I came up with is pretty ugly; I think it would be ideal if someone else comes up with a nicer patch that can be used instead of this one. I'm also not sure whether it actually matters if we call iget_failed() when this happens with a new inode. The following testcase demonstrates this issue, and shows that a filesystem mounted with errors=remount-ro is remounted to read-only when hitting it. Run this as root. ``` #define _GNU_SOURCE #include <err.h> #include <stdarg.h> #include <stdio.h> #include <sched.h> #include <stddef.h> #include <unistd.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> #include <sys/mount.h> #include <sys/mman.h> #include <sys/xattr.h> #include <sys/stat.h> #define SYSCHK(x) ({ \ typeof(x) __res = (x); \ if (__res == (typeof(x))-1) \ err(1, "SYSCHK(" #x ")"); \ __res; \ }) static int systemf(const char *cmd, ...) { char *full_cmd; va_list ap; va_start(ap, cmd); if (vasprintf(&full_cmd, cmd, ap) == -1) err(1, "vasprintf"); int res = system(full_cmd); free(full_cmd); return res; } int main(void) { // avoid messing with the main mount hierarchy SYSCHK(unshare(CLONE_NEWNS)); SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); // create and mount new ext4 fs int fs_fd = SYSCHK(memfd_create("ext4-image", 0)); SYSCHK(ftruncate(fs_fd, 1024*1024)); if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd)) errx(1, "mkfs failed"); if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd)) errx(1, "mount failed"); // create file with inode xattr char xattr_body[8192]; memset(xattr_body, 'A', sizeof(xattr_body)); int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600)); SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE)); struct stat st; SYSCHK(fstat(file_fd, &st)); // trigger bug: do fhandle lookup on inode of file plus one (which will be // the xattr inode) struct handle { unsigned int handle_bytes; unsigned int handle_type; unsigned int ino, gen, parent_ino, parent_gen; } handle = { .handle_bytes = sizeof(handle) - offsetof(struct handle, ino), .handle_type = 1/*FILEID_INO32_GEN*/, .ino = st.st_ino+1, .gen = 0 }; // this should fail SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY)); } ``` resulting dmesg: ``` EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none. EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag EXT4-fs (loop0): Remounting filesystem read-only EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798. ``` --- fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) inode_set_iversion_queried(inode, val); } -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) - +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags, + const char *function, unsigned int line) { + const char *err_str; + if (flags & EXT4_IGET_EA_INODE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "missing EA_INODE flag"; + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + err_str = "missing EA_INODE flag"; + goto error; + } if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || - EXT4_I(inode)->i_file_acl) - return "ea_inode with extended attributes"; + EXT4_I(inode)->i_file_acl) { + err_str = "ea_inode with extended attributes"; + goto error; + } } else { - if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "unexpected EA_INODE flag"; + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + /* + * open_by_handle_at() could provide an old inode number + * that has since been reused for an ea_inode; this does + * not indicate filesystem corruption + */ + if (flags & EXT4_IGET_HANDLE) + return -ESTALE; + err_str = "unexpected EA_INODE flag"; + goto error; + } + } + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { + err_str = "unexpected bad inode w/o EXT4_IGET_BAD"; + goto error; } - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) - return "unexpected bad inode w/o EXT4_IGET_BAD"; - return NULL; + return 0; + +error: + ext4_error_inode(inode, function, line, 0, err_str); + return -EFSCORRUPTED; } struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, @@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, struct ext4_inode_info *ei; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct inode *inode; - const char *err_str; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; loff_t size; @@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); + ret = check_igot_inode(inode, flags, function, line); + if (ret) { iput(inode); - return ERR_PTR(-EFSCORRUPTED); + return ERR_PTR(ret); } return inode; } @@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = -EFSCORRUPTED; goto bad_inode; } - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); - ret = -EFSCORRUPTED; - goto bad_inode; + ret = check_igot_inode(inode, flags, function, line); + /* + * -ESTALE here means there is nothing inherently wrong with the inode, + * it's just not an inode we can return for an fhandle lookup. + */ + if (ret == -ESTALE) { + brelse(iloc.bh); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(-ESTALE); } - + if (ret) + goto bad_inode; brelse(iloc.bh); + unlock_new_inode(inode); return inode; --- base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815 change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9 -- Jann Horn <jannh@xxxxxxxxxx>