[RFC PATCH 19/19] tarfs: introduce tar fs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>

It is a file system based on tar files and an index appended to them (to
facilitate finding fs entries without having to traverse the whole tar
file).

Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
---
 fs/Kconfig                        |   1 +
 fs/Makefile                       |   1 +
 fs/tarfs/Kconfig                  |  16 ++
 fs/tarfs/Makefile                 |   8 +
 fs/tarfs/defs.rs                  |  80 ++++++++
 fs/tarfs/tar.rs                   | 322 ++++++++++++++++++++++++++++++
 scripts/generate_rust_analyzer.py |   2 +-
 7 files changed, 429 insertions(+), 1 deletion(-)
 create mode 100644 fs/tarfs/Kconfig
 create mode 100644 fs/tarfs/Makefile
 create mode 100644 fs/tarfs/defs.rs
 create mode 100644 fs/tarfs/tar.rs

diff --git a/fs/Kconfig b/fs/Kconfig
index aa7e03cc1941..f4b8c33ea624 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -331,6 +331,7 @@ source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/erofs/Kconfig"
 source "fs/vboxsf/Kconfig"
+source "fs/tarfs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index f9541f40be4e..e3389f8b049d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
+obj-$(CONFIG_TARFS_FS)		+= tarfs/
diff --git a/fs/tarfs/Kconfig b/fs/tarfs/Kconfig
new file mode 100644
index 000000000000..d3e19eb2adbc
--- /dev/null
+++ b/fs/tarfs/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+config TARFS_FS
+	tristate "TAR file system support"
+	depends on RUST && BLOCK
+	select BUFFER_HEAD
+	help
+	  This is a simple read-only file system intended for mounting
+	  tar files that have had an index appened to them.
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called tarfs.
+
+	  If you don't know whether you need it, then you don't need it:
+	  answer N.
diff --git a/fs/tarfs/Makefile b/fs/tarfs/Makefile
new file mode 100644
index 000000000000..011c5d64fbe3
--- /dev/null
+++ b/fs/tarfs/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux tarfs filesystem routines.
+#
+
+obj-$(CONFIG_TARFS_FS) += tarfs.o
+
+tarfs-y := tar.o
diff --git a/fs/tarfs/defs.rs b/fs/tarfs/defs.rs
new file mode 100644
index 000000000000..7481b75aaab2
--- /dev/null
+++ b/fs/tarfs/defs.rs
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Definitions of tarfs structures.
+
+use kernel::types::LE;
+
+/// Flags used in [`Inode::flags`].
+pub mod inode_flags {
+    /// Indicates that the inode is opaque.
+    ///
+    /// When set, inode will have the "trusted.overlay.opaque" set to "y" at runtime.
+    pub const OPAQUE: u8 = 0x1;
+}
+
+kernel::derive_readable_from_bytes! {
+    /// An inode in the tarfs inode table.
+    #[repr(C)]
+    pub struct Inode {
+        /// The mode of the inode.
+        ///
+        /// The bottom 9 bits are the rwx bits for owner, group, all.
+        ///
+        /// The bits in the [`S_IFMT`] mask represent the file mode.
+        pub mode: LE<u16>,
+
+        /// Tarfs flags for the inode.
+        ///
+        /// Values are drawn from the [`inode_flags`] module.
+        pub flags: u8,
+
+        /// The bottom 4 bits represent the top 4 bits of mtime.
+        pub hmtime: u8,
+
+        /// The owner of the inode.
+        pub owner: LE<u32>,
+
+        /// The group of the inode.
+        pub group: LE<u32>,
+
+        /// The bottom 32 bits of mtime.
+        pub lmtime: LE<u32>,
+
+        /// Size of the contents of the inode.
+        pub size: LE<u64>,
+
+        /// Either the offset to the data, or the major and minor numbers of a device.
+        ///
+        /// For the latter, the 32 LSB are the minor, and the 32 MSB are the major numbers.
+        pub offset: LE<u64>,
+    }
+
+    /// An entry in a tarfs directory entry table.
+    #[repr(C)]
+    pub struct DirEntry {
+        /// The inode number this entry refers to.
+        pub ino: LE<u64>,
+
+        /// The offset to the name of the entry.
+        pub name_offset: LE<u64>,
+
+        /// The length of the name of the entry.
+        pub name_len: LE<u64>,
+
+        /// The type of entry.
+        pub etype: u8,
+
+        /// Unused padding.
+        pub _padding: [u8; 7],
+    }
+
+    /// The super-block of a tarfs instance.
+    #[repr(C)]
+    pub struct Header {
+        /// The offset to the beginning of the inode-table.
+        pub inode_table_offset: LE<u64>,
+
+        /// The number of inodes in the file system.
+        pub inode_count: LE<u64>,
+    }
+}
diff --git a/fs/tarfs/tar.rs b/fs/tarfs/tar.rs
new file mode 100644
index 000000000000..1a71b1ccf8e7
--- /dev/null
+++ b/fs/tarfs/tar.rs
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! File system based on tar files and an index.
+
+use core::mem::size_of;
+use defs::*;
+use kernel::fs::{
+    DirEmitter, DirEntryType, INode, INodeParams, INodeType, NewSuperBlock, Stat, Super,
+    SuperBlock, SuperParams,
+};
+use kernel::types::{ARef, Either, FromBytes};
+use kernel::{c_str, folio::Folio, folio::LockedFolio, fs, prelude::*};
+
+pub mod defs;
+
+kernel::module_fs! {
+    type: TarFs,
+    name: "tarfs",
+    author: "Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>",
+    description: "File system for indexed tar files",
+    license: "GPL",
+}
+
+const SECTOR_SIZE: u64 = 512;
+const TARFS_BSIZE: u64 = 1 << TARFS_BSIZE_BITS;
+const TARFS_BSIZE_BITS: u8 = 12;
+const SECTORS_PER_BLOCK: u64 = TARFS_BSIZE / SECTOR_SIZE;
+const TARFS_MAGIC: u32 = 0x54415246;
+
+static_assert!(SECTORS_PER_BLOCK > 0);
+
+struct INodeData {
+    offset: u64,
+    flags: u8,
+}
+
+struct TarFs {
+    data_size: u64,
+    inode_table_offset: u64,
+    inode_count: u64,
+}
+
+impl TarFs {
+    fn iget(sb: &SuperBlock<Self>, ino: u64) -> Result<ARef<INode<Self>>> {
+        // Check that the inode number is valid.
+        let h = sb.data();
+        if ino == 0 || ino > h.inode_count {
+            return Err(ENOENT);
+        }
+
+        // Create an inode or find an existing (cached) one.
+        let inode = match sb.get_or_create_inode(ino)? {
+            Either::Left(existing) => return Ok(existing),
+            Either::Right(new) => new,
+        };
+
+        static_assert!((TARFS_BSIZE as usize) % size_of::<Inode>() == 0);
+
+        // Load inode details from storage.
+        let offset = h.inode_table_offset + (ino - 1) * u64::try_from(size_of::<Inode>())?;
+
+        let bh = sb.bread(offset / TARFS_BSIZE)?;
+        let b = bh.data();
+        let idata = Inode::from_bytes(b, (offset & (TARFS_BSIZE - 1)) as usize).ok_or(EIO)?;
+
+        let mode = idata.mode.value();
+
+        // Ignore inodes that have unknown mode bits.
+        if (u32::from(mode) & !(fs::mode::S_IFMT | 0o777)) != 0 {
+            return Err(ENOENT);
+        }
+
+        let doffset = idata.offset.value();
+        let size = idata.size.value().try_into()?;
+        let secs = u64::from(idata.lmtime.value()) | (u64::from(idata.hmtime & 0xf) << 32);
+        let ts = kernel::time::Timespec::new(secs, 0)?;
+        let typ = match u32::from(mode) & fs::mode::S_IFMT {
+            fs::mode::S_IFREG => INodeType::Reg,
+            fs::mode::S_IFDIR => INodeType::Dir,
+            fs::mode::S_IFLNK => INodeType::Lnk,
+            fs::mode::S_IFSOCK => INodeType::Sock,
+            fs::mode::S_IFIFO => INodeType::Fifo,
+            fs::mode::S_IFCHR => INodeType::Chr((doffset >> 32) as u32, doffset as u32),
+            fs::mode::S_IFBLK => INodeType::Blk((doffset >> 32) as u32, doffset as u32),
+            _ => return Err(ENOENT),
+        };
+        inode.init(INodeParams {
+            typ,
+            mode: mode & 0o777,
+            size,
+            blocks: (idata.size.value() + TARFS_BSIZE - 1) / TARFS_BSIZE,
+            nlink: 1,
+            uid: idata.owner.value(),
+            gid: idata.group.value(),
+            ctime: ts,
+            mtime: ts,
+            atime: ts,
+            value: INodeData {
+                offset: doffset,
+                flags: idata.flags,
+            },
+        })
+    }
+
+    fn name_eq(sb: &SuperBlock<Self>, mut name: &[u8], offset: u64) -> Result<bool> {
+        for v in sb.read(offset, name.len().try_into()?)? {
+            let v = v?;
+            let b = v.data();
+            if b != &name[..b.len()] {
+                return Ok(false);
+            }
+            name = &name[b.len()..];
+        }
+        Ok(true)
+    }
+
+    fn read_name(sb: &SuperBlock<Self>, mut name: &mut [u8], offset: u64) -> Result<bool> {
+        for v in sb.read(offset, name.len().try_into()?)? {
+            let v = v?;
+            let b = v.data();
+            name[..b.len()].copy_from_slice(b);
+            name = &mut name[b.len()..];
+        }
+        Ok(true)
+    }
+}
+
+impl fs::FileSystem for TarFs {
+    type Data = Box<Self>;
+    type INodeData = INodeData;
+    const NAME: &'static CStr = c_str!("tar");
+    const SUPER_TYPE: Super = Super::BlockDev;
+
+    fn super_params(sb: &NewSuperBlock<Self>) -> Result<SuperParams<Self::Data>> {
+        let scount = sb.sector_count()?;
+        if scount < SECTORS_PER_BLOCK {
+            pr_err!("Block device is too small: sector count={scount}\n");
+            return Err(ENXIO);
+        }
+
+        let tarfs = {
+            let mut folio = Folio::try_new(0)?;
+            sb.sread(
+                (scount / SECTORS_PER_BLOCK - 1) * SECTORS_PER_BLOCK,
+                SECTORS_PER_BLOCK as usize,
+                &mut folio,
+            )?;
+            let mapped = folio.map_page(0)?;
+            let hdr =
+                Header::from_bytes(&mapped, (TARFS_BSIZE - SECTOR_SIZE) as usize).ok_or(EIO)?;
+            Box::try_new(TarFs {
+                inode_table_offset: hdr.inode_table_offset.value(),
+                inode_count: hdr.inode_count.value(),
+                data_size: scount.checked_mul(SECTOR_SIZE).ok_or(ERANGE)?,
+            })?
+        };
+
+        // Check that the inode table starts within the device data and is aligned to the block
+        // size.
+        if tarfs.inode_table_offset >= tarfs.data_size {
+            pr_err!(
+                "inode table offset beyond data size: {} >= {}\n",
+                tarfs.inode_table_offset,
+                tarfs.data_size
+            );
+            return Err(E2BIG);
+        }
+
+        if tarfs.inode_table_offset % SECTOR_SIZE != 0 {
+            pr_err!(
+                "inode table offset not aligned to sector size: {}\n",
+                tarfs.inode_table_offset,
+            );
+            return Err(EDOM);
+        }
+
+        // Check that the last inode is within bounds (and that there is no overflow when
+        // calculating its offset).
+        let offset = tarfs
+            .inode_count
+            .checked_mul(u64::try_from(size_of::<Inode>())?)
+            .ok_or(ERANGE)?
+            .checked_add(tarfs.inode_table_offset)
+            .ok_or(ERANGE)?;
+        if offset > tarfs.data_size {
+            pr_err!(
+                "inode table extends beyond the data size : {} > {}\n",
+                tarfs.inode_table_offset + (tarfs.inode_count * size_of::<Inode>() as u64),
+                tarfs.data_size,
+            );
+            return Err(E2BIG);
+        }
+
+        Ok(SuperParams {
+            magic: TARFS_MAGIC,
+            blocksize_bits: TARFS_BSIZE_BITS,
+            maxbytes: fs::MAX_LFS_FILESIZE,
+            time_gran: 1000000000,
+            data: tarfs,
+        })
+    }
+
+    fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> {
+        Self::iget(sb, 1)
+    }
+
+    fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result {
+        let sb = inode.super_block();
+        let mut name = Vec::<u8>::new();
+        let pos = emitter.pos();
+
+        if pos < 0 || pos % size_of::<DirEntry>() as i64 != 0 {
+            return Err(ENOENT);
+        }
+
+        if pos >= inode.size() {
+            return Ok(());
+        }
+
+        // Make sure the inode data doesn't overflow the data area.
+        let size = u64::try_from(inode.size())?;
+        if inode.data().offset.checked_add(size).ok_or(EIO)? > sb.data().data_size {
+            return Err(EIO);
+        }
+
+        for v in sb.read(inode.data().offset + pos as u64, size - pos as u64)? {
+            for e in DirEntry::from_bytes_to_slice(v?.data()).ok_or(EIO)? {
+                let name_len = usize::try_from(e.name_len.value())?;
+                if name_len > name.len() {
+                    name.try_resize(name_len, 0)?;
+                }
+
+                Self::read_name(sb, &mut name[..name_len], e.name_offset.value())?;
+
+                if !emitter.emit(
+                    size_of::<DirEntry>() as i64,
+                    &name[..name_len],
+                    e.ino.value(),
+                    DirEntryType::try_from(u32::from(e.etype))?,
+                ) {
+                    return Ok(());
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn lookup(parent: &INode<Self>, name: &[u8]) -> Result<ARef<INode<Self>>> {
+        let name_len = u64::try_from(name.len())?;
+        let sb = parent.super_block();
+
+        for v in sb.read(parent.data().offset, parent.size().try_into()?)? {
+            for e in DirEntry::from_bytes_to_slice(v?.data()).ok_or(EIO)? {
+                if e.name_len.value() != name_len || e.name_len.value() > usize::MAX as u64 {
+                    continue;
+                }
+                if Self::name_eq(sb, name, e.name_offset.value())? {
+                    return Self::iget(sb, e.ino.value());
+                }
+            }
+        }
+
+        Err(ENOENT)
+    }
+
+    fn read_folio(inode: &INode<Self>, mut folio: LockedFolio<'_>) -> Result {
+        let pos = u64::try_from(folio.pos()).unwrap_or(u64::MAX);
+        let size = u64::try_from(inode.size())?;
+        let sb = inode.super_block();
+
+        let copied = if pos >= size {
+            0
+        } else {
+            let offset = inode.data().offset.checked_add(pos).ok_or(ERANGE)?;
+            let len = core::cmp::min(size - pos, folio.size().try_into()?);
+            let mut foffset = 0;
+
+            if offset.checked_add(len).ok_or(ERANGE)? > sb.data().data_size {
+                return Err(EIO);
+            }
+
+            for v in sb.read(offset, len)? {
+                let v = v?;
+                folio.write(foffset, v.data())?;
+                foffset += v.data().len();
+            }
+            foffset
+        };
+
+        folio.zero_out(copied, folio.size() - copied)?;
+        folio.mark_uptodate();
+        folio.flush_dcache();
+
+        Ok(())
+    }
+
+    fn read_xattr(inode: &INode<Self>, name: &CStr, outbuf: &mut [u8]) -> Result<usize> {
+        if inode.data().flags & inode_flags::OPAQUE == 0
+            || name.as_bytes() != b"trusted.overlay.opaque"
+        {
+            return Err(ENODATA);
+        }
+
+        if !outbuf.is_empty() {
+            outbuf[0] = b'y';
+        }
+
+        Ok(1)
+    }
+
+    fn statfs(sb: &SuperBlock<Self>) -> Result<Stat> {
+        let data = sb.data();
+        Ok(Stat {
+            magic: TARFS_MAGIC,
+            namelen: i64::MAX,
+            bsize: TARFS_BSIZE as _,
+            blocks: data.inode_table_offset / TARFS_BSIZE,
+            files: data.inode_count,
+        })
+    }
+}
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index fc52bc41d3e7..8dc74991894e 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -116,7 +116,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
     # Then, the rest outside of `rust/`.
     #
     # We explicitly mention the top-level folders we want to cover.
-    extra_dirs = map(lambda dir: srctree / dir, ("samples", "drivers"))
+    extra_dirs = map(lambda dir: srctree / dir, ("samples", "drivers", "fs"))
     if external_src is not None:
         extra_dirs = [external_src]
     for folder in extra_dirs:
-- 
2.34.1





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux