Maybe objects in different subvols should still be given different inode
numbers. This would be problematic on 32bit systems, but much less so on
64bit systems.
The patch below, which is just a proof-of-concept, changes btrfs to
report a uniform st_dev, and different (64bit) st_ino in different
subvols.
It has problems:
- it will break any 32bit readdir and 32bit stat. I don't know how big
a problem that is these days (ino_t in the kernel is "unsigned long",
not "unsigned long long). That surprised me).
- It might break some user-space expectations. One thing I have learnt
is not to make any assumption about what other people might expect.
So: how would this break your use-case??
Thanks,
NeilBrown
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0117d867ecf8..8dc58c848502 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6020,6 +6020,37 @@ static int btrfs_opendir(struct inode *inode,
struct file *file)
return 0;
}
+static u64 btrfs_make_inum(struct btrfs_key *root, struct btrfs_key
*ino)
+{
+ u64 rootid = root->objectid;
+ u64 inoid = ino->objectid;
+ int shift = 64-8;
+
+ if (ino->type == BTRFS_ROOT_ITEM_KEY) {
+ /* This is a subvol root found during readdir. */
+ rootid = inoid;
+ inoid = BTRFS_FIRST_FREE_OBJECTID;
+ }
+ if (rootid == BTRFS_FS_TREE_OBJECTID)
+ /* this is main vol, not subvol (I think) */
+ return inoid;
+ /* store the rootid in the high bits of the inum. This
+ * will break if 32bit inums are required - we cannot know
+ */
+ while (rootid) {
+ inoid ^= (rootid & 0xff) << shift;
+ rootid >>= 8;
+ shift -= 8;
+ }
+ return inoid;
+}
+
+static inline u64 btrfs_ino_to_inum(struct inode *inode)
+{
+ return btrfs_make_inum(&BTRFS_I(inode)->root->root_key,
+ &BTRFS_I(inode)->location);
+}
+
struct dir_entry {
u64 ino;
u64 offset;
@@ -6045,6 +6076,49 @@ static int btrfs_filldir(void *addr, int
entries, struct dir_context *ctx)
return 0;
}
+static inline bool btrfs_dir_emit_dot(struct file *file,
+ struct dir_context *ctx)
+{
+ return ctx->actor(ctx, ".", 1, ctx->pos,
+ btrfs_ino_to_inum(file->f_path.dentry->d_inode),
+ DT_DIR) == 0;
+}
+
+static inline ino_t btrfs_parent_ino(struct dentry *dentry)
+{
+ ino_t res;
+
+ /*
+ * Don't strictly need d_lock here? If the parent ino could change
+ * then surely we'd have a deeper race in the caller?
+ */
+ spin_lock(&dentry->d_lock);
+ res = btrfs_ino_to_inum(dentry->d_parent->d_inode);
+ spin_unlock(&dentry->d_lock);
+ return res;
+}
+
+static inline bool btrfs_dir_emit_dotdot(struct file *file,
+ struct dir_context *ctx)
+{
+ return ctx->actor(ctx, "..", 2, ctx->pos,
+ btrfs_parent_ino(file->f_path.dentry), DT_DIR) == 0;
+}
+static inline bool btrfs_dir_emit_dots(struct file *file,
+ struct dir_context *ctx)
+{
+ if (ctx->pos == 0) {
+ if (!btrfs_dir_emit_dot(file, ctx))
+ return false;
+ ctx->pos = 1;
+ }
+ if (ctx->pos == 1) {
+ if (!btrfs_dir_emit_dotdot(file, ctx))
+ return false;
+ ctx->pos = 2;
+ }
+ return true;
+}
static int btrfs_real_readdir(struct file *file, struct dir_context
*ctx)
{
struct inode *inode = file_inode(file);
@@ -6067,7 +6141,7 @@ static int btrfs_real_readdir(struct file *file,
struct dir_context *ctx)
bool put = false;
struct btrfs_key location;
- if (!dir_emit_dots(file, ctx))
+ if (!btrfs_dir_emit_dots(file, ctx))
return 0;
path = btrfs_alloc_path();
@@ -6136,7 +6210,8 @@ static int btrfs_real_readdir(struct file *file,
struct dir_context *ctx)
put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
&entry->type);
btrfs_dir_item_key_to_cpu(leaf, di, &location);
- put_unaligned(location.objectid, &entry->ino);
+ put_unaligned(btrfs_make_inum(&root->root_key, &location),
+ &entry->ino);
put_unaligned(found_key.offset, &entry->offset);
entries++;
addr += sizeof(struct dir_entry) + name_len;
@@ -9193,7 +9268,7 @@ static int btrfs_getattr(struct user_namespace
*mnt_userns,
STATX_ATTR_NODUMP);
generic_fillattr(&init_user_ns, inode, stat);
- stat->dev = BTRFS_I(inode)->root->anon_dev;
+ stat->ino = btrfs_ino_to_inum(inode);
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;