--- /dev/null 2008-04-02 16:29:12.813336657 +0200 +++ linux-2.6.24logfs/fs/logfs/logfs.h 2008-04-01 21:00:59.600247674 +0200 @@ -0,0 +1,555 @@ +/* + * fs/logfs/logfs.h + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2007 Joern Engel <joern@xxxxxxxxx> + * + * Private header for logfs. + */ +#ifndef fs_logfs_logfs_h +#define fs_logfs_logfs_h + +#define __CHECK_ENDIAN__ + +#include <linux/crc32.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/mempool.h> +#include <linux/pagemap.h> +#include <linux/mtd/mtd.h> +#include "logfs_abi.h" + +#define PG_zero PG_owner_priv_1 +#define PageZero(page) test_bit(PG_zero, &(page)->flags) +#define SetPageZero(page) set_bit(PG_zero, &(page)->flags) +#define ClearPageZero(page) clear_bit(PG_zero, &(page)->flags) + +/* + * There is no generic kernel btree library yet. When such a thing gets + * introduced, this definition and the corresponding source file should + * get removed. + */ +struct btree_head { + struct btree_node *node; + int height; + void *null_ptr; +}; + +static inline void build_bug_on_needs_a_function(void) +{ + BUILD_BUG_ON(sizeof(struct logfs_object_header) != LOGFS_HEADERSIZE); + BUILD_BUG_ON(sizeof(struct logfs_segment_header) + != LOGFS_SEGMENT_HEADERSIZE); +} + +/* FIXME: This should really be somewhere in the 64bit area. */ +#define LOGFS_LINK_MAX (1<<30) + +/* + * Private errno for accessed beyond end-of-file. Only used internally to + * logfs. If this ever gets exposed to userspace or even other parts of the + * kernel, it is a bug. 256 was chosen as a number sufficiently above all + * used errno #defines. + * + * It can be argued that this is a hack and should be replaced with something + * else. My last attempt to do this failed spectacularly and there are more + * urgent problems that users actually care about. This will remain for the + * moment. Patches are welcome, of course. + */ +#define EOF (512) + +/* Read-only filesystem */ +#define LOGFS_SB_FLAG_RO 1 + +/* Write Control Flags */ +#define WF_LOCK 0x01 /* take write lock */ +#define WF_WRITE 0x02 /* write block */ +#define WF_DELETE 0x04 /* delete old block */ +#define WF_SYNC 0x08 /* sync every indirect block */ +#define WF_GC 0x10 /* GC write, move to GC list */ + +/** + * struct logfs_area - area management information + * + * @a_sb: the superblock this area belongs to + * @a_is_open: 1 if the area is currently open, else 0 + * @a_segno: segment number of area + * @a_used_bytes: number of used bytes + * @a_ops: area operations (either journal or ostore) + * @a_wbuf: write buffer + * @a_erase_count: erase count + * @a_level: GC level + */ +struct logfs_area { /* a segment open for writing */ + struct super_block *a_sb; + int a_is_open; + u32 a_segno; + u32 a_used_bytes; + const struct logfs_area_ops *a_ops; + void *a_wbuf; + u32 a_erase_count; + u8 a_level; +}; + +/** + * struct logfs_area_ops - area operations + * + * @get_free_segment: fill area->ofs with the offset of a free segment + * @get_erase_count: fill area->erase_count (needs area->ofs) + * @erase_segment: erase and setup segment + * @finish_area: flush buffers, etc. + */ +struct logfs_area_ops { + void (*get_free_segment)(struct logfs_area *area); + void (*get_erase_count)(struct logfs_area *area); + int (*erase_segment)(struct logfs_area *area); + void (*finish_area)(struct logfs_area *area); +}; + +/** + * struct logfs_device_ops - device access operations + * + * @read: read from the device + * @write: write to the device + * @erase: erase part of the device + */ +struct logfs_device_ops { + s64 (*find_sb)(struct super_block *sb); + int (*read)(struct super_block *sb, loff_t ofs, size_t len, void *buf); + int (*write)(struct super_block *sb, loff_t ofs, size_t len, void *buf); + int (*erase)(struct super_block *sb, loff_t ofs, size_t len); + void (*sync)(struct super_block *sb); +}; + +/** + * struct gc_candidate - "candidate" segment to be garbage collected next + * + * @list: list (either free of low) + * @segno: segment number + * @valid: number of valid bytes + * @erase_count: erase count of segment + * @dist: distance from tree root + * + * Candidates can be on two lists. The free list contains electees rather + * than candidates - segments that no longer contain any valid data. The + * low list contains candidates to be picked for GC. It should be kept + * short. It is not required to always pick a perfect candidate. In the + * worst case GC will have to move more data than absolutely necessary. + */ +struct gc_candidate { + struct list_head list; + u64 gec; + u32 segno; + u32 valid; + u32 erase_count; + u8 dist; +}; + +/** + * struct candidate_list - list of similar candidates + */ +struct candidate_list { + struct list_head list; + int count; + int maxcount; + int sort_by_ec; +}; + +/** + * struct logfs_journal_entry - temporary structure used during journal scan + * + * @used: + * @version: normalized version + * @len: length + * @offset: offset + */ +struct logfs_journal_entry { + int used; + s16 version; + u16 len; + u16 datalen; + u64 offset; +}; + +enum transaction_state { + CREATE_1 = 1, + CREATE_2, + UNLINK_1, + UNLINK_2, + CROSS_RENAME_1, + CROSS_RENAME_2, + TARGET_RENAME_1, + TARGET_RENAME_2, + TARGET_RENAME_3 +}; + +/** + * struct logfs_transaction - essential fields to support atomic dirops + * + * @ino: target inode + * @dir: inode of directory containing dentry + * @pos: pos of dentry in directory + */ +struct logfs_transaction { + enum transaction_state state; + u64 ino; + u64 dir; + u64 pos; +}; + +/** + * struct logfs_shadow - old block in the shadow of a not-yet-committed new one + * @old_ofs: offset of old block on medium + * @new_ofs: offset of new block on medium + * @ino: inode number + * @bix: block index + * @old_len: size of old block, including header + * @new_len: size of new block, including header + * @level: block level + */ +struct logfs_shadow { + u64 old_ofs; + u64 new_ofs; + u64 ino; + u64 bix; + int old_len; + int new_len; + u8 level; +}; + +/** + * struct shadow_tree + * @new: shadows where old_ofs==0, indexed by new_ofs + * @old: shadows where old_ofs!=0, indexed by old_ofs + */ +struct shadow_tree { + struct btree_head new; + struct btree_head old; +}; + +/** + * struct logfs_block - contains any block state + * @tree: btree of shadows, indexed by old_ofs + */ +struct logfs_block { + struct list_head dirty_list; + struct shadow_tree shadow_tree; + struct page *page; + struct logfs_transaction *ta; +}; + +struct mtd_inode { + struct mtd_info *mtd; + long openers; + struct inode vfs_inode; +}; + +struct logfs_super { + struct mtd_inode *s_mtd; /* underlying device */ + struct block_device *s_bdev; /* underlying device */ + int s_sync; /* sync on next io? */ + const struct logfs_device_ops *s_devops;/* device access */ + struct inode *s_master_inode; /* ifile */ + long s_flags; + /* dir.c fields */ + struct mutex s_dirop_mutex; /* for creat/unlink/rename */ + u64 s_victim_ino; /* used for atomic dir-ops */ + u64 s_rename_dir; /* source directory ino */ + u64 s_rename_pos; /* position of source dd */ + /* gc.c fields */ + long s_segsize; /* size of a segment */ + int s_segshift; /* log2 of segment size */ + long s_no_segs; /* segments on device */ + long s_no_blocks; /* blocks per segment */ + long s_writesize; /* minimum write size */ + int s_writeshift; /* log2 of write size */ + u64 s_size; /* filesystem size */ + struct logfs_area *s_area[LOGFS_NO_AREAS]; /* open segment array */ + u64 s_gec; /* global erase count */ + u64 s_sweeper; /* current sweeper pos */ + u8 s_ifile_levels; /* max level of ifile */ + u8 s_iblock_levels; /* max level of regular files */ + u8 s_data_levels; /* # of segments to leaf block*/ + u8 s_total_levels; /* sum of above three */ + struct candidate_list s_free_list; /* 100% free segments */ + struct candidate_list s_low_list[LOGFS_NO_AREAS];/* good candidates */ + struct candidate_list s_ec_list; /* wear level candidates */ + struct btree_head s_reserved_segments; /* sb, journal, bad, etc. */ + struct list_head s_dirty_list; /* list of dirty blocks */ + struct list_head s_gc_dirty_list[LOGFS_NO_AREAS];/* blocks dirtied during GC */ + /* inode.c fields */ + spinlock_t s_ino_lock; /* lock s_last_ino on 32bit */ + u64 s_last_ino; /* highest ino used */ + struct list_head s_freeing_list; /* inodes being freed */ + /* journal.c fields */ + struct mutex s_journal_mutex; + void *s_je; /* journal entry to compress */ + void *s_compressed_je; /* block to write to journal */ + u64 s_journal_seg[LOGFS_JOURNAL_SEGS]; /* journal segments */ + u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ + u64 s_last_version; + struct logfs_area *s_journal_area; /* open journal segment */ + struct logfs_journal_entry s_retired[JE_LAST+1]; /* for journal scan */ + struct logfs_journal_entry s_speculative[JE_LAST+1]; /* dito */ + struct logfs_journal_entry s_first; /* dito */ + int s_sum_index; /* for the 12 summaries */ + __be32 *s_bb_array; /* bad segments */ + /* readwrite.c fields */ + struct mutex s_w_mutex; + struct page *s_write_page; /* page under writeback now */ + mempool_t *s_block_pool; /* struct logfs_block pool */ + mempool_t *s_shadow_pool; /* struct logfs_shadow pool */ + /* + * Space accounting in LogFS: + * - s_used_bytes specifies space used to store valid data objects. + * - s_dirty_used_bytes is space used to store non-committed data + * objects. Those objects have already been written themselves, + * but they don't become valid until all indirect blocks up to the + * journal have been written as well. + * - s_dirty_free_bytes is space used to store the old copy of a + * replaced object, as long as the replacement is non-committed. + * In other words, it is the amount of space freed when all dirty + * blocks are written back. + * - s_free_bytes is the amount of free space available for any + * purpose. + * - s_root_reserve is the amount of free space available only to + * the root user. + * - s_gc_reserve is currently a mess. + */ + u64 s_free_bytes; /* free space */ + u64 s_used_bytes; /* used (valid) data */ + u64 s_dirty_free_bytes; /* space freed on commit */ + u64 s_dirty_used_bytes; /* space used on commit */ + u64 s_gc_reserve; /* space reserved for GC */ + u64 s_root_reserve; /* FIXME: currently unused */ + u32 s_bad_segments; /* number of bad segments */ +}; + +/** + * struct logfs_inode - in-memory inode + * + * @vfs_inode: struct inode + * @li_data: data pointers + * @li_used_bytes: number of used bytes + * @li_freeing_list: used to track inodes currently being freed + * @li_flags: inode flags + */ +struct logfs_inode { + struct inode vfs_inode; + u64 li_data[LOGFS_EMBEDDED_FIELDS]; + u64 li_used_bytes; + struct list_head li_freeing_list; + struct logfs_transaction *li_transaction; + struct shadow_tree li_shadow_tree; + u32 li_flags; + u8 li_height; +}; + +#define journal_for_each(__i) for (__i = 0; __i < LOGFS_JOURNAL_SEGS; __i++) +#define for_each_area(__i) for (__i = 0; __i < LOGFS_NO_AREAS; __i++) + +/* compr.c */ +int logfs_compress(void *in, void *out, size_t inlen, size_t outlen); +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen); +int __init logfs_compr_init(void); +void logfs_compr_exit(void); + +/* dev_bdev.c */ +#ifdef CONFIG_BLOCK +int logfs_get_sb_bdev(struct file_system_type *type, int flags, + const char *devname, struct vfsmount *mnt); + +static inline void logfs_put_bdev(struct block_device *bdev) +{ + if (bdev) + close_bdev_excl(bdev); +} +#else +static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags, + const char *devname, struct vfsmount *mnt) +{ + return -ENODEV; +} + +static inline void logfs_put_bdev(struct block_device *bdev) +{ +} +#endif + +/* dev_mtd.c */ +#ifdef CONFIG_MTD +int logfs_get_sb_mtd(struct file_system_type *type, int flags, + int mtdnr, struct vfsmount *mnt); +void logfs_put_mtd(struct mtd_inode *mi); +#else +static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags, + int mtdnr, struct vfsmount *mnt) +{ + return -ENODEV; +} + +static inline void logfs_put_mtd(struct mtd_inode *mi) +{ +} +#endif + +/* dir.c */ +extern const struct inode_operations logfs_symlink_iops; +extern const struct inode_operations logfs_dir_iops; +extern const struct file_operations logfs_dir_fops; +int logfs_replay_journal(struct super_block *sb); + +/* file.c */ +extern const struct inode_operations logfs_reg_iops; +extern const struct file_operations logfs_reg_fops; +extern const struct address_space_operations logfs_reg_aops; +int logfs_readpage(struct file *file, struct page *page); +int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg); +int logfs_fsync(struct file *file, struct dentry *dentry, int datasync); + +/* gc.c */ +int logfs_safe_to_write_block(struct super_block *sb, u8 level); +struct gc_candidate *get_best_cand(struct candidate_list *list); +int add_free_segments_from_journal(struct super_block *sb, + struct logfs_je_free_segments *segs, int count); +void logfs_dirty_for_gc(struct super_block *sb, struct logfs_block *block); +void logfs_gc_pass(struct super_block *sb); +int logfs_check_areas(struct super_block *sb); +int logfs_init_gc(struct logfs_super *super); +void logfs_cleanup_gc(struct logfs_super *super); + +/* inode.c */ +extern const struct super_operations logfs_super_operations; +struct inode *logfs_iget(struct super_block *sb, ino_t ino, int *cookie); +void logfs_iput(struct inode *inode, int cookie); +struct inode *logfs_new_inode(struct inode *dir, int mode); +struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); +int logfs_init_inode_cache(void); +void logfs_destroy_inode_cache(void); +int __logfs_write_inode(struct inode *inode, long flags); +void __logfs_destroy_inode(struct inode *inode); +void logfs_set_blocks(struct inode *inode, u64 no); + +/* journal.c */ +int logfs_write_anchor(struct inode *inode); +int logfs_init_journal(struct super_block *sb); +void logfs_cleanup_journal(struct super_block *sb); + +/* memtree.c */ +void btree_init(struct btree_head *head); +void *btree_lookup(struct btree_head *head, u64 val); +int btree_insert(struct btree_head *head, u64 val, void *ptr); +void *btree_remove(struct btree_head *head, u64 val); +int btree_merge(struct btree_head *target, struct btree_head *victim); +void btree_visitor(struct btree_head *head, long opaque, + void (*func)(void *elem, long opaque, u64 val)); +void btree_grim_visitor(struct btree_head *head, long opaque, + void (*func)(void *elem, long opaque, u64 val)); + +/* readwrite.c */ +void logfs_unpack_index(pgoff_t index, u64 *bix, u8 *level); +void logfs_flush_dirty(struct super_block *sb, int sync); +int logfs_inode_read(struct inode *inode, void *buf, size_t n, loff_t _pos); +int logfs_inode_write(struct inode *inode, const void *buf, size_t count, + loff_t bix, long flags, struct logfs_transaction *ta, + struct shadow_tree *shadow_tree); +int logfs_readpage_nolock(struct page *page); +int logfs_write_buf(struct inode *inode, struct page *page, + struct logfs_transaction *ta, long flags); +int logfs_delete(struct inode *inode, pgoff_t index, + struct shadow_tree *shadow_tree, struct logfs_transaction *ta); +int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, int level, + long flags); +int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 pos, + u8 level); +int logfs_truncate(struct inode *inode, u64 size); +u64 logfs_seek_hole(struct inode *inode, u64 bix); +u64 logfs_seek_data(struct inode *inode, u64 bix); +int logfs_init_rw(struct logfs_super *super); +void logfs_cleanup_rw(struct logfs_super *super); + +/* segment.c */ +int logfs_erase_segment(struct super_block *sb, u32 ofs); +int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf); +int logfs_segment_read(struct inode *inode, struct page *page, u64 ofs, u64 bix, + u8 level); +int logfs_segment_write(struct inode *inode, struct page *page, + struct logfs_shadow *shadow); +int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow); +void logfs_buf_write(struct logfs_area *area, u64 ofs, void *data, size_t len); + +/* area handling */ +int logfs_init_areas(struct super_block *sb); +void logfs_cleanup_areas(struct logfs_super *super); +int logfs_open_area(struct logfs_area *area); +void logfs_close_area(struct logfs_area *area); + +/* super.c */ +void logfs_crash_dump(struct super_block *sb); +void *memchr_inv(const void *s, int c, size_t n); +int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); +int logfs_get_sb_device(struct file_system_type *type, int flags, + struct mtd_inode *mtd, struct block_device *bdev, + const struct logfs_device_ops *devops, struct vfsmount *mnt); + +static inline struct logfs_super *logfs_super(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct logfs_inode *logfs_inode(struct inode *inode) +{ + return container_of(inode, struct logfs_inode, vfs_inode); +} + +static inline void logfs_set_ro(struct super_block *sb) +{ + logfs_super(sb)->s_flags |= LOGFS_SB_FLAG_RO; +} + +#define LOGFS_BUG(sb) do { \ + struct super_block *__sb = sb; \ + logfs_crash_dump(__sb); \ + logfs_super(__sb)->s_flags |= LOGFS_SB_FLAG_RO; \ + BUG(); \ +} while (0) + +#define LOGFS_BUG_ON(condition, sb) \ + do { if (unlikely(condition)) LOGFS_BUG((sb)); } while (0) + +static inline __be32 logfs_crc32(void *data, size_t len, size_t skip) +{ + return cpu_to_be32(crc32(~0, data+skip, len-skip)); +} + +static inline u8 logfs_type(struct inode *inode) +{ + return (inode->i_mode >> 12) & 15; +} + +static inline pgoff_t logfs_index(struct super_block *sb, u64 pos) +{ + return pos >> sb->s_blocksize_bits; +} + +static inline u64 dev_ofs(struct super_block *sb, u32 segno, u32 ofs) +{ + return ((u64)segno << logfs_super(sb)->s_segshift) + ofs; +} + +static inline int device_read(struct super_block *sb, u32 segno, u32 ofs, + size_t len, void *buf) +{ + struct logfs_super *super = logfs_super(sb); + + return super->s_devops->read(sb, dev_ofs(sb, segno, ofs), len, buf); +} + +static inline struct logfs_block *logfs_block(struct page *page) +{ + return (void *)page->private; +} + +#endif -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html