Re: [PATCH 1/2] LogFS proper

Thomas Gleixner <tglx@xxxxxxxxxxxxx> · Tue, 08 May 2007 09:22:30 +0200

On Tue, 2007-05-08 at 00:00 +0200, Jörn Engel wrote:
> The filesystem itself.

Very descriptive log entry.

> +config LOGFS
> +	tristate "Log Filesystem (EXPERIMENTAL)"
> +	depends on EXPERIMENTAL
> +	select ZLIB_INFLATE
> +	select ZLIB_DEFLATE
> +	help
> +	  Successor of JFFS2, using explicit filesystem hierarchy.

Why is it a successor ? Does it build upon JFFS2 ?

> +	  Continuing with the long tradition of calling the filesystem
> +	  exactly what it is not, LogFS is a journaled filesystem,
> +	  while JFFS and JFFS2 were true log-structured filesystems.
> +	  The hybrid structure of journaled filesystems promise to
> +	  scale better to larger sized.
> +
> +	  If unsure, say N.

...

> @@ -0,0 +1,14 @@
> +obj-$(CONFIG_LOGFS)	+= logfs.o
> +
> +logfs-y += compr.o
> +logfs-y	+= dir.o
> +logfs-y	+= file.o
> +logfs-y	+= gc.o
> +logfs-y	+= inode.o
> +logfs-y	+= journal.o
> +logfs-y += memtree.o
> +logfs-y	+= readwrite.o
> +logfs-y	+= segment.o
> +logfs-y	+= super.o
> +logfs-y += progs/fsck.o
> +logfs-y += progs/mkfs.o

Please use either tabs or spaces. Preferrably tabs

> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/logfs.h	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,626 @@
> +#ifndef logfs_h
> +#define logfs_h
> +
> +#define __CHECK_ENDIAN__
> +
> +
> +#include <linux/crc32.h>
> +#include <linux/fs.h>
> +#include <linux/kallsyms.h>
> +#include <linux/kernel.h>
> +#include <linux/mtd/mtd.h>
> +#include <linux/pagemap.h>
> +#include <linux/statfs.h>

Please sort includes alphabetically and seperate the 
#include <linux/mtd/mtd.h> from the #include <linux/...> ones

> +typedef __be16 be16;
> +typedef __be32 be32;
> +typedef __be64 be64;

Why are those typedefs necessary ?

> +struct btree_head {
> +	struct btree_node *node;
> +	int height;
> +	void *null_ptr;
> +};

Please document structures

> +#define packed __attribute__((__packed__))

Please use the __attribute__((__packed__)) on your structs instead of
creating some extra "needs lookup" magic.

> +
> +#define TRACE() do {						\
> +	printk("trace: %s:%d: ", __FILE__, __LINE__);		\
> +	printk("->%s\n", __func__);				\
> +} while(0)

Oh no. Not again another "I'm in function X tracer". 

> +
> +#define LOGFS_MAGIC 0xb21f205ac97e8168ull
> +#define LOGFS_MAGIC_U32 0xc97e8168ull

why is an U32 constant ull ?

> +#define LOGFS_BLOCK_SECTORS	(8)
> +#define LOGFS_BLOCK_BITS	(9)	/* 512 pointers, used for shifts */
> +#define LOGFS_BLOCKSIZE		(4096ull)
> +#define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64))
> +#define LOGFS_BLOCK_MASK (LOGFS_BLOCK_FACTOR-1)

for the whole defines:

Please align them so it does not look like a jigsaw puzzle.

Please avoid tail comments as it makes it harder to parse

> +#define I0_BLOCKS	(4+16)
> +#define I1_BLOCKS LOGFS_BLOCK_FACTOR
> +#define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS)
> +#define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS)
> +#define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS)
> +#define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS)

Some explanation for that magic math might be helpful

> +#define I1_INDEX	(4+16)

same constant as IO_BLOCKS. coincidence ?

> +#define I2_INDEX	(5+16)
> +#define I3_INDEX	(6+16)
> +#define I4_INDEX	(7+16)
> +#define I5_INDEX	(8+16)

#define I2_INDEX	(I1_INDEX + 1)
....

> +struct logfs_disk_super {
> +	be64	ds_magic;
> +	be32	ds_crc;			/* crc32 of everything below */
> +	u8	ds_ifile_levels;	/* max level of ifile */
> +	u8	ds_iblock_levels;	/* max level of regular files */
> +	u8	ds_data_levels;		/* number of segments to leaf blocks */
> +	u8	pad0;
> +
> +	be64	ds_feature_incompat;
> +	be64	ds_feature_ro_compat;
> +
> +	be64	ds_feature_compat;
> +	be64	ds_flags;
> +
> +	be64	ds_filesystem_size;	/* filesystem size in bytes */
> +	u8	ds_segment_shift;	/* log2 of segment size */
> +	u8	ds_block_shift;		/* log2 if block size */
> +	u8	ds_write_shift;		/* log2 of write size */
> +	u8	pad1[5];
> +
> +	/* the segments of the primary journal.  if fewer than 4 segments are
> +	 * used, some fields are set to 0 */
> +#define LOGFS_JOURNAL_SEGS 4

Please avoid defines inside of structures

> +	be64	ds_journal_seg[LOGFS_JOURNAL_SEGS];
> +
> +	be64	ds_root_reserve;	/* bytes reserved for root */
> +
> +	be64	pad2[19];		/* align to 256 bytes */
> +}packed;

Please comment the structure with kernel doc comments and avoid the tail
comments.

> +
> +#define LOGFS_IF_VALID		0x00000001 /* inode exists */
> +#define LOGFS_IF_EMBEDDED	0x00000002 /* data embedded in block pointers */
> +#define LOGFS_IF_ZOMBIE		0x00000004 /* inode was already deleted */
> +#define LOGFS_IF_STILLBORN	0x40000000 /* couldn't write inode in creat() */
> +#define LOGFS_IF_INVALID	0x80000000 /* inode does not exist */

Are these bit values or enum type ?

> +struct logfs_disk_inode {
> +	be16	di_mode;
> +	be16	di_pad;
> +	be32	di_flags;
> +	be32	di_uid;
> +	be32	di_gid;
> +
> +	be64	di_ctime;
> +	be64	di_mtime;
> +
> +	be32	di_refcount;
> +	be32	di_generation;
> +	be64	di_used_bytes;
> +
> +	be64	di_size;
> +	be64	di_data[LOGFS_EMBEDDED_FIELDS];
> +}packed;
> +
> +
> +#define LOGFS_MAX_NAMELEN 255

Please put define on top

> +struct logfs_disk_dentry {
> +	be64	ino;		/* inode pointer */
> +	be16	namelen;
> +	u8	type;
> +	u8	name[LOGFS_MAX_NAMELEN];
> +}packed;
> +
> +
> +#define OBJ_TOP_JOURNAL	1	/* segment header for master journal */
> +#define OBJ_JOURNAL	2	/* segment header for journal */
> +#define OBJ_OSTORE	3	/* segment header for ostore */
> +#define OBJ_BLOCK	4	/* data block */
> +#define OBJ_INODE	5	/* inode */
> +#define OBJ_DENTRY	6	/* dentry */

enum please

> +struct logfs_object_header {
> +	be32	crc;		/* checksum */
> +	be16	len;		/* length of object, header not included */
> +	u8	type;		/* node type */
> +	u8	compr;		/* compression type */
> +	be64	ino;		/* inode number */
> +	be64	pos;		/* file position */
> +}packed;

For all structs:

Please use kernel doc struct comments.

> +
> +struct logfs_segment_header {
> +	be32	crc;		/* checksum */
> +	be16	len;		/* length of object, header not included */
> +	u8	type;		/* node type */
> +	u8	level;		/* GC level */
> +	be32	segno;		/* segment number */
> +	be32	ec;		/* erase count */
> +	be64	gec;		/* global erase count (write time) */
> +}packed;
> +
> +enum {
> +	COMPR_NONE	= 0,
> +	COMPR_ZLIB	= 1,
> +};

Please name the enums and use the same enum for the according fields and
the function arguments.

> +
> +/* Journal entries come in groups of 16.  First group contains individual
> + * entries, next groups contain one entry per level */
> +enum {
> +	JEG_BASE	= 0,
> +	JE_FIRST	= 1,
> +
> +	JE_COMMIT	= 1,	/* commits all previous entries */
> +	JE_ABORT	= 2,	/* aborts all previous entries */
> +	JE_DYNSB	= 3,
> +	JE_ANCHOR	= 4,
> +	JE_ERASECOUNT	= 5,
> +	JE_SPILLOUT	= 6,
> +	JE_DELTA	= 7,
> +	JE_BADSEGMENTS	= 8,
> +	JE_AREAS	= 9,	/* area description sans wbuf */
> +	JEG_WBUF	= 0x10,	/* write buffer for segments */
> +
> +	JE_LAST		= 0x1f,
> +};

same here

> +
> +////////////////////////////////////////////////////////////////////////////////
> +////////////////////////////////////////////////////////////////////////////////

Eew.

> +
> +#define LOGFS_SUPER(sb) ((struct logfs_super*)(sb->s_fs_info))
> +#define LOGFS_INODE(inode) container_of(inode, struct logfs_inode, vfs_inode)

lowercase inlines please

> +
> +			/*	0	reserved for gc markers */
> +#define LOGFS_INO_MASTER	1	/* inode file */
> +#define LOGFS_INO_ROOT		2	/* root directory */
> +#define LOGFS_INO_ATIME		4	/* atime for all inodes */
> +#define LOGFS_INO_BAD_BLOCKS	5	/* bad blocks */
> +#define LOGFS_INO_OBSOLETE	6	/* obsolete block count */
> +#define LOGFS_INO_ERASE_COUNT	7	/* erase count */
> +#define LOGFS_RESERVED_INOS	16

enum ?

> +struct logfs_super {
> +	//struct super_block *s_sb;		/* should get removed... */

Please do so

> +	be64	*s_rblock;
> +	be64	*s_wblock[LOGFS_MAX_LEVELS];

Please comment the non obvious ones instead of the self explaining

> +	u64	 s_free_bytes;			/* number of free bytes */

> +#define journal_for_each(__i) for (__i=0; __i<LOGFS_JOURNAL_SEGS; __i++)

	__i = 0; __i < LOGFS_JOURNAL_SEGS;

> +void logfs_crash_dump(struct super_block *sb);
> +#define LOGFS_BUG(sb) do {		\
> +	struct super_block *__sb = sb;	\

Why do we need a local variable here ?

> +	logfs_crash_dump(__sb);		\
> +	BUG();				\
> +} while(0)

> +static inline u8 logfs_type(struct inode *inode)
> +{
> +	return (inode->i_mode >> 12) & 15;

What's 12 and 15 ? Constants perhaps ?

> +}
> +static inline struct logfs_disk_sum *alloc_disk_sum(struct super_block *sb)
> +{
> +	return kzalloc(sb->s_blocksize, GFP_ATOMIC);
> +}

No, please do not add another alias for kzalloc

> +static inline void free_disk_sum(struct logfs_disk_sum *sum)
> +{
> +	kfree(sum);
> +}

same here

> +
> +/* compr.c */
> +#define logfs_compress_none logfs_memcpy
> +#define logfs_uncompress_none logfs_memcpy

can you please use logfs_memcpy instead ?

> +int logfs_memcpy(void *in, void *out, size_t inlen, size_t outlen);
> +int logfs_compress(void *in, void *out, size_t inlen, size_t outlen);
> +int logfs_compress_vec(struct kvec *vec, int count, void *out, size_t outlen);
> +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen);
> +int logfs_uncompress_vec(void *in, size_t inlen, struct kvec *vec, int count);

are those global ? If yes, please add extern, else remove

> +int __init logfs_compr_init(void);
> +void __exit logfs_compr_exit(void);

dito

> +
> +/* dir.c */
> +extern struct inode_operations logfs_dir_iops;
> +extern struct file_operations logfs_dir_fops;
> +int logfs_replay_journal(struct super_block *sb);

dito

> +
> +/* file.c */
> +extern struct inode_operations logfs_reg_iops;
> +extern struct file_operations logfs_reg_fops;
> +extern struct address_space_operations logfs_reg_aops;
> +
> +int logfs_setattr(struct dentry *dentry, struct iattr *iattr);

dito

> +
> +/* gc.c */
> +void logfs_gc_pass(struct super_block *sb);
> +int logfs_init_gc(struct logfs_super *super);
> +void logfs_cleanup_gc(struct logfs_super *super);

same here ......................

> +
> +/* inode.c */
> +/* progs/mkfs.c */
> +int logfs_fsck(struct super_block *sb);

down to this place

> +
> +static inline u64 dev_ofs(struct super_block *sb, u32 segno, u32 ofs)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);

Seperate variables and code by an empty line please

> +	return ((u64)segno << super->s_segshift) + ofs;
> +}
> +
> +
> +static inline void device_read(struct super_block *sb, u32 segno, u32 ofs,
> +		size_t len, void *buf)
> +{
> +	int err = mtdread(sb, dev_ofs(sb, segno, ofs), len, buf);

Same here.

> +	LOGFS_BUG_ON(err, sb);

Please open code this instead of nesting mtdread into device_read and
therefor avoid the error handling pathes in those places where
device_read is used.

> +}
> +
> +
> +#define EOF	256

1. very intuitive name
2. why is this constant not at the top, where the other constants are
3. why 256

> +
> +typedef int (*dir_callback)(struct inode *dir, struct dentry *dentry,
> +		struct logfs_disk_dentry *dd, loff_t pos);

Why is this in the middle of something else ?

> +
> +static s64 dir_seek_data(struct inode *inode, s64 pos)
> +{
> +	s64 new_pos = logfs_seek_data(inode, pos);

new line please

> +	return max((s64)pos, new_pos - 1);

	max_t please 

> +}
> +
> +
> +static int __logfs_dir_walk(struct inode *dir, struct dentry *dentry,
> +		dir_callback handler, struct logfs_disk_dentry *dd, loff_t *pos)
> +{
> +	struct qstr *name = dentry ? &dentry->d_name : NULL;
> +	int ret;
> +
> +	for (; ; (*pos)++) {
> +		ret = read_dir(dir, dd, *pos);
> +		if (ret == -EOF)
> +			return 0;
> +		if (ret == -ENODATA) {/* deleted dentry */

Please move the comment away. It makes parsing hard

> +			*pos = dir_seek_data(dir, *pos);
> +			continue;
> +		}
> +		if (ret)
> +			return ret;
> +		BUG_ON(dd->namelen == 0);
> +
> +		if (name) {
> +			if (name->len != be16_to_cpu(dd->namelen))
> +				continue;
> +			if (memcmp(name->name, dd->name, name->len))
> +				continue;
> +		}
> +
> +		return handler(dir, dentry, dd, *pos);
> +	}
> +	return ret;

	Where do you break out of the loop ?

> +}
> +
> +
> +static int logfs_dir_walk(struct inode *dir, struct dentry *dentry,
> +		dir_callback handler)
> +{
> +	struct logfs_disk_dentry dd;
> +	loff_t pos = 0;

New line please

> +	return __logfs_dir_walk(dir, dentry, handler, &dd, &pos);
> +}
> +
> +
> +static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
> +		struct nameidata *nd)
> +{
> +	struct dentry *ret;
> +
> +	ret = ERR_PTR(logfs_dir_walk(dir, dentry, logfs_lookup_handler));
> +	return ret;

	return ERR_PTR(.....);

> +}
> +
> +static int logfs_unlink(struct inode *dir, struct dentry *dentry)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(dir->i_sb);
> +	struct inode *inode = dentry->d_inode;
> +	int ret;
> +
> +	mutex_lock(&super->s_victim_mutex);
> +	super->s_victim_ino = inode->i_ino;
> +
> +	/* remove dentry */
> +	if (inode->i_mode & S_IFDIR)
> +		dir->i_nlink--;
> +	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
> +	ret = logfs_dir_walk(dir, dentry, logfs_unlink_handler);
> +	super->s_victim_ino = 0;
> +	if (ret)
> +		goto out;
> +
> +	/* remove inode */
> +	ret = logfs_remove_inode(inode);

Please remove this goto / label construct and  do

	if (likely(!ret))
		ret = logfs_remove_inode(inode);

instead

> +out:
> +	mutex_unlock(&super->s_victim_mutex);
> +	return ret;
> +}
> +
> +
> +/* FIXME: readdir currently has it's own dir_walk code.  I don't see a good
> + * way to combine the two copies */
> +#define IMPLICIT_NODES 2
> +static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
> +{
> +	struct logfs_disk_dentry dd;
> +	loff_t pos = file->f_pos - IMPLICIT_NODES;
> +	int err;
> +
> +	BUG_ON(pos<0);
> +	for (;; pos++) {
> +		struct inode *dir = file->f_dentry->d_inode;

new line please

> +		err = read_dir(dir, &dd, pos);
> +		if (err == -EOF)
> +			break;

	-EOF results in a return code 0 ?

> +		if (err == -ENODATA) {/* deleted dentry */
> +			pos = dir_seek_data(dir, pos);
> +			continue;
> +		}
> +		if (err)
> +			return err;
> +		BUG_ON(dd.namelen == 0);
> +
> +		if (filldir(buf, dd.name, be16_to_cpu(dd.namelen), pos,
> +					be64_to_cpu(dd.ino), dd.type))
> +			break;
> +	}
> +
> +	file->f_pos = pos + IMPLICIT_NODES;
> +	return 0;
> +}
> +
> +
> +static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
> +{
> +	struct inode *inode = file->f_dentry->d_inode;
> +	int err;
> +
> +	if (file->f_pos < 0)
> +		return -EINVAL;
> +
> +	if (file->f_pos == 0) {
> +		if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0)
> +			return 0;
> +		file->f_pos++;
> +	}
> +	if (file->f_pos == 1) {
> +		ino_t pino = parent_ino(file->f_dentry);

empty line

> +		if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0)
> +			return 0;
> +		file->f_pos++;
> +	}
> +
> +	err = __logfs_readdir(file, buf, filldir);
> +	if (err)
> +		printk("LOGFS readdir error=%x, pos=%llx\n", err, file->f_pos);
> +	return err;
> +}

> +static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
> +		struct inode *inode)
> +{
> +	struct logfs_disk_dentry dd;
> +	int err;
> +
> +	memset(&dd, 0, sizeof(dd));
> +	dd.ino = cpu_to_be64(inode->i_ino);
> +	dd.type = logfs_type(inode);
> +	logfs_set_name(&dd, &dentry->d_name);
> +
> +	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
> +	/* FIXME: the file size should actually get aligned when writing,
> +	 * not when reading. */

Please use 

	/*
	 * kernel style 
	 * multi line comments
	 */

> +	err = write_dir(dir, &dd, file_end(dir));
> +	if (err)
> +		return err;
> +	d_instantiate(dentry, inode);
> +	return 0;
> +}
> +
> +
> +static int __logfs_create(struct inode *dir, struct dentry *dentry,
> +		struct inode *inode, const char *dest, long destlen)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(dir->i_sb);
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int ret;
> +
> +	mutex_lock(&super->s_victim_mutex);
> +	super->s_victim_ino = inode->i_ino;
> +	if (inode->i_mode & S_IFDIR)
> +		inode->i_nlink++;
> +
> +	if (dest) /* symlink */
> +		ret = logfs_inode_write(inode, dest, destlen, 0);
> +	else /* creat/mkdir/mknod */
> +		ret = __logfs_write_inode(inode);

Please remove this confusing tail comments

> +	super->s_victim_ino = 0;
> +	if (ret) {
> +		if (!dest)
> +			li->li_flags |= LOGFS_IF_STILLBORN;
> +		/* FIXME: truncate symlink */
> +		inode->i_nlink--;
> +		iput(inode);
> +		goto out;
> +	}
> +
> +	if (inode->i_mode & S_IFDIR)
> +		dir->i_nlink++;
> +	ret = logfs_write_dir(dir, dentry, inode);
> +
> +	if (ret) {
> +		if (inode->i_mode & S_IFDIR)
> +			dir->i_nlink--;
> +		logfs_remove_inode(inode);
> +		iput(inode);
> +	}
> +out:
> +	mutex_unlock(&super->s_victim_mutex);
> +	return ret;
> +}
> +
> +
> +/* FIXME: This should really be somewhere in the 64bit area. */
> +#define LOGFS_LINK_MAX (1<<30)

Please move the define to the header file or some other useful place

> +static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
> +{
> +	struct inode *inode;
> +
> +	if (dir->i_nlink >= LOGFS_LINK_MAX)
> +		return -EMLINK;
> +
> +	/* FIXME: why do we have to fill in S_IFDIR, while the mode is
> +	 * correct for mknod, creat, etc.?  Smells like the vfs *should*
> +	 * do it for us but for some reason fails to do so.
> +	 */

Comment style

> +
> +static struct inode_operations ext2_symlink_iops = {
> +	.readlink	= generic_readlink,
> +	.follow_link	= page_follow_link_light,
> +};

s/ext2/logfs/ maybe ?

> +static int logfs_nop_handler(struct inode *dir, struct dentry *dentry,
> +		struct logfs_disk_dentry *dd, loff_t pos)
> +{
> +	return 0;
> +}

New line

> +static inline int logfs_get_dd(struct inode *dir, struct dentry *dentry,
> +		struct logfs_disk_dentry *dd, loff_t *pos)
> +{
> +	*pos = 0;
> +	return __logfs_dir_walk(dir, dentry, logfs_nop_handler, dd, pos);
> +}
> +

> +static int logfs_delete_dd(struct inode *dir, struct logfs_disk_dentry *dd,
> +		loff_t pos)
> +{
> +	int err;
> +
> +	err = read_dir(dir, dd, pos);
> +	if (err == -EOF) /* don't expose internal errnos */
> +		err = -EIO;

Interesting. Why is EOF morphed to EIO ?

> +	if (err)
> +		return err;
> +
> +	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
> +	if (dd->type == DT_DIR)
> +		dir->i_nlink--;
> +	return logfs_delete(dir, pos);
> +}

> +
> +static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> +		struct inode *new_dir, struct dentry *new_dentry)
> +{
> +	if (new_dentry->d_inode) /* target exists */
> +		return logfs_rename_target(old_dir, old_dentry, new_dir, new_dentry);
> +	else if (old_dir == new_dir) /* local rename */
> +		return logfs_rename_local(old_dir, old_dentry, new_dentry);

Comment style

> +	return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry);
> +}
> +
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/file.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,82 @@

Comment missing. License missing.

> +#include "logfs.h"
> +
> +
> +static int logfs_prepare_write(struct file *file, struct page *page,
> +		unsigned start, unsigned end)
> +{
> +	if (PageUptodate(page))
> +		return 0;
> +
> +	if ((start == 0) && (end == PAGE_CACHE_SIZE))
> +		return 0;

	Self explaining logic ?

> +	return logfs_readpage_nolock(page);
> +}
> +
> +
> +static int logfs_readpage(struct file *file, struct page *page)
> +{
> +	int ret = logfs_readpage_nolock(page);

empty line

> +	unlock_page(page);
> +	return ret;
> +}
> +
> +
> +static int logfs_writepage(struct page *page, struct writeback_control *wbc)
> +{
> +	BUG();

Is this a permanent solution ?

> +	return 0;
> +}

> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/gc.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,350 @@

Comment and license please.

> +#include "logfs.h"
> +
> +#if 0

Can you please remove this ?

> +/**
> + * When deciding which segment to use next, calculate the resistance
> + * of each segment and pick the lowest.  Segments try to resist usage
> + * if
> + * o they are full,
> + * o they have a high erase count or
> + * o they have recently been written.
> + *
> + * Full segments should not get reused, as there is little space to
> + * gain from them.  Segments with high erase count should be left
> + * aside as they can wear out sooner than others.  Freshly-written
> + * segments contain many blocks that will get obsoleted fairly soon,
> + * so it helps to wait a little before reusing them.
> + *
> + * Total resistance is expressed in erase counts.  Formula is:
> + *
> + * R = EC + K1*F + K2*e^(-t/theta)
> + *
> + * R:	Resistance
> + * EC:	Erase count
> + * K1:	Constant, 10,000 might be a good value
> + * K2:	Constant,  1,000 might be a good value
> + * F:	Segment fill level
> + * t:	Time since segment was written to (in number of segments written)
> + * theta: Time constant.  Total number of segments might be a good value
> + *
> + * Since the kernel is not allowed to use floating point, the function
> + * decay() is used to approximate exponential decay in fixed point.
> + */

Interestingly enough this unused function is better commented than
anything else in this patch.

> +static long decay(long t0, long t, long theta)
> +{
> +	long shift, fac;
> +
> +	if (t >= 32*theta)
> +		return 0;
> +
> +	shift = t/theta;
> +	fac = theta - (t%theta)/2;
> +	return (t0 >> shift) * fac / theta;
> +}
> +#endif
> +
> +
> +static u32 logfs_valid_bytes(struct super_block *sb, u32 segno)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_object_header h;
> +	u64 ofs, ino, pos;
> +	u32 seg_ofs, valid, size;
> +	void *reserved;
> +	int i;
> +
> +	/* Some segments are reserved.  Just pretend they were all valid */
> +	reserved = btree_lookup(&super->s_reserved_segments, segno);
> +	if (reserved)
> +		return super->s_segsize;
> +
> +	/* Currently open segments */
> +	/* FIXME: just reserve open areas and remove this code */
> +	for (i=0; i<LOGFS_NO_AREAS; i++) {
> +		struct logfs_area *area = super->s_area[i];
> +		if (area->a_is_open && (area->a_segno == segno)) {
> +			return super->s_segsize;
> +		}
> +	}
> +
> +	device_read(sb, segno, 0, sizeof(h), &h);

	See above comment about device_read() implementation.

> +	if (all_ff(&h, sizeof(h)))
> +		return 0;
> +
> +	valid = 0; /* segment header not counted as valid bytes */
> +	for (seg_ofs = sizeof(h); seg_ofs + sizeof(h) < super->s_segsize; ) {
> +		device_read(sb, segno, seg_ofs, sizeof(h), &h);
> +		if (all_ff(&h, sizeof(h)))
> +			break;
> +
> +		ofs = dev_ofs(sb, segno, seg_ofs);
> +		ino = be64_to_cpu(h.ino);
> +		pos = be64_to_cpu(h.pos);
> +		size = (u32)be16_to_cpu(h.len) + sizeof(h);
> +		//printk("%x %x (%llx, %llx, %llx)(%x, %x)\n", h.type, h.compr, ofs, ino, pos, valid, size);

	Please remove

> +		if (logfs_is_valid_block(sb, ofs, ino, pos))
> +			valid += size;
> +		seg_ofs += size;
> +	}
> +	printk("valid(%x) = %x\n", segno, valid);
> +	return valid;
> +}
> +
> +static void __logfs_gc_segment(struct super_block *sb, u32 segno)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_object_header h;
> +	struct logfs_segment_header *sh;
> +	u64 ofs, ino, pos;
> +	u32 seg_ofs;
> +	int level;
> +
> +	device_read(sb, segno, 0, sizeof(h), &h);

	See above comment about device_read() implementation.

> +	sh = (void*)&h;

Please use proper type casting !

> +	level = sh->level;
> +
> +	for (seg_ofs = sizeof(h); seg_ofs + sizeof(h) < super->s_segsize; ) {
> +		ofs = dev_ofs(sb, segno, seg_ofs);
> +		device_read(sb, segno, seg_ofs, sizeof(h), &h);

	See above comment about device_read() implementation.

> +		ino = be64_to_cpu(h.ino);
> +		pos = be64_to_cpu(h.pos);
> +		if (logfs_is_valid_block(sb, ofs, ino, pos))
> +			logfs_cleanse_block(sb, ofs, ino, pos, level);
> +		seg_ofs += sizeof(h);
> +		seg_ofs += be16_to_cpu(h.len);
> +	}
> +}
> +

> +static void __add_segment(struct list_head *list, int *count, u32 segno,
> +		int valid)
> +{
> +	struct logfs_segment *seg = kzalloc(sizeof(*seg), GFP_KERNEL);

empty line

> +	if (!seg)
> +		return;
> +
> +	seg->segno = segno;
> +	seg->valid = valid;
> +	list_add(&seg->list, list);
> +	*count += 1;
> +}

Also __add_segment() can fail. Why is there no return code ?

> +
> +
> +static void add_segment(struct list_head *list, int *count, u32 segno,
> +		int valid)
> +{
> +	struct logfs_segment *seg;
> +	list_for_each_entry(seg, list, list)
> +		if (seg->segno == segno)
> +			return;
> +	__add_segment(list, count, segno, valid);

	Can fail. Error handling ?

> +}
> +
> +
> +static void del_segment(struct list_head *list, int *count, u32 segno)
> +{
> +	struct logfs_segment *seg;

Empty line

> +	list_for_each_entry(seg, list, list)
> +		if (seg->segno == segno) {
> +			list_del(&seg->list);
> +			*count -= 1;
> +			kfree(seg);
> +			return;
> +		}
> +}
> +
> +
> +static void add_free_segment(struct super_block *sb, u32 segno)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	add_segment(&super->s_free_list, &super->s_free_count, segno, 0);
> +}

Empty line

> +static void add_low_segment(struct super_block *sb, u32 segno, int valid)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);

Empty line

> +	add_segment(&super->s_low_list, &super->s_low_count, segno, valid);

	Can fail

> +}
> +static void del_low_segment(struct super_block *sb, u32 segno)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);

Empty line

> +	del_segment(&super->s_low_list, &super->s_low_count, segno);
> +}
> +
> +
> +static void scan_segment(struct super_block *sb, u32 segno)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	u32 full = super->s_segsize - sb->s_blocksize - 0x18; /* one header */

Please use a understandable constant instead of 0x18

> +	int valid;
> +
> +	valid = logfs_valid_bytes(sb, segno);
> +	if (valid == 0) {
> +		del_low_segment(sb, segno);
> +		add_free_segment(sb, segno);
> +	} else if (valid < full)
> +		add_low_segment(sb, segno, valid);

		Can fail
> +}
> +
> +
> +static void free_all_segments(struct logfs_super *super)
> +{
> +	struct logfs_segment *seg, *next;
> +
> +	list_for_each_entry_safe(seg, next, &super->s_free_list, list) {
> +		list_del(&seg->list);
> +		kfree(seg);
> +	}
> +	list_for_each_entry_safe(seg, next, &super->s_low_list, list) {
> +		list_del(&seg->list);
> +		kfree(seg);
> +	}
> +}
> +
> +
> +static void logfs_scan_pass(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	for (i = super->s_sweeper+1; i != super->s_sweeper; i++) {

	for (i = super->s_sweeper + 1; i != super->s_sweeper; i++) {

> +		if (i >= super->s_no_segs)
> +			i=1;	/* skip superblock */

			i = 1;
	and remove tail comment

> +
> +		scan_segment(sb, i);
> +
> +		if (super->s_free_count >= super->s_total_levels) {
> +			super->s_sweeper = i;
> +			return;
> +		}
> +	}
> +	scan_segment(sb, super->s_sweeper);
> +}
> +

> +/* GC all the low-count segments.  If necessary, rescan the medium.
> + * If we made enough room, return */
> +static void logfs_gc_several(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int rounds;
> +
> +	rounds = super->s_low_count;
> +
> +	for (; rounds; rounds--) {
> +		if (super->s_free_count >= super->s_total_levels)
> +			return;
> +		if (super->s_free_count < 3) {
> +			logfs_scan_pass(sb);
> +			printk("s");

	Debug leftover ?

> +		}
> +		logfs_gc_once(sb);
> +#if 1
> +		if (super->s_free_count >= super->s_total_levels)
> +			return;
> +		printk(".");
> +#endif

	Dito ?

> +	}
> +}
> +
> +
> +void logfs_gc_pass(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	for (i=4; i; i--) {

	(i = 4; ...

	Please use a constant instead of 4

> +		if (super->s_free_count >= super->s_total_levels)
> +			return;
> +		logfs_scan_pass(sb);
> +
> +		if (super->s_free_count >= super->s_total_levels)
> +			return;
> +		printk("free:%8d, low:%8d, sweeper:%8lld\n",
> +				super->s_free_count, super->s_low_count,
> +				super->s_sweeper);

Debug leftover ? Otherwise please add loglevel and some hint from which
code this originates

> +		logfs_gc_several(sb);
> +		printk("free:%8d, low:%8d, sweeper:%8lld\n",
> +				super->s_free_count, super->s_low_count,
> +				super->s_sweeper);

Same here

> +	}
> +	logfs_fsck(sb);
> +	LOGFS_BUG(sb);
> +}
> +
> +
> +
> +void logfs_cleanup_gc(struct logfs_super *super)
> +{
> +	free_all_segments(super);
> +}

Can we add another wrapper to this please ?

> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/inode.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,468 @@

Comment + license missing

> +#include "logfs.h"
> +#include <linux/backing-dev.h>
> +#include <linux/writeback.h> /* for inode_lock */

Please remove the stupid comment

> +
> +static struct kmem_cache *logfs_inode_cache;
> +
> +
> +static int __logfs_read_inode(struct inode *inode);
> +
> +
> +struct inode *logfs_iget(struct super_block *sb, ino_t ino, int *cookie)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_inode *li;
> +
> +	if (ino == LOGFS_INO_MASTER) /* never iget this "inode"! */

	comment style

> +		return super->s_master_inode;
> +
> +	spin_lock(&inode_lock);
> +	list_for_each_entry(li, &super->s_freeing_list, li_freeing_list)
> +		if (li->vfs_inode.i_ino == ino) {
> +			spin_unlock(&inode_lock);
> +			*cookie = 1;
> +			return &li->vfs_inode;
> +		}
> +	spin_unlock(&inode_lock);
> +
> +	*cookie = 0;
> +	return __logfs_iget(sb, ino);
> +}
> +
> +
> +void logfs_iput(struct inode *inode, int cookie)
> +{
> +	if (inode->i_ino == LOGFS_INO_MASTER) /* never iput it either! */

	comment style

> +		return;
> +
> +	if (cookie)
> +		return;
> +
> +	iput(inode);
> +}
> +
> +
> +static void logfs_init_inode(struct inode *inode)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int i;
> +
> +	li->li_flags	= LOGFS_IF_VALID;
> +	li->li_used_bytes = 0;
> +	inode->i_uid	= 0;
> +	inode->i_gid	= 0;
> +	inode->i_size	= 0;
> +	inode->i_blocks	= 0;
> +	inode->i_ctime	= CURRENT_TIME;
> +	inode->i_mtime	= CURRENT_TIME;
> +	inode->i_nlink	= 1;
> +	INIT_LIST_HEAD(&li->li_freeing_list);
> +
> +	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)

	i = 0; .....

> +		li->li_data[i] = 0;
> +
> +	return;
> +}
> +
> +
> +struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
> +{
> +	struct inode *inode;
> +
> +	inode = logfs_alloc_inode(sb);
> +	if (!inode)
> +		return ERR_PTR(-ENOMEM);
> +
> +	logfs_init_inode(inode);
> +	inode->i_mode = 0;
> +	inode->i_ino = ino;
> +	inode->i_sb = sb;
> +
> +	/* This is a blatant copy of alloc_inode code.  We'd need alloc_inode
> +	 * to be nonstatic, alas. */
> +	{
> +		static const struct address_space_operations empty_aops;
> +		struct address_space * const mapping = &inode->i_data;

Please remove the brackets and move the variables to the top of the
fucntion

> +		mapping->a_ops = &empty_aops;
> +		mapping->host = inode;
> +		mapping->flags = 0;
> +		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
> +		mapping->assoc_mapping = NULL;
> +		mapping->backing_dev_info = &default_backing_dev_info;
> +		inode->i_mapping = mapping;
> +	}
> +
> +	return inode;
> +}
> +
> +
> +static struct timespec be64_to_timespec(be64 betime)
> +{
> +	u64 time = be64_to_cpu(betime);
> +	struct timespec tsp;

Empty line

> +	tsp.tv_sec = time >> 32;
> +	tsp.tv_nsec = time & 0xffffffff;
> +	return tsp;
> +}
> +
> +
> +static be64 timespec_to_be64(struct timespec tsp)
> +{
> +	u64 time = ((u64)tsp.tv_sec << 32) + (tsp.tv_nsec & 0xffffffff);

tsp.tv_nsec & 0xffffffff ????

timespecs need to be normalized, so tv_nsec can never be greater than
999999999 == 0x3B9AC9FF

> +	return cpu_to_be64(time);
> +}
> +
> +
> +static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int i;
> +
> +	inode->i_mode	= be16_to_cpu(di->di_mode);
> +	li->li_flags	= be32_to_cpu(di->di_flags);
> +	inode->i_uid	= be32_to_cpu(di->di_uid);
> +	inode->i_gid	= be32_to_cpu(di->di_gid);
> +	inode->i_size	= be64_to_cpu(di->di_size);
> +	logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes));
> +	inode->i_ctime	= be64_to_timespec(di->di_ctime);
> +	inode->i_mtime	= be64_to_timespec(di->di_mtime);
> +	inode->i_nlink	= be32_to_cpu(di->di_refcount);
> +	inode->i_generation = be32_to_cpu(di->di_generation);
> +
> +	switch (inode->i_mode & S_IFMT) {
> +	case S_IFCHR: /* fall through */

Sigh. Could you please add useful comments ?

> +	case S_IFBLK: /* fall through */
> +	case S_IFIFO:
> +		inode->i_rdev = be64_to_cpu(di->di_data[0]);
> +		break;
> +	default:
> +		for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)

		i = 0; i < L.....

> +			li->li_data[i] = be64_to_cpu(di->di_data[i]);
> +		break;
> +	}
> +}
> +
> +
> +static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int i;
> +
> +	di->di_mode	= cpu_to_be16(inode->i_mode);
> +	di->di_pad	= 0;
> +	di->di_flags	= cpu_to_be32(li->li_flags);
> +	di->di_uid	= cpu_to_be32(inode->i_uid);
> +	di->di_gid	= cpu_to_be32(inode->i_gid);
> +	di->di_size	= cpu_to_be64(i_size_read(inode));
> +	di->di_used_bytes = cpu_to_be64(li->li_used_bytes);
> +	di->di_ctime	= timespec_to_be64(inode->i_ctime);
> +	di->di_mtime	= timespec_to_be64(inode->i_mtime);
> +	di->di_refcount	= cpu_to_be32(inode->i_nlink);
> +	di->di_generation = cpu_to_be32(inode->i_generation);
> +
> +	switch (inode->i_mode & S_IFMT) {
> +	case S_IFCHR: /* fall through */

See above

> +	case S_IFBLK: /* fall through */
> +	case S_IFIFO:
> +		di->di_data[0] = cpu_to_be64(inode->i_rdev);
> +		break;
> +	default:
> +		for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)

See above

> +			di->di_data[i] = cpu_to_be64(li->li_data[i]);
> +		break;
> +	}
> +}
> +
> +
> +static int logfs_read_disk_inode(struct logfs_disk_inode *di,
> +		struct inode *inode)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
> +	ino_t ino = inode->i_ino;
> +	int ret;
> +
> +	BUG_ON(!super->s_master_inode);
> +	ret = logfs_inode_read(super->s_master_inode, di, sizeof(*di), ino);
> +	if (ret)
> +		return ret;
> +
> +	if ( !(be32_to_cpu(di->di_flags) & LOGFS_IF_VALID))
> +		return -EIO;
> +
> +	if (be32_to_cpu(di->di_flags) & LOGFS_IF_INVALID)
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +
> +static int __logfs_read_inode(struct inode *inode)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	struct logfs_disk_inode di;
> +	int ret;
> +
> +	ret = logfs_read_disk_inode(&di, inode);
> +	/* FIXME: move back to mkfs when format has settled */
> +	if (ret == -ENODATA && inode->i_ino == LOGFS_INO_ROOT) {
> +		memset(&di, 0, sizeof(di));
> +		di.di_flags	= cpu_to_be32(LOGFS_IF_VALID);
> +		di.di_mode	= cpu_to_be16(S_IFDIR | 0755);
> +		di.di_refcount	= cpu_to_be32(2);
> +		ret = 0;
> +	}
> +	if (ret)
> +		return ret;
> +	logfs_disk_to_inode(&di, inode);
> +
> +	if ( !(li->li_flags&LOGFS_IF_VALID) || (li->li_flags&LOGFS_IF_INVALID))
> +		return -EIO;

	Is this really an IO error ?

> +	switch (inode->i_mode & S_IFMT) {
> +	case S_IFDIR:
> +		inode->i_op = &logfs_dir_iops;
> +		inode->i_fop = &logfs_dir_fops;
> +		break;
> +	case S_IFREG:
> +		inode->i_op = &logfs_reg_iops;
> +		inode->i_fop = &logfs_reg_fops;
> +		inode->i_mapping->a_ops = &logfs_reg_aops;
> +		break;
> +	default:
> +		;
> +	}
> +
> +	return 0;
> +}
> +

> +int __logfs_write_inode(struct inode *inode)
> +{
> +	struct logfs_disk_inode old, new; /* FIXME: move these off the stack */
> +
> +	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
> +
> +	/* read and compare the inode first.  If it hasn't changed, don't
> +	 * bother writing it. */

Comment style

> +	logfs_inode_to_disk(inode, &new);
> +	if (logfs_read_disk_inode(&old, inode))
> +		return logfs_write_disk_inode(&new, inode);
> +	if (memcmp(&old, &new, sizeof(old)))
> +		return logfs_write_disk_inode(&new, inode);
> +	return 0;
> +}
> +
> +
> +
> +/**

Do not use kernel doc comment start sequence for non kernel doc comments
please 

> + * We need to remember which inodes are currently being dropped.  They
> + * would deadlock the cleaner, if it were to iget() them.  So
> + * logfs_drop_inode() adds them to super->s_freeing_list,
> + * logfs_destroy_inode() removes them again and logfs_iget() checks the
> + * list.
> + */
> +static void logfs_destroy_inode(struct inode *inode)
> +

> +static u64 logfs_get_ino(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	u64 ino;
> +
> +	/* FIXME: ino allocation should work in two modes:
> +	 * o nonsparse - ifile is mostly occupied, just append
> +	 * o sparse - ifile has lots of holes, fill them up
> +	 */

Comment style

> +	spin_lock(&super->s_ino_lock);
> +	ino = super->s_last_ino; /* ifile shouldn't be too sparse */
> +	super->s_last_ino++;
> +	spin_unlock(&super->s_ino_lock);
> +	return ino;
> +}
> +

> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/journal.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,696 @@

Comment and license missing

> +#include "logfs.h"
> +
> +
> +static void clear_retired(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	for (i=0; i<JE_LAST; i++)

	i = 0; ....

> +		super->s_retired[i].used = 0;
> +	super->s_first.used = 0;
> +}
> +
> +
> +static void clear_speculatives(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	for (i=0; i<JE_LAST; i++)

	dito

> +		super->s_speculative[i].used = 0;
> +}
> +
> +
> +static void retire_speculatives(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	for (i=0; i<JE_LAST; i++) {
> +		struct logfs_journal_entry *spec = super->s_speculative + i;
> +		struct logfs_journal_entry *retired = super->s_retired + i;

empty line

> +		if (! spec->used)
> +			continue;
> +		if (retired->used && (spec->version <= retired->version))
> +			continue;
> +		retired->used = 1;
> +		retired->version = spec->version;
> +		retired->offset = spec->offset;
> +		retired->len = spec->len;
> +	}
> +	clear_speculatives(sb);
> +}
> +
> +
> +static void __logfs_scan_journal(struct super_block *sb, void *block,
> +		u32 segno, u64 block_ofs, int block_index)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_journal_header *h;
> +	struct logfs_area *area = super->s_journal_area;
> +
> +	for (h = block; (void*)h - block < sb->s_blocksize; h++) {
> +		struct logfs_journal_entry *spec, *retired;
> +		unsigned long ofs = (void*)h - block;
> +		unsigned long remainder = sb->s_blocksize - ofs;
> +		u16 len = be16_to_cpu(h->h_len);
> +		u16 type = be16_to_cpu(h->h_type);
> +		s16 version = be16_to_cpu(h->h_version);
> +
> +		if ((len < 16) || (len > remainder))
> +			continue;
> +		if ((type < JE_FIRST) || (type > JE_LAST))
> +			continue;
> +		if (h->h_crc != logfs_crc32(h, len, 4))
> +			continue;
> +
> +		if (!super->s_first.used) { /* remember first version */

	Comment style

> +			super->s_first.used = 1;
> +			super->s_first.version = version;
> +		}
> +		version -= super->s_first.version;
> +
> +		if (abs(version) > 1<<14) /* all versions should be near */
> +			LOGFS_BUG(sb);
> +
> +		spec = &super->s_speculative[type];
> +		retired = &super->s_retired[type];
> +		switch (type) {
> +		default: /* store speculative entry */

	Comment style

> +			if (spec->used && (version <= spec->version))
> +				break;
> +			spec->used = 1;
> +			spec->version = version;
> +			spec->offset = block_ofs + ofs;
> +			spec->len = len;
> +			break;
> +		case JE_COMMIT: /* retire speculative entries */

	Comment style

> +			if (retired->used && (version <= retired->version))
> +				break;
> +			retired->used = 1;
> +			retired->version = version;
> +			retired->offset = block_ofs + ofs;
> +			retired->len = len;
> +			retire_speculatives(sb);
> +			/* and set up journal area */
> +			area->a_segno = segno;
> +			area->a_used_objects = block_index;
> +			area->a_is_open = 0; /* never reuse same segment after
> +						mount - wasteful but safe */

	Comment style

> +			break;
> +		}
> +	}
> +}
> +
> +
> +static int logfs_scan_journal(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	void *block = super->s_compressed_je;
> +	u64 ofs;
> +	u32 segno;
> +	int i, k, err;
> +
> +	clear_speculatives(sb);
> +	clear_retired(sb);
> +	journal_for_each(i) {
> +		segno = super->s_journal_seg[i];
> +		if (!segno)
> +			continue;
> +		for (k=0; k<super->s_no_blocks; k++) {

	k = 0;..........

> +			ofs = logfs_block_ofs(sb, segno, k);
> +			err = mtdread(sb, ofs, sb->s_blocksize, block);
> +			if (err)
> +				return err;
> +			__logfs_scan_journal(sb, block, segno, ofs, k);
> +		}
> +	}
> +	return 0;
> +}
> +

> +static void logfs_calc_free(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	u64 no_segs = super->s_no_segs;
> +	u64 no_blocks = super->s_no_blocks;
> +	u64 blocksize = sb->s_blocksize;
> +	u64 free;
> +	int i, reserved_segs;
> +
> +	reserved_segs = 1; /* super_block */
> +	reserved_segs += super->s_bad_segments;
> +	journal_for_each(i)
> +		if (super->s_journal_seg[i])
> +			reserved_segs++;
> +
> +	free = no_segs * no_blocks * blocksize;		/* total size */
> +	free -= reserved_segs * no_blocks * blocksize;	/* sb & journal */
> +	free -= (no_segs - reserved_segs) * blocksize;	/* block summary */
> +	free -= super->s_used_bytes;			/* stored data */
> +	super->s_free_bytes = free;

	comments all over the function

> +}
> +

> +static void reserve_sb_and_journal(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct btree_head *head = &super->s_reserved_segments;
> +	int i, err;
> +
> +	err = btree_insert(head, 0, (void*)1);

What stands 1 for ?

> +	BUG_ON(err);
> +
> +	journal_for_each(i) {
> +		if (! super->s_journal_seg[i])
> +			continue;
> +		err = btree_insert(head, super->s_journal_seg[i], (void*)1);
> +		BUG_ON(err);
> +	}
> +}
> +

> +static void logfs_read_anchor(struct super_block *sb, struct logfs_anchor *da)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct inode *inode = super->s_master_inode;
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int i;
> +
> +	super->s_last_ino = be64_to_cpu(da->da_last_ino);
> +	li->li_flags	= LOGFS_IF_VALID;
> +	i_size_write(inode, be64_to_cpu(da->da_size));
> +	li->li_used_bytes = be64_to_cpu(da->da_used_bytes);
> +
> +	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)

	i = 0; ...

> +		li->li_data[i] = be64_to_cpu(da->da_data[i]);
> +}
> +

> +static void logfs_read_areas(struct super_block *sb, struct logfs_je_areas *a)
> +{
> +	struct logfs_area *area;
> +	int i;
> +
> +	for (i=0; i<LOGFS_NO_AREAS; i++) {

	Sigh

> +		area = LOGFS_SUPER(sb)->s_area[i];
> +		area->a_used_bytes = be32_to_cpu(a->used_bytes[i]);
> +		area->a_segno = be32_to_cpu(a->segno[i]);
> +		if (area->a_segno)
> +			area->a_is_open = 1;
> +	}
> +}
> +

> +/* FIXME: make sure there are enough per-area objects in journal */
> +static int logfs_read_journal(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	void *block = super->s_compressed_je;
> +	void *scratch = super->s_je;
> +	int i, err, level;
> +	struct logfs_area *area;
> +
> +	for (i=0; i<JE_LAST; i++) {

i..

> +		struct logfs_journal_entry *je = super->s_retired + i;
> +		if (!super->s_retired[i].used)

		if (!super->s_retired[i].used) {

> +			switch (i) {
> +			case JE_COMMIT:
> +			case JE_DYNSB:
> +			case JE_ANCHOR:
> +				printk("LogFS: Missing journal entry %x?\n",
> +						i);
> +				return -EIO;
> +			default:
> +				continue;
> +			}

		}

> +		err = mtdread(sb, je->offset, sb->s_blocksize, block);
> +		if (err)
> +			return err;

> +		level = i & 0xf;

what is 0xf ?

> +		area = super->s_area[level];
> +		switch (i & ~0xf) {
> +		case JEG_BASE:
> +			switch (i) {

Represents I an enum or a bitfield or both ?

> +			case JE_COMMIT:
> +				/* just reads the latest version number */
> +				logfs_read_commit(super, block);
> +				break;
> +			case JE_DYNSB:
> +				logfs_read_dynsb(sb, unpack(block, scratch));
> +				break;
> +
> +static void journal_get_free_segment(struct logfs_area *area)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(area->a_sb);
> +	int i;
> +
> +	journal_for_each(i) {
> +		if (area->a_segno != super->s_journal_seg[i])
> +			continue;
> +empty_seg:
> +		i++;
> +		if (i == LOGFS_JOURNAL_SEGS)
> +			i = 0;
> +		if (!super->s_journal_seg[i])
> +			goto empty_seg;

Does this loop for ever or is there a guranteed exit ?
Please use a do while loop instead of the goto

> +		area->a_segno = super->s_journal_seg[i];
> +		++(super->s_journal_ec[i]);
> +		return;
> +	}
> +	BUG();
> +}
> +
> +
> +/**
> + * logfs_get_free_entry - return free space for journal entry
> + */
> +static s64 logfs_get_free_entry(struct super_block *sb)
> +{
> +	s64 ret;
> +
> +	mutex_lock(&LOGFS_SUPER(sb)->s_log_mutex);
> +	ret = __logfs_get_free_entry(sb);
> +	mutex_unlock(&LOGFS_SUPER(sb)->s_log_mutex);
> +	BUG_ON(ret <= 0); /* not sure, but it's safer to BUG than to accept */

It might be safer to do proper error handling.

> +	return ret;
> +}
> +

> +static size_t logfs_write_header(struct logfs_super *super,
> +		struct logfs_journal_header *h, size_t datalen, u16 type)
> +{
> +	size_t len = datalen + sizeof(*h);

Empty line

> +	return __logfs_write_header(super, h, len, datalen, type, COMPR_NONE);
> +}
> +
> +
> +static void *logfs_write_bb(struct super_block *sb, void *h,
> +		u16 *type, size_t *len)
> +{
> +	*type = JE_BADSEGMENTS;
> +	*len = sb->s_blocksize;
> +	return LOGFS_SUPER(sb)->s_bb_array;
> +}
> +
> +
> +static inline size_t logfs_journal_erasecount_size(struct logfs_super *super)
> +{
> +	return LOGFS_JOURNAL_SEGS * sizeof(be32);
> +}

E,pty line

> +static void *logfs_write_erasecount(struct super_block *sb, void *_ec,
> +		u16 *type, size_t *len)
> +{

> +
> +static void *__logfs_write_anchor(struct super_block *sb, void *_da,
> +		u16 *type, size_t *len)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_anchor *da = _da;
> +	struct inode *inode = super->s_master_inode;
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	int i;
> +
> +	da->da_last_ino = cpu_to_be64(super->s_last_ino);
> +	da->da_size	= cpu_to_be64(i_size_read(inode));
> +	da->da_used_bytes = cpu_to_be64(li->li_used_bytes);
> +	for (i=0; i<LOGFS_EMBEDDED_FIELDS; i++)

	i = 0; ....

> +		da->da_data[i] = cpu_to_be64(li->li_data[i]);
> +	*type = JE_ANCHOR;
> +	*len = sizeof(*da);
> +	return da;
> +}
> +

> +
> +static void *logfs_write_areas(struct super_block *sb, void *_a,
> +		u16 *type, size_t *len)
> +{
> +	struct logfs_area *area;
> +	struct logfs_je_areas *a = _a;
> +	int i;
> +
> +	for (i=0; i<16; i++) { /* FIXME: have all 16 areas */
> +		a->used_bytes[i] = 0;
> +		a->segno[i] = 0;
> +	}

	memset perhaps ?

> +	for (i=0; i<LOGFS_NO_AREAS; i++) {

	i = 0; ...

> +		area = LOGFS_SUPER(sb)->s_area[i];
> +		a->used_bytes[i] = cpu_to_be32(area->a_used_bytes);
> +		a->segno[i] = cpu_to_be32(area->a_segno);
> +	}
> +	*type = JE_AREAS;
> +	*len = sizeof(*a);
> +	return a;
> +}
> +

> +int logfs_write_anchor(struct inode *inode)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	void *block = super->s_compressed_je;
> +	u64 ofs;
> +	size_t jpos;
> +	int i, ret;
> +
> +	ofs = logfs_get_free_entry(sb);
> +	BUG_ON(ofs >= super->s_size);
> +
> +	memset(block, 0, sb->s_blocksize);
> +	jpos = 0;
> +	for (i=0; i<LOGFS_NO_AREAS; i++) {

	i = 0; ...
> +		super->s_sum_index = i;
> +		jpos += logfs_write_je(sb, jpos, logfs_write_wbuf);
> +	}
> +	jpos += logfs_write_je(sb, jpos, logfs_write_bb);
> +	jpos += logfs_write_je(sb, jpos, logfs_write_erasecount);
> +	jpos += logfs_write_je(sb, jpos, __logfs_write_anchor);
> +	jpos += logfs_write_je(sb, jpos, logfs_write_dynsb);
> +	jpos += logfs_write_je(sb, jpos, logfs_write_areas);
> +	jpos += logfs_write_je(sb, jpos, logfs_write_commit);
> +
> +	BUG_ON(jpos > sb->s_blocksize);
> +
> +	ret = mtdwrite(sb, ofs, sb->s_blocksize, block);
> +	if (ret)
> +		return ret;
> +	return 0;

	Interesting way to reyl on compiler smartness

> +}
> +

> +int logfs_init_journal(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int ret;
> +
> +	mutex_init(&super->s_log_mutex);
> +
> +	super->s_je = kzalloc(sb->s_blocksize, GFP_KERNEL);
> +	if (!super->s_je)
> +		goto err0;
> +
> +	super->s_compressed_je = kzalloc(sb->s_blocksize, GFP_KERNEL);
> +	if (!super->s_compressed_je)
> +		goto err1;
> +
> +	super->s_bb_array = kzalloc(sb->s_blocksize, GFP_KERNEL);
> +	if (!super->s_bb_array)
> +		goto err2;
> +
> +	super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER);
> +	if (!super->s_master_inode)
> +		goto err3;
> +
> +	super->s_master_inode->i_nlink = 1; /* lock it in ram */
> +
> +	/* logfs_scan_journal() is looking for the latest journal entries, but
> +	 * doesn't copy them into data structures yet.  logfs_read_journal()
> +	 * then re-reads those entries and copies their contents over. */
> +	ret = logfs_scan_journal(sb);
> +	if (ret)
> +		return ret;

	what about the allocated buffers ?

> +	ret = logfs_read_journal(sb);
> +	if (ret)
> +		return ret;

	dito

> +	reserve_sb_and_journal(sb);
> +	logfs_calc_free(sb);
> +
> +	super->s_journal_area->a_ops = &journal_area_ops;
> +	return 0;
> +err3:
> +	kfree(super->s_bb_array);
> +err2:
> +	kfree(super->s_compressed_je);
> +err1:
> +	kfree(super->s_je);
> +err0:
> +	return -ENOMEM;
> +}
> +
> +
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/readwrite.c	2007-05-07 20:37:05.000000000 +0200
> @@ -0,0 +1,1125 @@
> +/**
> + * fs/logfs/readwrite.c
> + *
> + * Actually contains five sets of very similar functions:
> + * read		read blocks from a file
> + * write	write blocks to a file
> + * valid	check whether a block still belongs to a file
> + * truncate	truncate a file
> + * rewrite	move existing blocks of a file to a new location (gc helper)

License ?

> + */
> +#include "logfs.h"
> +
> +
> +static int logfs_read_empty(void *buf, int read_zero)
> +{
> +	if (!read_zero)
> +		return -ENODATA;
> +
> +	memset(buf, 0, PAGE_CACHE_SIZE);

	Is buf guaranteed to be at least sizeof(PAGE_CACHE_SIZE) ?

> +	return 0;
> +}

> +static int logfs_read_direct(struct inode *inode, pgoff_t index, void *buf,
> +		int read_zero)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	u64 block;
> +
> +	block = li->li_data[index];
> +	if (!block)
> +		return logfs_read_empty(buf, read_zero);
> +
> +	//printk("ino=%lx, index=%lx, blocks=%llx\n", inode->i_ino, index, block);

Please remove

> +	return logfs_segment_read(inode->i_sb, buf, block);
> +}
> +
> +
> +
> +static unsigned long get_bits(u64 val, int skip, int no)
> +{
> +	u64 ret = val;
> +
> +	ret >>= skip * no;
> +	ret <<= 64 - no;
> +	ret >>= 64 - no;
> +	BUG_ON((unsigned long)ret != ret);

	????

> +	return ret;
> +}
> +
> +
> +
> +static u64 seek_data_loop(struct inode *inode, u64 pos, int count)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
> +	be64 *rblock;
> +	u64 bofs = li->li_data[I1_INDEX + count];
> +	int bits = LOGFS_BLOCK_BITS;
> +	int i, ret, slot;
> +
> +	BUG_ON(!bofs);
> +
> +	rblock = logfs_get_rblock(super);
> +
> +	for (i=count; i>=0; i--) {
> +		ret = logfs_segment_read(inode->i_sb, rblock, bofs);
> +		if (ret)
> +			goto out;

	break;

> +		slot = get_bits(pos, i, bits);
> +		while (slot < LOGFS_BLOCK_FACTOR && rblock[slot] == 0) {
> +			slot++;
> +			pos += 1 << (LOGFS_BLOCK_BITS * i);
> +		}
> +		if (slot >= LOGFS_BLOCK_FACTOR)
> +			goto out;

	break;

> +		bofs = be64_to_cpu(rblock[slot]);
> +	}
> +out:
> +	logfs_put_rblock(super);
> +	return pos;
> +}
> +

> +static int logfs_is_valid_loop(struct inode *inode, pgoff_t index,
> +		int count, u64 ofs)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
> +	be64 *rblock;
> +	u64 bofs = li->li_data[I1_INDEX + count];
> +	int bits = LOGFS_BLOCK_BITS;
> +	int i, ret;
> +
> +	if (!bofs)
> +		return 0;
> +
> +	if (bofs == ofs)
> +		return 1;
> +
> +	rblock = logfs_get_rblock(super);
> +
> +	for (i=count; i>=0; i--) {

	....

> +		ret = logfs_segment_read(inode->i_sb, rblock, bofs);
> +		if (ret)
> +			goto fail;

	please use break and do a return !ret;

> +		bofs = be64_to_cpu(rblock[get_bits(index, i, bits)]);
> +		if (!bofs)
> +			goto fail;
> +
> +		if (bofs == ofs) {
> +			ret = 1;
> +			goto out;
> +		}
> +	}
> +
> +fail:
> +	ret = 0;

> +out:
> +	logfs_put_rblock(super);
> +	return ret;
> +}
> +
> +
> +static int __logfs_is_valid_block(struct inode *inode, pgoff_t index, u64 ofs)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +
> +	//printk("%lx, %x, %x\n", inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count));

	Sigh

> +	if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
> +		return 0;
> +
> +	if (li->li_flags & LOGFS_IF_EMBEDDED)
> +		return 0;
> +
> +	if (index < I0_BLOCKS)
> +		return logfs_is_valid_direct(li, index, ofs);
> +	else if (index < I1_BLOCKS)
> +		return logfs_is_valid_loop(inode, index, 0, ofs);
> +	else if (index < I2_BLOCKS)
> +		return logfs_is_valid_loop(inode, index, 1, ofs);
> +	else if (index < I3_BLOCKS)
> +		return logfs_is_valid_loop(inode, index, 2, ofs);
> +
> +	BUG();
> +	return 0;
> +}
> +
> +
> +int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 pos)
> +{
> +	struct inode *inode;
> +	int ret, cookie;
> +
> +	/* Umount closes a segment with free blocks remaining.  Those
> +	 * blocks are by definition invalid. */
> +	if (ino == -1)
> +		return 0;
> +
> +	if ((u64)(u_long)ino != ino) {
> +		printk("%llx, %llx, %llx\n", ofs, ino, pos);

	more sigh

> +		LOGFS_BUG(sb);
> +	}
> +	inode = logfs_iget(sb, ino, &cookie);
> +	if (!inode)
> +		return 0;
> +
> +#if 0
> +	/* Any data belonging to dirty inodes must be considered valid until
> +	 * the inode is written back.  If we prematurely deleted old blocks
> +	 * and crashed before the inode is written, the filesystem goes boom.
> +	 */
> +	if (inode->i_state & I_DIRTY)
> +		ret = 2;
> +	else

There seems to be a patternm, that unused code is surprisingly well
commented.

> +#endif
> +		ret = __logfs_is_valid_block(inode, pos, ofs);
> +
> +	logfs_iput(inode, cookie);
> +	return ret;
> +}
> +
> +
> +
> +/**
> + * logfs_file_read - generic_file_read for in-kernel buffers
> + */
> +static ssize_t __logfs_inode_read(struct inode *inode, char *buf, size_t count,
> +		loff_t *ppos, int read_zero)
> +{
> +	void *block_data = NULL;
> +	loff_t size = i_size_read(inode);
> +	int err = -ENOMEM;
> +
> +	pr_debug("read from %lld, count %zd\n", *ppos, count);

Loglevel missing

> +	if (*ppos >= size)
> +		return 0;
> +	if (count > size - *ppos)
> +		count = size - *ppos;
> +
> +	BUG_ON(logfs_index(*ppos) != logfs_index(*ppos + count - 1));
> +
> +	block_data = kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
> +	if (!block_data)
> +		goto fail;
> +
> +	err = logfs_read_block(inode, logfs_index(*ppos), block_data,
> +			read_zero);
> +	if (err)
> +		goto fail;
> +
> +	memcpy(buf, block_data + (*ppos % LOGFS_BLOCKSIZE), count);
> +	*ppos += count;
> +	kfree(block_data);
> +	return count;

	err = count; and fall trough ?

> +fail:
> +	kfree(block_data);
> +	return err;
> +}
> +

> +static int logfs_alloc_bytes(struct inode *inode, int bytes)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
> +
> +	if (!bytes)
> +		return 0;
> +
> +	if (super->s_free_bytes < bytes + super->s_gc_reserve) {
> +		//TRACE();

	Sigh.

> +		return -ENOSPC;
> +	}
> +
> +	/* Actual allocation happens later.  Make sure we don't drop the
> +	 * lock before then! */
> +
> +	return 0;
> +}
> +

> +
> +/*
> + * File is too large for embedded data when called.  Move data to first
> + * block and clear embedded area
> + */
> +static int logfs_move_embedded(struct inode *inode, be64 **wblocks)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	void *buf;
> +	s64 block;
> +	int i;
> +
> +	if (! (li->li_flags & LOGFS_IF_EMBEDDED))
> +		return 0;
> +
> +	if (logfs_alloc_blocks(inode, 1)) {
> +		//TRACE();

	more sigh
> +		return -ENOSPC;
> +	}
> +
> +	buf = wblocks[0];
> +
> +	memcpy(buf, li->li_data, LOGFS_EMBEDDED_SIZE);
> +	block = logfs_segment_write(inode, buf, 0, 0, 1);
> +	if (block < 0)
> +		return block;
> +
> +	li->li_data[0] = block;
> +
> +	li->li_flags &= ~LOGFS_IF_EMBEDDED;
> +	for (i=1; i<LOGFS_EMBEDDED_FIELDS; i++)
> +		li->li_data[i] = 0;
> +
> +	return logfs_dirty_inode(inode);
> +}
> +
> +
> +static int logfs_write_direct(struct inode *inode, pgoff_t index, void *buf)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	s64 block;
> +
> +	if (li->li_data[index] == 0) {
> +		if (logfs_alloc_blocks(inode, 1)) {
> +			//TRACE();

	again

> +			return -ENOSPC;
> +		}
> +	}
> +	block = logfs_segment_write(inode, buf, index, 0, 1);
> +	if (block < 0)
> +		return block;
> +
> +	if (li->li_data[index])
> +		logfs_segment_delete(inode, li->li_data[index], index, 0);
> +	li->li_data[index] = block;
> +
> +	return logfs_dirty_inode(inode);
> +}
> +
> +
> +static int logfs_write_loop(struct inode *inode, pgoff_t index, void *buf,
> +		be64 **wblocks, int count)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	u64 bofs = li->li_data[I1_INDEX + count];
> +	s64 block;
> +	int bits = LOGFS_BLOCK_BITS;
> +	int allocs = 0;
> +	int i, ret;
> +
> +	for (i=count; i>=0; i--) {
> +		if (bofs) {
> +			ret = logfs_segment_read(inode->i_sb, wblocks[i], bofs);
> +			if (ret)
> +				return ret;
> +		} else {
> +			allocs++;
> +			memset(wblocks[i], 0, LOGFS_BLOCKSIZE);
> +		}
> +		bofs = be64_to_cpu(wblocks[i][get_bits(index, i, bits)]);
> +	}
> +
> +	if (! wblocks[0][get_bits(index, 0, bits)])
> +		allocs++;
> +	if (logfs_alloc_blocks(inode, allocs)) {
> +		//TRACE();

	yet more

> +		return -ENOSPC;
> +	}
> +
> +	block = logfs_segment_write(inode, buf, index, 0, allocs);
> +	allocs = allocs ? allocs-1 : 0;
> +	if (block < 0)
> +		return block;
> +
> +	for (i=0; i<=count; i++) {

	i = 0; ....

> +		wblocks[i][get_bits(index, i, bits)] = cpu_to_be64(block);
> +		block = logfs_segment_write(inode, wblocks[i], index, i+1,
> +				allocs);
> +		allocs = allocs ? allocs-1 : 0;
> +		if (block < 0)
> +			return block;
> +	}
> +
> +	li->li_data[I1_INDEX + count] = block;
> +
> +	return logfs_dirty_inode(inode);
> +}
> +
> +
> +
> +
> +int logfs_rewrite_block(struct inode *inode, pgoff_t index, u64 ofs, int level)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(inode->i_sb);
> +	be64 **wblocks;
> +	void *buf;
> +	int ret;
> +
> +	//printk("(%lx, %lx, %llx, %x)\n", inode->i_ino, index, ofs, level);

	yay !

> +	wblocks = super->s_wblock;
> +	buf = wblocks[LOGFS_MAX_INDIRECT];
> +	ret = __logfs_rewrite_block(inode, index, buf, wblocks, level);
> +	return ret;
> +}
> +
> +
> +/**

Please do not use /** here, it is the start sequence for kernel doc
comments

> + * Three cases exist:
> + * size <= pos			- remove full block
> + * size >= pos + chunk		- do nothing
> + * pos < size < pos + chunk	- truncate, rewrite
> + */
> +static s64 __logfs_truncate_i0(struct inode *inode, u64 size, u64 bofs,
> +		u64 pos, be64 **wblocks)
> +{
> +	size_t len = size - pos;
> +	void *buf = wblocks[LOGFS_MAX_INDIRECT];
> +	int err;
> +
> +	if (size <= pos) {	/* remove whole block */
> +		logfs_segment_delete(inode, bofs,
> +				pos >> inode->i_sb->s_blocksize_bits, 0);
> +		return 0;
> +	}
> +
> +	/* truncate this block, rewrite it */
> +	err = logfs_segment_read(inode->i_sb, buf, bofs);
> +	if (err)
> +		return err;
> +
> +	memset(buf + len, 0, LOGFS_BLOCKSIZE - len);
> +	return logfs_segment_write_pos(inode, buf, pos, 0, 0);
> +}
> +
> +
> +/* FIXME: move to super */

Please do so

> +static u64 logfs_factor[] = {
> +	LOGFS_BLOCKSIZE,
> +	LOGFS_I1_SIZE,
> +	LOGFS_I2_SIZE,
> +	LOGFS_I3_SIZE
> +};
> +

> +
> +static ssize_t __logfs_inode_write(struct inode *inode, const char *buf,
> +		size_t count, loff_t *ppos)
> +{
> +	void *block_data = NULL;
> +	int err = -ENOMEM;
> +
> +	pr_debug("write to 0x%llx, count %zd\n", *ppos, count);
> +
> +	BUG_ON(logfs_index(*ppos) != logfs_index(*ppos + count - 1));
> +
> +	block_data = kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
> +	if (!block_data)
> +		goto fail;
> +
> +	err = logfs_read_block(inode, logfs_index(*ppos), block_data, 1);
> +	if (err)
> +		goto fail;
> +
> +	memcpy(block_data + (*ppos % LOGFS_BLOCKSIZE), buf, count);
> +
> +	if (i_size_read(inode) < *ppos + count)
> +		i_size_write(inode, *ppos + count);
> +
> +	err = logfs_write_buf(inode, logfs_index(*ppos), block_data);
> +	if (err)
> +		goto fail;
> +
> +	*ppos += count;
> +	pr_debug("write to %lld, count %zd\n", *ppos, count);

	Please add some hint, where this comes from

> +	kfree(block_data);
> +	return count;

	err = count; fall trhough ?

> +fail:
> +	kfree(block_data);
> +	return err;
> +}
> +
> +
> +int logfs_inode_read(struct inode *inode, void *buf, size_t n, loff_t _pos)
> +{
> +	loff_t pos = _pos << inode->i_sb->s_blocksize_bits;
> +	ssize_t ret;
> +
> +	if (pos >= i_size_read(inode))
> +		return -EOF;
> +	ret = __logfs_inode_read(inode, buf, n, &pos, 0);
> +	if (ret < 0)
> +		return ret;
> +	ret = ret==n ? 0 : -EIO;

	return ret == n ? ..... perhaps ?

> +	return ret;
> +}
> +
> +
> +
> +int logfs_init_rw(struct logfs_super *super)
> +{
> +	int i;
> +
> +	mutex_init(&super->s_r_mutex);
> +	mutex_init(&super->s_w_mutex);
> +	super->s_rblock = kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
> +	if (!super->s_wblock)
> +		return -ENOMEM;
> +	for (i=0; i<=LOGFS_MAX_INDIRECT; i++) {

	i = 0; ...

> +		super->s_wblock[i] = kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
> +		if (!super->s_wblock) {
> +			logfs_cleanup_rw(super);
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +
> +void logfs_cleanup_rw(struct logfs_super *super)
> +{
> +	int i;
> +
> +	for (i=0; i<=LOGFS_MAX_INDIRECT; i++)

	dito

> +		kfree(super->s_wblock[i]);
> +	kfree(super->s_rblock);
> +}
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/super.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,490 @@

Comment, license please

> +#include "logfs.h"
> +
> +
> +#define FAIL_ON(cond) do { if (unlikely((cond))) return -EINVAL; } while(0)

Please open code

> +int mtdread(struct super_block *sb, loff_t ofs, size_t len, void *buf)
> +{
> +	struct mtd_info *mtd = LOGFS_SUPER(sb)->s_mtd;
> +	size_t retlen;
> +	int ret;
> +
> +	ret = mtd->read(mtd, ofs, len, &retlen, buf);
> +	if (ret || (retlen != len)) {
> +		printk("ret: %x\n", ret);
> +		printk("retlen: %x, len: %x\n", retlen, len);
> +		printk("ofs: %llx, mtd->size: %x\n", ofs, mtd->size);

	Sigh

> +		dump_stack();
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +
> +static void check(void *buf, size_t len)
> +{
> +	char value[8] = {0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a};
> +	void *poison = buf, *end = buf + len;
> +
> +	while (poison) {
> +		poison = memchr(poison, value[0], end-poison);
> +		if (!poison || poison + 8 > end)
> +			return;
> +		if (! memcmp(poison, value, 8)) {
> +			printk("%p %p %p\n", buf, poison, end);

	More sigh

> +			BUG();
> +		}
> +		poison++;
> +	}
> +}
> +
> +
> +int mtdwrite(struct super_block *sb, loff_t ofs, size_t len, void *buf)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct mtd_info *mtd = super->s_mtd;
> +	struct inode *inode = super->s_dev_inode;
> +	size_t retlen;
> +	loff_t page_start, page_end;
> +	int ret;
> +
> +	if (0) /* FIXME: this should be a debugging option */
> +		check(buf, len);
> +
> +	//printk("write ofs=%llx, len=%x\n", ofs, len);

	hrmpf

> +	BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs));
> +	BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift);
> +	//BUG_ON(len != (len >> super->s_blockshift) << super->s_blockshift);

	hrmpf

> +	/* FIXME: fix all callers to write PAGE_CACHE_SIZE'd chunks */
> +	BUG_ON(len > PAGE_CACHE_SIZE);
> +	page_start = ofs & PAGE_CACHE_MASK;
> +	page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
> +	truncate_inode_pages_range(&inode->i_data, page_start, page_end);
> +	ret = mtd->write(mtd, ofs, len, &retlen, buf);
> +	if (ret || (retlen != len))
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +
> +static DECLARE_COMPLETION(logfs_erase_complete);

empty line

> +static void logfs_erase_callback(struct erase_info *ei)
> +{
> +	complete(&logfs_erase_complete);
> +}

dito

> +int mtderase(struct super_block *sb, loff_t ofs, size_t len)
> +{
> +	struct mtd_info *mtd = LOGFS_SUPER(sb)->s_mtd;
> +	struct inode *inode = LOGFS_SUPER(sb)->s_dev_inode;
> +	struct erase_info ei;
> +	int ret;
> +
> +	BUG_ON(len % mtd->erasesize);
> +
> +	truncate_inode_pages_range(&inode->i_data, ofs, ofs+len-1);
> +	if (mtd->block_isbad(mtd, ofs))
> +		return -EIO;

this actually leads to a double check of block_isbad for blocks which
are not bad. 

> +	memset(&ei, 0, sizeof(ei));
> +	ei.mtd = mtd;
> +	ei.addr = ofs;
> +	ei.len = len;
> +	ei.callback = logfs_erase_callback;
> +	ret = mtd->erase(mtd, &ei);
> +	if (ret)
> +		return -EIO;
> +
> +	wait_for_completion(&logfs_erase_complete);
> +	if (ei.state != MTD_ERASE_DONE)
> +		return -EIO;
> +	return 0;
> +}
> +
> +
> +
> +void *logfs_device_getpage(struct super_block *sb, u64 offset,
> +		struct page **page)
> +{
> +	struct inode *inode = LOGFS_SUPER(sb)->s_dev_inode;
> +
> +	*page = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
> +			logfs_readdevice, NULL);
> +	BUG_ON(IS_ERR(*page));	/* TODO: use mempool here */

				For the BUG ?

> +	return kmap(*page);
> +}
> +
> +
> +static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
> +{
> +	struct inode *rootdir;
> +	int err;
> +
> +	/* root dir */
> +	rootdir = iget(sb, LOGFS_INO_ROOT);
> +	if (!rootdir)
> +		goto fail;
> +
> +	sb->s_root = d_alloc_root(rootdir);
> +	if (!sb->s_root)
> +		goto fail;
> +
> +#if 1
> +	err = logfs_fsck(sb);
> +#else
> +	err = 0;
> +#endif

	Please cleanup

> +	if (err) {
> +		printk(KERN_ERR "LOGFS: fsck failed, refusing to mount\n");
> +		goto fail;
> +	}
> +
> +	return simple_set_mnt(mnt, sb);
> +
> +fail:
> +	iput(LOGFS_SUPER(sb)->s_master_inode);
> +	return -EIO;
> +}
> +
> +
> +
> +
> +
> +static int logfs_get_sb(struct file_system_type *type, int flags,
> +		const char *devname, void *data, struct vfsmount *mnt)
> +{
> +	ulong mtdnr;
> +	struct mtd_info *mtd;
> +
> +#if 0
> +	if (!devname)
> +		return ERR_PTR(-EINVAL);
> +	if (strncmp(devname, "mtd", 3))
> +		return ERR_PTR(-EINVAL);
> +
> +	{
> +		char *garbage;
> +		mtdnr = simple_strtoul(devname+3, &garbage, 0);
> +		if (*garbage)
> +			return ERR_PTR(-EINVAL);
> +	}
> +#else
> +	mtdnr = 0;
> +#endif
> +

	Please cleanup

> +	mtd = get_mtd_device(NULL, mtdnr);
> +	if (!mtd)
> +		return -EINVAL;
> +
> +	return logfs_get_sb_mtd(type, flags, mtd, mnt);
> +}
> +
> +-- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/progs/mkfs.c	2007-05-07 13:32:12.000000000 +0200

why needs this to be in a sub directory ? And shouldn't this be user
space tools - or what I'm missing here ?

> @@ -0,0 +1,319 @@

Comment, license

> +#include "../logfs.h"
> +
> +#define OFS_SB		0
> +#define OFS_JOURNAL	1
> +#define OFS_ROOTDIR	3
> +#define OFS_IFILE	4
> +#define OFS_COUNT	5

enum ?

> +static u64 segment_offset[OFS_COUNT];
> +
> +static u64 fssize;
> +static u64 no_segs;
> +static u64 free_blocks;
> +
> +static u32 segsize;
> +static u32 blocksize;
> +static int segshift;
> +static int blockshift;
> +static int writeshift;
> +
> +static u32 blocks_per_seg;
> +static u16 version;
> +
> +static be32 bb_array[1024];
> +static int bb_count;
> +
> +
> +#if 0
> +/* rootdir */
> +static int make_rootdir(struct super_block *sb)
> +{
> +	struct logfs_disk_inode *di;
> +	int ret;
> +
> +	di = kzalloc(blocksize, GFP_KERNEL);
> +	if (!di)
> +		return -ENOMEM;
> +
> +	di->di_flags	= cpu_to_be32(LOGFS_IF_VALID);
> +	di->di_mode	= cpu_to_be16(S_IFDIR | 0755);
> +	di->di_refcount	= cpu_to_be32(2);
> +	ret = mtdwrite(sb, segment_offset[OFS_ROOTDIR], blocksize, di);
> +	kfree(di);
> +	return ret;
> +}
> +
> +
> +/* summary */
> +static int make_summary(struct super_block *sb)
> +{
> +	struct logfs_disk_sum *sum;
> +	u64 sum_ofs;
> +	int ret;
> +
> +	sum = kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL);
> +	if (!sum)
> +		return -ENOMEM;
> +	memset(sum, 0xff, LOGFS_BLOCKSIZE);
> +
> +	sum->oids[0].ino = cpu_to_be64(LOGFS_INO_MASTER);
> +	sum->oids[0].pos = cpu_to_be64(LOGFS_INO_ROOT);
> +	sum_ofs = segment_offset[OFS_ROOTDIR];
> +	sum_ofs += segsize - blocksize;
> +	sum->level = LOGFS_MAX_LEVELS;
> +	ret = mtdwrite(sb, sum_ofs, LOGFS_BLOCKSIZE, sum);
> +	kfree(sum);
> +	return ret;
> +}
> +#endif

Please remove

> +
> +/* journal */
> +static size_t __write_header(struct logfs_journal_header *h, size_t len,
> +		size_t datalen, u16 type, u8 compr)
> +{
> +	h->h_len	= cpu_to_be16(len);
> +	h->h_type	= cpu_to_be16(type);
> +	h->h_version	= cpu_to_be16(++version);
> +	h->h_datalen	= cpu_to_be16(datalen);
> +	h->h_compr	= compr;
> +	h->h_pad[0]	= 'h';
> +	h->h_pad[1]	= 'a';
> +	h->h_pad[2]	= 't';
> +	h->h_crc	= logfs_crc32(h, len, 4);
> +	return len;
> +}
> +static size_t write_header(struct logfs_journal_header *h, size_t datalen,
> +		u16 type)
> +{
> +	size_t len = datalen + sizeof(*h);
> +	return __write_header(h, len, datalen, type, COMPR_NONE);
> +}
> +static size_t je_badsegments(void *data, u16 *type)
> +{
> +	memcpy(data, bb_array, blocksize);
> +	*type = JE_BADSEGMENTS;
> +	return blocksize;
> +}
> +static size_t je_anchor(void *_da, u16 *type)
> +{
> +	struct logfs_anchor *da = _da;
> +
> +	memset(da, 0, sizeof(*da));
> +	da->da_last_ino	= cpu_to_be64(LOGFS_RESERVED_INOS);
> +	da->da_size	= cpu_to_be64((LOGFS_INO_ROOT+1) * blocksize);
> +#if 0
> +	da->da_used_bytes = cpu_to_be64(blocksize);
> +	da->da_data[LOGFS_INO_ROOT] = cpu_to_be64(3*segsize);
> +#else
> +	da->da_data[LOGFS_INO_ROOT] = 0;
> +#endif

Please cleanup

> +	*type = JE_ANCHOR;
> +	return sizeof(*da);
> +}

Empty line

> +static size_t je_dynsb(void *_dynsb, u16 *type)
> +{
> +	struct logfs_dynsb *dynsb = _dynsb;
> +
> +	memset(dynsb, 0, sizeof(*dynsb));
> +	dynsb->ds_used_bytes	= cpu_to_be64(blocksize);
> +	*type = JE_DYNSB;
> +	return sizeof(*dynsb);
> +}

Same

> +static size_t je_commit(void *h, u16 *type)
> +{
> +	*type = JE_COMMIT;
> +	return 0;
> +}

Same

> +static size_t write_je(size_t jpos, void *scratch, void *header,
> +		size_t (*write)(void *scratch, u16 *type))
> +{
> +	void *data;
> +	ssize_t len, max, compr_len, pad_len, full_len;
> +	u16 type;
> +	u8 compr = COMPR_ZLIB;
> +
> +	header += jpos;
> +	data = header + sizeof(struct logfs_journal_header);
> +
> +	len = write(scratch, &type);
> +	if (len == 0)
> +		return write_header(header, 0, type);
> +
> +	max = blocksize - jpos;
> +	compr_len = logfs_compress(scratch, data, len, max);
> +	if ((compr_len < 0) || (type == JE_ANCHOR)) {
> +		compr_len = logfs_memcpy(scratch, data, len, max);
> +		compr = COMPR_NONE;
> +	}
> +	BUG_ON(compr_len < 0);
> +
> +	pad_len = ALIGN(compr_len, 16);
> +	memset(data + compr_len, 0, pad_len - compr_len);
> +	full_len = pad_len + sizeof(struct logfs_journal_header);
> +
> +	return __write_header(header, full_len, len, type, compr);
> +}

Same

> +static int make_journal(struct super_block *sb)
> +{
> +	void *journal, *scratch;
> +	size_t jpos;
> +	int ret;
> +
> +	journal = kzalloc(2*blocksize, GFP_KERNEL);
> +	if (!journal)
> +		return -ENOMEM;
> +
> +	scratch = journal + blocksize;
> +
> +	jpos = 0;
> +	/* erasecount is not written - implicitly set to 0 */
> +	/* neither are summary, index, wbuf */
> +	jpos += write_je(jpos, scratch, journal, je_badsegments);
> +	jpos += write_je(jpos, scratch, journal, je_anchor);
> +	jpos += write_je(jpos, scratch, journal, je_dynsb);
> +	jpos += write_je(jpos, scratch, journal, je_commit);
> +	ret = mtdwrite(sb, segment_offset[OFS_JOURNAL], blocksize, journal);
> +	kfree(journal);
> +	return ret;
> +}
> +
> +
> +/* superblock */
> +static int make_super(struct super_block *sb, struct logfs_disk_super *ds)
> +{
> +	void *sector;
> +	int ret;
> +
> +	sector = kzalloc(4096, GFP_KERNEL);
> +	if (!sector)
> +		return -ENOMEM;
> +
> +	memset(ds, 0, sizeof(*ds));
> +
> +	ds->ds_magic		= cpu_to_be64(LOGFS_MAGIC);
> +#if 0	/* sane defaults */
> +	ds->ds_ifile_levels	= 3; /* 2+1, 1GiB */
> +	ds->ds_iblock_levels	= 4; /* 3+1, 512GiB */
> +	ds->ds_data_levels	= 3; /* old, young, unknown */
> +#else
> +	ds->ds_ifile_levels	= 1; /* 0+1, 80kiB */
> +	ds->ds_iblock_levels	= 4; /* 3+1, 512GiB */
> +	ds->ds_data_levels	= 1; /* unknown */
> +#endif

Please cleanup

> +	ds->ds_feature_incompat	= 0;
> +	ds->ds_feature_ro_compat= 0;
> +
> +	ds->ds_feature_compat	= 0;
> +	ds->ds_flags		= 0;
> +
> +	ds->ds_filesystem_size	= cpu_to_be64(fssize);
> +	ds->ds_segment_shift	= segshift;
> +	ds->ds_block_shift	= blockshift;
> +	ds->ds_write_shift	= writeshift;
> +
> +	ds->ds_journal_seg[0]	= cpu_to_be64(1);
> +	ds->ds_journal_seg[1]	= cpu_to_be64(2);
> +	ds->ds_journal_seg[2]	= 0;
> +	ds->ds_journal_seg[3]	= 0;
> +
> +	ds->ds_root_reserve	= 0;
> +
> +	ds->ds_crc = logfs_crc32(ds, sizeof(*ds), 12);
> +
> +	memcpy(sector, ds, sizeof(*ds));
> +	ret = mtdwrite(sb, segment_offset[OFS_SB], 4096, sector);
> +	kfree(sector);
> +	return ret;
> +}
> +
> +
> +int logfs_mkfs(struct super_block *sb, struct logfs_disk_super *ds)
> +{
> +	int ret = 0;
> +
> +	segshift = 17;
> +	blockshift = 12;
> +	writeshift = 8;
> +
> +	segsize = 1 << segshift;
> +	blocksize = 1 << blockshift;
> +	version = 0;
> +
> +	getsize(sb, &fssize, &no_segs);
> +
> +	/* 3 segs for sb and journal,
> +	 * 1 block per seg extra,
> +	 * 1 block for rootdir
> +	 */
> +	blocks_per_seg = 1 << (segshift - blockshift);
> +	free_blocks = (no_segs - 3) * (blocks_per_seg - 1) - 1;
> +
> +	ret = bad_block_scan(sb);
> +	if (ret)
> +		return ret;
> +
> +	{
> +		int i;
> +		for (i=0; i<OFS_COUNT; i++)
> +			printk("%x->%llx\n", i, segment_offset[i]);
> +	}
> +
> +#if 0
> +	ret = make_rootdir(sb);
> +	if (ret)
> +		return ret;
> +
> +	ret = make_summary(sb);
> +	if (ret)
> +		return ret;
> +#endif

Same

> +	ret = make_journal(sb);
> +	if (ret)
> +		return ret;
> +
> +	ret = make_super(sb, ds);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/progs/fsck.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,323 @@

Comment, license

> +#include "../logfs.h"
> +
> +static u64 used_bytes;
> +static u64 free_bytes;
> +static u64 last_ino;
> +static u64 *inode_bytes;
> +static u64 *inode_links;
> +
> +
> +/**
> + * Pass 1: blocks
> + */
> +
> +
> +static void safe_read(struct super_block *sb, u32 segno, u32 ofs,
> +		size_t len, void *buf)
> +{
> +	BUG_ON(wbuf_read(sb, dev_ofs(sb, segno, ofs), len, buf));
> +}

Empty line

> +static u32 logfs_free_bytes(struct super_block *sb, u32 segno)
> +{

> +static void logfsck_blocks(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +	int free;
> +
> +	for (i=0; i<super->s_no_segs; i++) {
> +		free = logfs_free_bytes(sb, i);
> +		free_bytes += free;
> +		printk(" %3x", free);
> +		if (i % 8 == 7)
> +			printk(" : ");
> +		if (i % 16 == 15)
> +			printk("\n");
> +	}
> +	printk("\n");

printk with loglevels and identifiable origin please

> +
> +
> +static s64 dir_seek_data(struct inode *inode, s64 pos)
> +{
> +	s64 new_pos = logfs_seek_data(inode, pos);

new line

> +	return max((s64)pos, new_pos - 1);
> +}
> +
> +
> +static int __logfsck_dirs(struct inode *dir)
> +{
> +	struct inode *inode;
> +	loff_t pos;
> +	u64 ino;
> +	u8 type;
> +	int cookie, err, ret = 0;
> +
> +	for (pos=0; ; pos++) {
> +		err = read_one_dd(dir, pos, &ino, &type);
> +		//yield();

	great. cond_resched() if you really need to

> +		if (err == -ENODATA) { /* dentry was deleted */
> +			pos = dir_seek_data(dir, pos);
> +			continue;
> +		}
> +		if (err == -EOF)
> +			break;
> +		if (err)
> +			goto error0;
> +
> +		err = -EIO;
> +		if (ino > last_ino) {
> +			printk("ino %llx > last_ino %llx\n", ino, last_ino);

	loglevel .....

> +			goto error0;
> +		}
> +		inode = logfs_iget(dir->i_sb, ino, &cookie);
> +		if (!inode) {
> +			printk("Could not find inode #%llx\n", ino);
> +			goto error0;
> +		}
> +		if (type != logfs_type(inode)) {
> +			printk("dd type %x != inode type %x\n", type,
> +					logfs_type(inode));

	dito

> +			goto error1;
> +		}
> +		inode_links[ino]++;
> +		err = 0;
> +		if (type == DT_DIR) {
> +			inode_links[dir->i_ino]++;
> +			inode_links[ino]++;
> +			err = __logfsck_dirs(inode);
> +		}
> +error1:
> +		logfs_iput(inode, cookie);
> +error0:
> +		if (!ret)
> +			ret = err;
> +		continue;
> +	}
> +	return 1;
> +}
> +
> +
> +/**
> + * Pass 3: inodes
> + */
> +
> +
> +static int logfs_check_inode(struct inode *inode)
> +{
> +	struct logfs_inode *li = LOGFS_INODE(inode);
> +	u64 bytes0 = li->li_used_bytes;
> +	u64 bytes1 = inode_bytes[inode->i_ino];
> +	u64 links0 = inode->i_nlink;
> +	u64 links1 = inode_links[inode->i_ino];
> +
> +	if (bytes0 || bytes1 || links0 || links1
> +			|| inode->i_ino == LOGFS_SUPER(inode->i_sb)->s_last_ino)
> +		printk("%lx: %llx(%llx) bytes, %llx(%llx) links\n",
> +				inode->i_ino, bytes0, bytes1, links0, links1);

	Sigh

> +	used_bytes += bytes0;
> +	return (bytes0 == bytes1) && (links0 == links1);
> +}
> +
> +
> +static int logfs_check_ino(struct super_block *sb, u64 ino)
> +{
> +	struct inode *inode;
> +	int ret, cookie;
> +
> +	//yield();

	See above instance of //yield();

> +	inode = logfs_iget(sb, ino, &cookie);
> +	if (!inode)
> +		return 1;
> +	ret = logfs_check_inode(inode);
> +	logfs_iput(inode, cookie);
> +	return ret;
> +}
> +
> +
> +
> +static int logfsck_stats(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	u64 ostore_segs, total, expected;
> +	int i, reserved_segs;
> +
> +	reserved_segs = 1; /* super_block */
> +	journal_for_each(i)
> +		if (super->s_journal_seg[i])
> +			reserved_segs++;
> +	reserved_segs += super->s_bad_segments;
> +
> +	ostore_segs = super->s_no_segs - reserved_segs;
> +	expected = ostore_segs << super->s_segshift;
> +	total = free_bytes + used_bytes;
> +
> +	printk("free:%8llx, used:%8llx, total:%8llx",
> +			free_bytes, used_bytes, expected);

	loglevel

> +	if (total > expected)
> +		printk(" + %llx\n", total - expected);
> +	else if (total < expected)
> +		printk(" - %llx\n", expected - total);
> +	else
> +		printk("\n");
> +
> +	return total == expected;
> +}
> +
> +
> +static int __logfs_fsck(struct super_block *sb)
> +{
> +	int ret;
> +	int err = 0;
> +
> +	/* pass 1: check blocks */
> +	logfsck_blocks(sb);
> +	/* pass 2: check directories */
> +	ret = logfsck_dirs(sb);
> +	if (!ret) {
> +		printk("Pass 2: directory check failed\n");

	same

> +		err = -EIO;
> +	}
> +	/* pass 3: check inodes */
> +	ret = logfsck_inodes(sb);
> +	if (!ret) {
> +		printk("Pass 3: inode check failed\n");

	same

> +		err = -EIO;
> +	}
> +	/* Pass 4: Total blocks */
> +	ret = logfsck_stats(sb);
> +	if (!ret) {
> +		printk("Pass 4: statistic check failed\n");

	same

> +		err = -EIO;
> +	}
> +
> +	return err;
> +}
> +
> +
> +int logfs_fsck(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int ret = -ENOMEM;
> +
> +	used_bytes = 0;
> +	free_bytes = 0;
> +	last_ino = super->s_last_ino;
> +	inode_bytes = kzalloc(last_ino * sizeof(be64), GFP_KERNEL);
> +	if (!inode_bytes)
> +		goto out0;

	return ret;

> +	inode_links = kzalloc(last_ino * sizeof(be64), GFP_KERNEL);
> +	if (!inode_links)
> +		goto out1;
> +
> +	ret = __logfs_fsck(sb);
> +
> +	kfree(inode_links);
> +	inode_links = NULL;
> +out1:
> +	kfree(inode_bytes);
> +	inode_bytes = NULL;
> +out0:
> +	return ret;
> +}
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/Locking	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,45 @@

Can you move this into documentation please

> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/compr.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,198 @@

Comment, license

> +#include "logfs.h"
> +#include <linux/vmalloc.h>
> +#include <linux/zlib.h>
> +
> +#define COMPR_LEVEL 3
> +
> +static DEFINE_MUTEX(compr_mutex);
> +static struct z_stream_s stream;
> +
> +
> +int logfs_memcpy(void *in, void *out, size_t inlen, size_t outlen)
> +{
> +	if (outlen < inlen)
> +		return -EIO;
> +	memcpy(out, in, inlen);
> +	return inlen;
> +}
> +
> +
> +int logfs_compress_vec(struct kvec *vec, int count, void *out, size_t outlen)
> +{
> +	int i, ret;
> +
> +	mutex_lock(&compr_mutex);
> +	ret = zlib_deflateInit(&stream, COMPR_LEVEL);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	stream.total_in = 0;
> +	stream.total_out = 0;
> +
> +	for (i=0; i<count-1; i++) {
> +		stream.next_in = vec[i].iov_base;
> +		stream.avail_in = vec[i].iov_len;
> +		stream.next_out = out + stream.total_out;
> +		stream.avail_out = outlen - stream.total_out;
> +
> +		ret = zlib_deflate(&stream, Z_NO_FLUSH);
> +		if (ret != Z_OK)
> +			goto error;
> +		/* if (stream.total_out >= outlen)
> +			goto error; */

	???

> +	}
> +
> +	stream.next_in = vec[count-1].iov_base;
> +	stream.avail_in = vec[count-1].iov_len;
> +	stream.next_out = out + stream.total_out;
> +	stream.avail_out = outlen - stream.total_out;
> +
> +	ret = zlib_deflate(&stream, Z_FINISH);
> +	if (ret != Z_STREAM_END)
> +		goto error;
> +	/* if (stream.total_out >= outlen)
> +		goto error; */

	???

> +	ret = zlib_deflateEnd(&stream);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	if (stream.total_out >= stream.total_in)
> +		goto error;
> +
> +	ret = stream.total_out;
> +	mutex_unlock(&compr_mutex);
> +	return ret;
> +error:
> +	mutex_unlock(&compr_mutex);
> +	return -EIO;
> +}
> +
> +

> +int logfs_uncompress_vec(void *in, size_t inlen, struct kvec *vec, int count)
> +{
> +	int i, ret;
> +
> +	mutex_lock(&compr_mutex);
> +	ret = zlib_inflateInit(&stream);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	stream.total_in = 0;
> +	stream.total_out = 0;
> +
> +	for (i=0; i<count-1; i++) {
> +		stream.next_in = in + stream.total_in;
> +		stream.avail_in = inlen - stream.total_in;
> +		stream.next_out = vec[i].iov_base;
> +		stream.avail_out = vec[i].iov_len;
> +
> +		ret = zlib_inflate(&stream, Z_NO_FLUSH);
> +		if (ret != Z_OK)
> +			goto error;
> +	}
> +	stream.next_in = in + stream.total_in;
> +	stream.avail_in = inlen - stream.total_in;
> +	stream.next_out = vec[count-1].iov_base;
> +	stream.avail_out = vec[count-1].iov_len;
> +
> +	ret = zlib_inflate(&stream, Z_FINISH);
> +	if (ret != Z_STREAM_END)
> +		goto error;
> +
> +	ret = zlib_inflateEnd(&stream);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	mutex_unlock(&compr_mutex);
> +	return ret;
> +error:
> +	mutex_unlock(&compr_mutex);
> +	return -EIO;

	Sigh. Can you please make this a bit more clever  ?

> +}
> +
> +
> +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen)
> +{
> +	int ret;
> +
> +	mutex_lock(&compr_mutex);
> +	ret = zlib_inflateInit(&stream);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	stream.next_in = in;
> +	stream.avail_in = inlen;
> +	stream.total_in = 0;
> +	stream.next_out = out;
> +	stream.avail_out = outlen;
> +	stream.total_out = 0;
> +
> +	ret = zlib_inflate(&stream, Z_FINISH);
> +	if (ret != Z_STREAM_END)
> +		goto error;
> +
> +	ret = zlib_inflateEnd(&stream);
> +	if (ret != Z_OK)
> +		goto error;
> +
> +	mutex_unlock(&compr_mutex);
> +	return ret;
> +error:
> +	mutex_unlock(&compr_mutex);
> +	return -EIO;

	Same here

> +}

> +
> +int __init logfs_compr_init(void)
> +{
> +	size_t size = max(zlib_deflate_workspacesize(),
> +			zlib_inflate_workspacesize());
> +	printk("deflate size: %x\n", zlib_deflate_workspacesize());
> +	printk("inflate size: %x\n", zlib_inflate_workspacesize());

	loglevel

> +	stream.workspace = vmalloc(size);
> +	if (!stream.workspace)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +void __exit logfs_compr_exit(void)
> +{
> +	vfree(stream.workspace);
> +}
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/segment.c	2007-05-07 20:41:17.000000000 +0200
> @@ -0,0 +1,533 @@

Comment, license

> +#include "logfs.h"
> +
> +
> +
> +#define HEADER_SIZE sizeof(struct logfs_object_header)

empty line

> +s64 __logfs_segment_write(struct inode *inode, void *buf, u64 pos, int level,
> +		int alloc, int len, int compr)
> +{
> +	struct logfs_area *area;
> +	struct super_block *sb = inode->i_sb;
> +	u64 ofs;
> +	u64 ino = inode->i_ino;
> +	int err;
> +	struct logfs_object_header h;
> +
> +	h.crc	= cpu_to_be32(0xcccccccc);
> +	h.len	= cpu_to_be16(len);
> +	h.type	= OBJ_BLOCK;
> +	h.compr	= compr;
> +	h.ino	= cpu_to_be64(inode->i_ino);
> +	h.pos	= cpu_to_be64(pos);
> +
> +	level = adj_level(ino, level);
> +	area = get_area(sb, level);
> +	ofs = __logfs_get_free_bytes(area, ino, pos, len + HEADER_SIZE);
> +	LOGFS_BUG_ON(ofs <= 0, sb);
> +	//printk("alloc: (%llx, %llx, %llx, %x)\n", ino, pos, ret, level);

	clean up

> +	err = buf_write(area, ofs, &h, sizeof(h));
> +	if (!err)
> +		err = buf_write(area, ofs + HEADER_SIZE, buf, len);
> +	BUG_ON(err);
> +	if (err)
> +		return err;
> +	if (alloc) {
> +		int acc_len = (level==0) ? len : sb->s_blocksize;
> +		logfs_consume_bytes(inode, acc_len + HEADER_SIZE);
> +	}
> +
> +	logfs_close_area(area); /* FIXME merge with open_area */
> +
> +	//printk("    (%llx, %llx, %llx)\n", ofs, ino, pos);

	same

> +	return ofs;
> +}
> +
> +
> +
> +
> +int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	struct logfs_area *area;
> +	u32 segno = ofs >> super->s_segshift;
> +	int i, err;
> +
> +	err = mtdread(sb, ofs, len, buf);
> +	if (err)
> +		return err;
> +
> +	for (i=0; i<LOGFS_NO_AREAS; i++) {

	i = 0; ...

> +		area = super->s_area[i];
> +		if (area->a_segno == segno) {
> +			fixup_from_wbuf(sb, area, buf, ofs, len);
> +			break;
> +		}
> +	}
> +	return 0;
> +}
> +
> +
> +int logfs_segment_read(struct super_block *sb, void *buf, u64 ofs)
> +{
> +	struct logfs_object_header *h;
> +	u16 len;
> +	int err, bs = sb->s_blocksize;
> +
> +	mutex_lock(&compr_mutex);
> +	err = wbuf_read(sb, ofs, bs+24, compressor_buf);
> +	if (err)
> +		goto out;
> +	h = (void*)compressor_buf;

	please use proper typecasts

> +	len = be16_to_cpu(h->len);
> +
> +	switch (h->compr) {
> +	case COMPR_NONE:
> +		logfs_memcpy(compressor_buf+24, buf, bs, bs);
> +		break;
> +	case COMPR_ZLIB:
> +		err = logfs_uncompress(compressor_buf+24, buf, len, bs);
> +		BUG_ON(err);
> +		break;
> +	default:
> +		LOGFS_BUG(sb);
> +	}
> +out:
> +	mutex_unlock(&compr_mutex);
> +	return err;
> +}
> +
> +
> +static u64 logfs_block_mask[] = {
> +	~0,
> +	~(I1_BLOCKS-1),
> +	~(I2_BLOCKS-1),
> +	~(I3_BLOCKS-1)
> +};

Empty line please

> +static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, int level)
> +{
> +	LOGFS_BUG_ON(	(pos1 & logfs_block_mask[level]) !=
> +			(pos2 & logfs_block_mask[level]), sb);
> +}

empty line

> +int logfs_segment_delete(struct inode *inode, u64 ofs, u64 pos, int level)
> +{
> +	struct super_block *sb = inode->i_sb;
> +	struct logfs_object_header *h;
> +	u16 len;
> +	int err;
> +
> +
> +	mutex_lock(&compr_mutex);
> +	err = wbuf_read(sb, ofs, 4096+24, compressor_buf);
> +	LOGFS_BUG_ON(err, sb);
> +	h = (void*)compressor_buf;

	proper typecast

> +	len = be16_to_cpu(h->len);
> +	check_pos(sb, pos, be64_to_cpu(h->pos), level);
> +	mutex_unlock(&compr_mutex);
> +
> +	level = adj_level(inode->i_ino, level);
> +	len = (level==0) ? len : sb->s_blocksize;
> +	logfs_remove_bytes(inode, len + sizeof(*h));
> +	return 0;
> +}
> +
> +
> +int logfs_open_area(struct logfs_area *area)
> +{
> +	if (area->a_is_open)
> +		return 0; /* nothing to do */

	yeah, another really helpful comment

> +	area->a_ops->get_free_segment(area);
> +	area->a_used_objects = 0;
> +	area->a_used_bytes = 0;
> +	area->a_ops->get_erase_count(area);
> +
> +	area->a_ops->clear_blocks(area);
> +	area->a_is_open = 1;
> +
> +	return area->a_ops->erase_segment(area);
> +}
> +

> +static void ostore_get_free_segment(struct logfs_area *area)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(area->a_sb);
> +	struct logfs_segment *seg;
> +
> +	BUG_ON(list_empty(&super->s_free_list));
> +
> +	seg = list_entry(super->s_free_list.prev, struct logfs_segment, list);
> +	list_del(&seg->list);
> +	area->a_segno = seg->segno;
> +	kfree(seg);
> +	super->s_free_count -= 1;

get_free_segment actually kfree's a segment ? Please use a less
misleading function name

> +}
> +
> +
> +static void ostore_get_erase_count(struct logfs_area *area)
> +{
> +	struct logfs_segment_header h;
> +
> +	device_read(area->a_sb, area->a_segno, 0, sizeof(h), &h);

	error handling

> +	area->a_erase_count = be32_to_cpu(h.ec) + 1;
> +}
> +
> +
> +
> +static int ostore_erase_segment(struct logfs_area *area)
> +{
> +	struct logfs_segment_header h;
> +	u64 ofs;
> +	int err;
> +
> +	err = logfs_erase_segment(area->a_sb, area->a_segno);
> +	if (err)
> +		return err;
> +
> +	h.len = 0;
> +	h.type = OBJ_OSTORE;
> +	h.level = area->a_level;
> +	h.segno = cpu_to_be32(area->a_segno);
> +	h.ec = cpu_to_be32(area->a_erase_count);
> +	h.gec = cpu_to_be64(LOGFS_SUPER(area->a_sb)->s_gec);
> +	h.crc = logfs_crc32(&h, sizeof(h), 4);
> +	/* FIXME: write it out */

	isn't that what buf_write() does ?

> +	ofs = dev_ofs(area->a_sb, area->a_segno, 0);
> +	area->a_used_bytes = sizeof(h);
> +	return buf_write(area, ofs, &h, sizeof(h));
> +}
> +
> +
> +static void flush_buf(struct logfs_area *area)
> +{
> +	struct super_block *sb = area->a_sb;
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	u32 used, free;
> +	u64 ofs;
> +	u32 writemask = super->s_writesize - 1;
> +	int err;
> +
> +	ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
> +	ofs &= ~writemask;
> +	used = area->a_used_bytes & writemask;
> +	free = super->s_writesize - area->a_used_bytes;
> +	free &= writemask;
> +	//printk("flush(%llx, %x, %x)\n", ofs, used, free);

	sigh

> +	if (used == 0)
> +		return;
> +
> +	TRACE();

	sigh more

> +	memset(area->a_wbuf + used, 0xff, free);
> +	err = mtdwrite(sb, ofs, super->s_writesize, area->a_wbuf);
> +	LOGFS_BUG_ON(err, sb);
> +}
> +

> +
> +
> +int logfs_init_areas(struct super_block *sb)
> +{
> +	struct logfs_super *super = LOGFS_SUPER(sb);
> +	int i;
> +
> +	super->s_journal_area = kzalloc(sizeof(struct logfs_area), GFP_KERNEL);
> +	if (!super->s_journal_area)
> +		return -ENOMEM;
> +	super->s_journal_area->a_sb = sb;
> +
> +	for (i=0; i<LOGFS_NO_AREAS; i++) {
	i = 0; ..

> +		super->s_area[i] = init_ostore_area(sb, i);
> +		if (!super->s_area[i])
> +			goto err;
> +	}
> +	return 0;
> +
> +err:
> +	for (i--; i>=0; i--)

	same here

> +		cleanup_ostore_area(super->s_area[i]);
> +	kfree(super->s_journal_area);
> +	return -ENOMEM;
> +}
> +
> +
> +void logfs_cleanup_areas(struct logfs_super *super)
> +{
> +	int i;
> +
> +	for (i=0; i<LOGFS_NO_AREAS; i++)

	adnd here

> +		cleanup_ostore_area(super->s_area[i]);
> +	kfree(super->s_journal_area);
> +}
> --- /dev/null	2007-04-18 05:32:26.652341749 +0200
> +++ linux-2.6.21logfs/fs/logfs/memtree.c	2007-05-07 13:32:12.000000000 +0200
> @@ -0,0 +1,199 @@
> +/* In-memory B+Tree. */

license and a little bit more description

> +#include "logfs.h"
> +
> +#define BTREE_NODES 16	/* 32bit, 128 byte cacheline */
> +//#define BTREE_NODES 8	/* 32bit,  64 byte cacheline */

Please cleanup

> +void *btree_lookup(struct btree_head *head, long val)
> +{
> +	int i, height = head->height;
> +	struct btree_node *node = head->node;
> +
> +	if (val == 0)
> +		return head->null_ptr;
> +
> +	if (height == 0)
> +		return NULL;
> +
> +	for ( ; height > 1; height--) {
> +		for (i=0; i<BTREE_NODES; i++)
> +			if (node[i].val <= val)
> +				break;
> +		node = node[i].node;
> +	}
> +
> +	for (i=0; i<BTREE_NODES; i++)

	i = 0; ...

> +		if (node[i].val == val)
> +			return node[i].node;
> +
> +	return NULL;
> +}
> +
> +
> +static void find_pos(struct btree_node *node, long val, int *pos, int *fill)
> +{
> +	int i;
> +
> +	for (i=0; i<BTREE_NODES; i++)

same

> +		if (node[i].val <= val)
> +			break;
> +	*pos = i;
> +	for (i=*pos; i<BTREE_NODES; i++)

same

> +		if (node[i].val == 0)
> +			break;
> +	*fill = i;
> +}
> +
> +
> +static struct btree_node *find_level(struct btree_head *head, long val,
> +		int level)
> +{
> +	struct btree_node *node = head->node;
> +	int i, height = head->height;
> +
> +	for ( ; height > level; height--) {
> +		for (i=0; i<BTREE_NODES; i++)

same

> +			if (node[i].val <= val)
> +				break;
> +		node = node[i].node;
> +	}
> +	return node;
> +}
> +
> +
> +
> +static int btree_remove_level(struct btree_head *head, long val, int level)
> +{
> +	struct btree_node *node;
> +	int i, pos, fill;
> +
> +	if (val == 0) { /* 0 identifies empty slots, so special-case this */
> +		head->null_ptr = NULL;
> +		return 0;
> +	}
> +
> +	node = find_level(head, val, level);
> +	find_pos(node, val, &pos, &fill);
> +	if (level == 1)
> +		BUG_ON(node[pos].val != val);
> +
> +	/* remove and shift */
> +	for (i=pos; i<fill-1; i++) {
> +		node[i].val = node[i+1].val;
> +		node[i].node = node[i+1].node;
> +	}
> +	node[fill-1].val = 0;
> +	node[fill-1].node = NULL;
> +
> +	if (fill-1 < BTREE_NODES/2) {
> +		/* XXX */

	YYYY perhaps ?

> +	}
> +	if (fill-1 == 0) {
> +		btree_remove_level(head, val, level+1);
> +		kfree(node);
> +		return 0;
> +	}
> +
> +	return 0;
> +}

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html