Re: [PATCH] nilfs2: sync super blokcs in turns

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Thank you for the comments!

At Fri, 28 May 2010 01:59:59 +0900 (JST),
Ryusuke Konishi wrote:
> 
> Hi,
> On Thu, 27 May 2010 16:27:22 +0900, Jiro SEKIBA wrote:
> > Hi,
> > 
> > This is a proposed patch to sync super blocks by turn.
> > It still require recovery action when super root is not found,
> 
> Arh, right.  We need some extension to retry the search with the older
> super block.  It looks a bit complicate.
> 
> Ok, I'll take a moment to solve this issue.
> 
> > but it works at least to mount with valid fs with the patch:
> > 
> >  nilfs2: use checkpoint number instead of timestamp to select super block
> >  9f6c75b4c354939f0a8aefc19eb6a9334ef58a89
> > 
> > This will sync super blocks by turn instead of syncing duplicate
> > super blocks at the time.  This will help searching valid super root when
> > super block is written into disk before log is written, which is happen when
> > barrier-less block devices are unmounted uncleanly.
> > In the stiation, old super block likely points valid log.
> > 
> > Signed-off-by: Jiro SEKIBA <jir@xxxxxxxxx>
> > ---
> >  fs/nilfs2/nilfs.h     |    2 +-
> >  fs/nilfs2/segment.c   |    3 +--
> >  fs/nilfs2/super.c     |   32 ++++++++++++++++----------------
> >  fs/nilfs2/the_nilfs.c |    2 +-
> >  4 files changed, 19 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
> > index 8723e5b..ec644c5 100644
> > --- a/fs/nilfs2/nilfs.h
> > +++ b/fs/nilfs2/nilfs.h
> > @@ -270,7 +270,7 @@ extern struct nilfs_super_block *
> >  nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
> >  extern int nilfs_store_magic_and_option(struct super_block *,
> >  					struct nilfs_super_block *, char *);
> > -extern int nilfs_commit_super(struct nilfs_sb_info *, int);
> > +extern int nilfs_commit_super(struct nilfs_sb_info *);
> >  extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64);
> >  extern void nilfs_detach_checkpoint(struct nilfs_sb_info *);
> >  
> > diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
> > index 6a7dbd8..b48de1f 100644
> > --- a/fs/nilfs2/segment.c
> > +++ b/fs/nilfs2/segment.c
> > @@ -2490,8 +2490,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
> >  		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
> >  		    nilfs_discontinued(nilfs)) {
> >  			down_write(&nilfs->ns_sem);
> > -			err = nilfs_commit_super(
> > -				sbi, nilfs_altsb_need_update(nilfs));
> > +			err = nilfs_commit_super(sbi);
> >  			up_write(&nilfs->ns_sem);
> >  		}
> >  	}
> > diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
> > index 48145f5..4894e07 100644
> > --- a/fs/nilfs2/super.c
> > +++ b/fs/nilfs2/super.c
> > @@ -101,7 +101,7 @@ void nilfs_error(struct super_block *sb, const char *function,
> >  			nilfs->ns_mount_state |= NILFS_ERROR_FS;
> >  			nilfs->ns_sbp[0]->s_state |=
> >  				cpu_to_le16(NILFS_ERROR_FS);
> > -			nilfs_commit_super(sbi, 1);
> > +			nilfs_commit_super(sbi);
> >  		}
> >  		up_write(&nilfs->ns_sem);
> >  
> > @@ -200,7 +200,7 @@ static void nilfs_clear_inode(struct inode *inode)
> >  	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
> >  }
> >  
> > -static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
> > +static int nilfs_sync_super(struct nilfs_sb_info *sbi)
> >  {
> >  	struct the_nilfs *nilfs = sbi->s_nilfs;
> >  	int err;
> > @@ -226,6 +226,9 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
> >  		printk(KERN_ERR
> >  		       "NILFS: unable to write superblock (err=%d)\n", err);
> >  		if (err == -EIO && nilfs->ns_sbh[1]) {
> > +			memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0],
> > +			       nilfs->ns_sbsize);
> > +			nilfs->ns_sbwtime[1] = nilfs->ns_sbwtime[0];
> >  			nilfs_fall_back_super_block(nilfs);
> >  			goto retry;
> >  		}
> > @@ -240,11 +243,12 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
> >  
> >  		/* update GC protection for recent segments */
> >  		if (nilfs->ns_sbh[1]) {
> > +			int flip_bits = (nilfs->ns_cno & 0x0FL);
> >  			sbp = NULL;
> > -			if (dupsb) {
> > -				set_buffer_dirty(nilfs->ns_sbh[1]);
> > -				if (!sync_dirty_buffer(nilfs->ns_sbh[1]))
> > -					sbp = nilfs->ns_sbp[1];
> > +			/* flip super block 9 to 7 ratio */
> > +			if ((flip_bits == 0x08) || flip_bits == 0x0F) {
> > +				sbp = nilfs->ns_sbp[1];
> > +				nilfs_swap_super_block(nilfs);
> >  			}
> >  		}
> >  		if (sbp) {
> > @@ -257,7 +261,7 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
> >  	return err;
> >  }
> >  
> > -int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
> > +int nilfs_commit_super(struct nilfs_sb_info *sbi)
> >  {
> >  	struct the_nilfs *nilfs = sbi->s_nilfs;
> >  	struct nilfs_super_block **sbp = nilfs->ns_sbp;
> > @@ -294,12 +298,8 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
> >  	sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
> >  					     (unsigned char *)sbp[0],
> >  					     nilfs->ns_sbsize));
> > -	if (dupsb && sbp[1]) {
> > -		memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
> > -		nilfs->ns_sbwtime[1] = t;
> > -	}
> >  	clear_nilfs_sb_dirty(nilfs);
> > -	return nilfs_sync_super(sbi, dupsb);
> > +	return nilfs_sync_super(sbi);
> >  }
> >  
> >  static void nilfs_put_super(struct super_block *sb)
> > @@ -314,7 +314,7 @@ static void nilfs_put_super(struct super_block *sb)
> >  	if (!(sb->s_flags & MS_RDONLY)) {
> >  		down_write(&nilfs->ns_sem);
> >  		nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
> > -		nilfs_commit_super(sbi, 1);
> > +		nilfs_commit_super(sbi);
> >  		up_write(&nilfs->ns_sem);
> >  	}
> >  	down_write(&nilfs->ns_super_sem);
> > @@ -343,7 +343,7 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
> >  
> >  	down_write(&nilfs->ns_sem);
> >  	if (nilfs_sb_dirty(nilfs))
> > -		nilfs_commit_super(sbi, 1);
> > +		nilfs_commit_super(sbi);
> >  	up_write(&nilfs->ns_sem);
> >  
> >  	return err;
> > @@ -657,7 +657,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
> >  	sbp->s_mnt_count = cpu_to_le16(mnt_count + 1);
> >  	sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS);
> >  	sbp->s_mtime = cpu_to_le64(get_seconds());
> > -	return nilfs_commit_super(sbi, 1);
> > +	return nilfs_commit_super(sbi);
> >  }
> >  
> >  struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
> > @@ -901,7 +901,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
> >  		    (nilfs->ns_mount_state & NILFS_VALID_FS))
> >  			sbp->s_state = cpu_to_le16(nilfs->ns_mount_state);
> >  		sbp->s_mtime = cpu_to_le64(get_seconds());
> > -		nilfs_commit_super(sbi, 1);
> > +		nilfs_commit_super(sbi);
> >  		up_write(&nilfs->ns_sem);
> >  	} else {
> >  		/*
> > diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
> > index a756168..f779363 100644
> > --- a/fs/nilfs2/the_nilfs.c
> > +++ b/fs/nilfs2/the_nilfs.c
> > @@ -327,7 +327,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
> >  	down_write(&nilfs->ns_sem);
> >  	nilfs->ns_mount_state |= NILFS_VALID_FS;
> >  	nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
> > -	err = nilfs_commit_super(sbi, 1);
> > +	err = nilfs_commit_super(sbi);
> >  	up_write(&nilfs->ns_sem);
> >  
> >  	if (err) {
> > -- 
> > 1.5.6.5
> 
> 
> Well, the basic idea looks OK.
> 
> But, this patch has an issue.
> 
> When the filesystem is mounted, nilfs updates super blocks to drop
> their VALID_FS flags, and this state continues until the partition
> will be unmounted.
> 
> The point is that the checkpoint number does not change when the
> VALID_FS flags are set or unset.
> 
> If the system goes down while the VALID_FS flag on the secondary super
> block is still on, it may prevent roll-forward recovery because the
> VALID_FS flag forces to skip the recovery.
> 
> So, we still have to drop the VALID_FS flag together from both super
> blocks.

I see the issue.  Actually, VALID_FS flag is never unset for the
older super block on mount time.  Because ns_sbp[0]->s_state is never
propagated to the ns_sbp[1]->s_sate.

There are two way to unset VALID_FS flag on mount time, I came up with:

1. use dupsb flag only in load_nilfs
2. swap super block and commit those separately in load_nilfs

1st case, dupsb flag is examined each time in nilfs_sync_super.
2nd case is a little hacky, for it requires to check checkpoint number
if checkpoint number is the number to swap super blocks to make sure
not to swap twice (in load_nilfs and nilfs_sync_super).

> Or, we may as well stop using the VALID_FS flag.  Nilfs can know
> whether the recovery is needed or not by scanning logs without the
> VALID_FS flag.

Actually, what I've done so far for grub module does not check that flag,
but just try to find the latest log from where super block pointed.
It, as far as I understand, is harmless to try roll forward action
on the clean unmount fs.

I think it may take more time when it's valid but, searching log
instantly fails.  Therefore that differences might be slight.

It would be good to have a "valid" flag for peace of mind, though.

> > +			/* flip super block 9 to 7 ratio */
> > +			if ((flip_bits == 0x08) || flip_bits == 0x0F) {
> > +				sbp = nilfs->ns_sbp[1];
> > +				nilfs_swap_super_block(nilfs);
> >  			}
> 
> This twist looks well thought out.  But, one comment.  From the coding
> style viewpoint, it should be as follows:
> 
> > +			if (flip_bits == 0x08 || flip_bits == 0x0F) {
> 
> Thanks,
> Ryusuke Konishi
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> 


-- 
Jiro SEKIBA <jir@xxxxxxxxx>
--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux BTRFS]     [Linux CIFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux