Re: [EXT4 set 7][PATCH 1/1]Remove 32000 subdirs limit.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, 01 Jul 2007 03:38:18 -0400 Mingming Cao <cmm@xxxxxxxxxx> wrote:

> >From kalpak@xxxxxxxxxxxxx Thu May 17 17:21:08 2007
> Hi,
> 
> I have rebased this patch to 2.6.22-rc1 so that it can be added to the
> ext4 patch queue. It has been tested by creating more than 65000 subdirs
> and then deleting them and checking the nlinks. The e2fsprogs part of
> this patch was sent earlier by me to linux-ext4 and doesn't need any
> changes, so not submitting it again.
> 
> ----------------------------------------------------------------------
> This patch adds support to ext4 for allowing more than 65000
> subdirectories. Currently the maximum number of subdirectories is capped
> at 32000.
> 
> If we exceed 65000 subdirectories in an htree directory it sets the
> inode link count to 1 and no longer counts subdirectories.  The
> directory link count is not actually used when determining if a
> directory is empty, as that only counts subdirectories and not regular
> files that might be in there. 
> 
> A EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added and it is set if
> the subdir count for any directory crosses 65000.
> 

Would I be correct in assuming that a later fsck will clear
EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any >65000 subdir
directories?

If so, that is worth a mention in the changelog, perhaps?

Please remind us what is the behaviour of an RO_COMPAT flag?  It means that
old ext4, ext3 and ext2 can only mount this fs read-only, yes?

> 
> Index: linux-2.6.22-rc4/fs/ext4/namei.c
> ===================================================================
> --- linux-2.6.22-rc4.orig/fs/ext4/namei.c	2007-06-14 17:30:47.000000000 -0700
> +++ linux-2.6.22-rc4/fs/ext4/namei.c	2007-06-14 17:32:55.000000000 -0700
> @@ -1619,6 +1619,27 @@ static int ext4_delete_entry (handle_t *
>  	return -ENOENT;
>  }
>  
> +static inline void ext4_inc_count(handle_t *handle, struct inode *inode)
> +{
> +	inc_nlink(inode);
> +	if (is_dx(inode) && inode->i_nlink > 1) {
> +		/* limit is 16-bit i_links_count */
> +		if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
> +			inode->i_nlink = 1;
> +			EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
> +					      EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
> +		}
> +	}
> +}

Looks too big to be inlined.

Why do we set EXT4_FEATURE_RO_COMPAT_DIR_NLINK if i_nlink==2?

> +static inline void ext4_dec_count(handle_t *handle, struct inode *inode)
> +{
> +	drop_nlink(inode);
> +	if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
> +		inc_nlink(inode);
> +}

Probably too big to inline.

> +
>  static int ext4_add_nondir(handle_t *handle,
>  		struct dentry *dentry, struct inode *inode)
>  {
> @@ -1715,7 +1736,7 @@ static int ext4_mkdir(struct inode * dir
>  	struct ext4_dir_entry_2 * de;
>  	int err, retries = 0;
>  
> -	if (dir->i_nlink >= EXT4_LINK_MAX)
> +	if (EXT4_DIR_LINK_MAX(dir))
>  		return -EMLINK;
>  
>  retry:
> @@ -1738,7 +1759,7 @@ retry:
>  	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
>  	dir_block = ext4_bread (handle, inode, 0, 1, &err);
>  	if (!dir_block) {
> -		drop_nlink(inode); /* is this nlink == 0? */
> +		ext4_dec_count(handle, inode); /* is this nlink == 0? */
>  		ext4_mark_inode_dirty(handle, inode);
>  		iput (inode);
>  		goto out_stop;
> @@ -1770,7 +1791,7 @@ retry:
>  		iput (inode);
>  		goto out_stop;
>  	}
> -	inc_nlink(dir);
> +	ext4_inc_count(handle, dir);
>  	ext4_update_dx_flag(dir);
>  	ext4_mark_inode_dirty(handle, dir);
>  	d_instantiate(dentry, inode);
> @@ -2035,10 +2056,10 @@ static int ext4_rmdir (struct inode * di
>  	retval = ext4_delete_entry(handle, dir, de, bh);
>  	if (retval)
>  		goto end_rmdir;
> -	if (inode->i_nlink != 2)
> -		ext4_warning (inode->i_sb, "ext4_rmdir",
> -			      "empty directory has nlink!=2 (%d)",
> -			      inode->i_nlink);
> +	if (!EXT4_DIR_LINK_EMPTY(inode))
> +		ext4_warning(inode->i_sb, "ext4_rmdir",
> +			     "empty directory has too many links (%d)",
> +			     inode->i_nlink);
>  	inode->i_version++;
>  	clear_nlink(inode);
>  	/* There's no need to set i_disksize: the fact that i_nlink is
> @@ -2048,7 +2069,7 @@ static int ext4_rmdir (struct inode * di
>  	ext4_orphan_add(handle, inode);
>  	inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
>  	ext4_mark_inode_dirty(handle, inode);
> -	drop_nlink(dir);
> +	ext4_dec_count(handle, dir);
>  	ext4_update_dx_flag(dir);
>  	ext4_mark_inode_dirty(handle, dir);
>  
> @@ -2099,7 +2120,7 @@ static int ext4_unlink(struct inode * di
>  	dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
>  	ext4_update_dx_flag(dir);
>  	ext4_mark_inode_dirty(handle, dir);
> -	drop_nlink(inode);
> +	ext4_dec_count(handle, inode);
>  	if (!inode->i_nlink)
>  		ext4_orphan_add(handle, inode);
>  	inode->i_ctime = ext4_current_time(inode);
> @@ -2149,7 +2170,7 @@ retry:
>  		err = __page_symlink(inode, symname, l,
>  				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
>  		if (err) {
> -			drop_nlink(inode);
> +			ext4_dec_count(handle, inode);
>  			ext4_mark_inode_dirty(handle, inode);
>  			iput (inode);
>  			goto out_stop;
> @@ -2175,8 +2196,9 @@ static int ext4_link (struct dentry * ol
>  	struct inode *inode = old_dentry->d_inode;
>  	int err, retries = 0;
>  
> -	if (inode->i_nlink >= EXT4_LINK_MAX)
> +	if (EXT4_DIR_LINK_MAX(inode))
>  		return -EMLINK;

argh.  WHY_IS_EXT4_FULL_OF_UPPER_CASE_MACROS_WHICH_COULD_BE_IMPLEMENTED
as_lower_case_inlines?  Sigh.  It's all the old-timers, I guess.

EXT4_DIR_LINK_MAX() is buggy: it evaluates its arg twice.

>  	/*
>  	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
>  	 * otherwise has the potential to corrupt the orphan inode list.
> @@ -2194,7 +2216,7 @@ retry:
>  		handle->h_sync = 1;
>  
>  	inode->i_ctime = ext4_current_time(inode);
> -	inc_nlink(inode);
> +	ext4_inc_count(handle, inode);
>  	atomic_inc(&inode->i_count);
>  
>  	err = ext4_add_nondir(handle, dentry, inode);
> @@ -2327,7 +2349,7 @@ static int ext4_rename (struct inode * o
>  	}
>  
>  	if (new_inode) {
> -		drop_nlink(new_inode);
> +		ext4_dec_count(handle, new_inode);
>  		new_inode->i_ctime = ext4_current_time(new_inode);
>  	}
>  	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
> @@ -2338,11 +2360,13 @@ static int ext4_rename (struct inode * o
>  		PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
>  		BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
>  		ext4_journal_dirty_metadata(handle, dir_bh);
> -		drop_nlink(old_dir);
> +		ext4_dec_count(handle, old_dir);
>  		if (new_inode) {
> -			drop_nlink(new_inode);
> +			/* checked empty_dir above, can't have another parent,
> +			 * ext3_dec_count() won't work for many-linked dirs */
> +			new_inode->i_nlink = 0;
>  		} else {
> -			inc_nlink(new_dir);
> +			ext4_inc_count(handle, new_dir);
>  			ext4_update_dx_flag(new_dir);
>  			ext4_mark_inode_dirty(handle, new_dir);
>  		}

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux