Re: [RFC] quota: 64-bit limits with vfs, updated

Andreas Dilger <adilger@xxxxxxx> · Sun, 16 Mar 2008 06:47:26 +0800

On Mar 15, 2008  21:58 +0300, Andrew Perepechko wrote:
> +static inline uint v2_dqblksz(uint rev)
> +{
> +	uint sz = 0; /* make the compiler happy */
> +
> +	switch (rev) {
> +	case 0:
> +		sz = sizeof(struct v2_disk_dqblk_r0);
> +		break;
> +	case 1:
> +		sz = sizeof(struct v2_disk_dqblk_r1);
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	return sz;
> +}

Is this ever used on values that are read from the disk?  If yes, then
having it BUG() because there is corruption on the disk is bad.

> +static int v2_quota_file_revision(struct super_block *sb, int type)
>  {
>  	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
>  	if (size != sizeof(struct v2_disk_dqheader)) {
>  		printk("quota_v2: failed read expected=%zd got=%zd\n",
>  			sizeof(struct v2_disk_dqheader), size);
> -		return 0;
> +		return -1;
>  	}
> -	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> -	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> -		return 0;
> -	return 1;
> +	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
> +			return 0;
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
> +			return 1;
> +	}
> +	return -1;
> +}
> @@ -51,6 +92,10 @@ static int v2_read_file_info(struct supe
> +
> +	rev = v2_quota_file_revision(sb, type);
> +	BUG_ON(rev < 0);

This is no good - if there is an error from ->quota_read() then the kernel
will BUG() here.  You need proper (non fatal) error handling when you are
first loading the quota information into memory.

> +/* Check whether given file is really vfsv0 quotafile */
> +static int v2_check_quota_file(struct super_block *sb, int type)
> +{
> +	return v2_quota_file_revision(sb, type) != -1;
>  }

This could be inline.

> +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> +		  (qsize_t)le64_to_cpu(v) : \
> +		  (qsize_t)le32_to_cpu(v))
> +
> +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> +			(v = cpu_to_le64((__u64)newv)) : \
> +			(v = cpu_to_le32((__u32)newv)))
> +
> +#define DQF_GET(var, rev, field) (rev == 0 ? \
> +		DQ2MQ((var)->disk_dqblk_r0.field) : \
> +		DQ2MQ((var)->disk_dqblk_r1.field))
> +
> +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> +		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> +		MQ2DQ((var)->disk_dqblk_r1.field, val))
> +
> +void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
> +		 uint rev)
> +{
> +	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
> +	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
> +	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
> +	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
> +	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
> +	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
> +	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
> +	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
> +}

While this produces nice looking code, it isn't very efficient to run.
It has 8 checks for "rev" per call, and it would have an additional
8 checks for "sizeof" except those are constant and optimized away
by the compiler.  If you can convince me that the compiler will also
optimize the 8 checks for "rev" into a single branch then it can
stay, otherwise you should split it into 2 large sections based on
"rev" and then decode r0 and r1 structures separately.

> @@ -301,17 +371,18 @@ static uint find_free_dqentry(struct dqu
> +	/* Block will be full? */
> +	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
>  		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
>  			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
>  			goto out_buf;
>  		}

Can you please clean this up a bit while here:

	/* Block will be full? */
	if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk &&
	    (*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
		printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove "
				"block (%u) from entry free list.\n", blk);
		goto out_buf;
	}

> +	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,
> +			ddquot+i, dqblksz); i++);

This is also quite easy to be confused, please write it as:

	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,ddquot+i,dqblksz); i++)
		/* empty loop to count used dquot structs in block */;

In fact, it would be even better to just increment "ddquot" in the loop to
avoid having to re-do the offset each time:

	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
	     i++, ddquot++)
		/* empty loop to count used dquot structs in block */;

> -	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> +	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> +			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;

With the above change to ddquot you can then use it directly here:

	dquot->dq_off = (blk << V2_DQBLKSIZE_BITS) +
			((char *)ddquot - (char *)buf);

> @@ -395,7 +467,9 @@ static int v2_write_dquot(struct dquot *
>  {
>  	int type = dquot->dq_type;

type is only used twice in this function, please remove it from the stack.

> @@ -538,27 +613,29 @@ static loff_t find_block_dqentry(struct
> +	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	int type = dquot->dq_type;

type only used twice in this function, please remove from stack.

> +	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>  
>  	if (dquot->dq_id)
> -		for (i = 0; i < V2_DQSTRINBLK &&
> -		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> +		for (i = 0; i < dqstrinblk &&
> +		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
>  	else {	/* ID 0 as a bit more complicated searching... */

Spaces around " + ", and please add a { } around this block because second
part has { } already.

> +		for (i = 0; i < dqstrinblk; i++)
> +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> +			    memcmp(&emptydquot, ddquot+i, dqblksz))
>  				break;
>  	}

Spaces around " + " here also.  Actually, please run the patch through
"checkpatch.pl" to find any similar issues.

> @@ -608,7 +685,7 @@ static int v2_read_dquot(struct dquot *d
>  {
>  	int type = dquot->dq_type;
>  	loff_t offset;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot;

Please remove type from stack.

> @@ -629,25 +706,25 @@ static int v2_read_dquot(struct dquot *d
> +		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
> +			   dqi_revision, dqblksz = v2_dqblksz(rev);

Use another line with "uint dqblksz = v2_dqblksz(rev);" here.

>  struct mem_dqblk {
> -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
>  	qsize_t dqb_curspace;	/* current used space */
> -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> -	__u32 dqb_curinodes;	/* current # allocated inodes */
> +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> +	qsize_t dqb_curinodes;	/* current # allocated inodes */
>  	time_t dqb_btime;	/* time limit for excessive disk use */
>  	time_t dqb_itime;	/* time limit for excessive inode use */
>  };

Is there a reason to use "qsize_t" instead of just using __u64 directly?

> +++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-13 22:01:48.000000000 +0300
> @@ -16,28 +16,51 @@
> @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
>  
>  /*
>   *  Structure of header of block with quota structures. It is padded to 16 bytes so
> - *  there will be space for exactly 21 quota-entries in a block
> + *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
>   */

Hmm, when this comment says "block" does it mean "1024-byte block" or
filesystem block?  For 1024-byte block it is true that both 48-byte (r0) and
72-byte (r1) records fit evenly, but for 4096-byte blocks 72-byte records
do not fit evenly.  Is that a problem?

Please line wrap or adjust comment to fit in 80 colums.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html