Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()

Chao Yu <yuchao0@xxxxxxxxxx> · Mon, 18 Feb 2019 09:39:14 +0800

Hi xiang,

On 2019/2/15 16:58, Gao Xiang wrote:
> Hi Chao,
> 
> On 2019/2/15 15:02, Chao Yu wrote:
>> On 2019/2/1 20:16, Gao Xiang wrote:
>>> As Al pointed out, "
>>> ... and while we are at it, what happens to
>>> 	unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> 	unsigned int matched = min(startprfx, endprfx);
>>>
>>> 	struct qstr dname = QSTR_INIT(data + nameoff,
>>> 		unlikely(mid >= ndirents - 1) ?
>>> 			maxsize - nameoff :
>>> 			le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>>
>>> 	/* string comparison without already matched prefix */
>>> 	int ret = dirnamecmp(name, &dname, &matched);
>>> if le16_to_cpu(de[...].nameoff) is not monotonically increasing?  I.e.
>>> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>>>
>>> Corrupted fs image shouldn't oops the kernel.. "
>>>
>>> Revisit the related lookup flow to address the issue.
>>>
>>> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
>>> Cc: <stable@xxxxxxxxxxxxxxx> # 4.19+
>>> Suggested-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
>>> Signed-off-by: Gao Xiang <gaoxiang25@xxxxxxxxxx>
>>> ---
>>> [ It should be better get reviewed first and for linux-next... ]
>>>
>>> change log v2:
>>>  - fix the missing "kunmap_atomic" pointed out by Dan;
>>>  - minor cleanup;
>>>
>>>  drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>>>  1 file changed, 99 insertions(+), 88 deletions(-)
>>>
>>> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
>>> index 5596c52e246d..321c881d720f 100644
>>> --- a/drivers/staging/erofs/namei.c
>>> +++ b/drivers/staging/erofs/namei.c
>>> @@ -15,74 +15,77 @@
>>>  
>>>  #include <trace/events/erofs.h>
>>>  
>>> -/* based on the value of qn->len is accurate */
>>> -static inline int dirnamecmp(struct qstr *qn,
>>> -	struct qstr *qd, unsigned int *matched)
>>> +struct erofs_qstr {
>>> +	const unsigned char *name;
>>> +	const unsigned char *end;
>>> +};
>>> +
>>> +/* based on the end of qn is accurate and it must have the trailing '\0' */
>>> +static inline int dirnamecmp(const struct erofs_qstr *qn,
>>> +			     const struct erofs_qstr *qd,
>>> +			     unsigned int *matched)
>>>  {
>>> -	unsigned int i = *matched, len = min(qn->len, qd->len);
>>> -loop:
>>> -	if (unlikely(i >= len)) {
>>> -		*matched = i;
>>> -		if (qn->len < qd->len) {
>>> -			/*
>>> -			 * actually (qn->len == qd->len)
>>> -			 * when qd->name[i] == '\0'
>>> -			 */
>>> -			return qd->name[i] == '\0' ? 0 : -1;
>>> +	unsigned int i = *matched;
>>> +
>>> +	/*
>>> +	 * on-disk error, let's only BUG_ON in the debugging mode.
>>> +	 * otherwise, it will return 1 to just skip the invalid name
>>> +	 * and go on (in consideration of the lookup performance).
>>> +	 */
>>> +	DBG_BUGON(qd->name > qd->end);
>>
>> qd->name == qd->end is not allowed as well?
> 
> Make sense, it is only used for debugging mode, let me send another patch later...
> 
>>
>> So will it be better to return directly here?
>>
>> 	if (unlikely(qd->name >= qd->end)) {
>> 		DBG_BUGON(1);
>> 		return 1;
>> 	}
> 
> Corrupted image is rare happened in normal systems, I tend to only use
> DBG_BUGON here, and  qn->name[i] is of course not '\0', so it will return 1;

If the image is corrupted, qn->name[i] may be anything, as you commented
above DBG_BUGON(), we really don't need to go through any later codes, it
can avoid potentially encoutnering wrong condition.

* otherwise, it will return 1 to just skip the invalid name

> 
>>
>>> +
>>> +	/* qd could not have trailing '\0' */
>>> +	/* However it is absolutely safe if < qd->end */
>>> +	while (qd->name + i < qd->end && qd->name[i] != '\0') {
>>> +		if (qn->name[i] != qd->name[i]) {
>>> +			*matched = i;
>>> +			return qn->name[i] > qd->name[i] ? 1 : -1;
>>>  		}
>>> -		return (qn->len > qd->len);
>>> +		++i;
>>>  	}
>>> -
>>> -	if (qn->name[i] != qd->name[i]) {
>>> -		*matched = i;
>>> -		return qn->name[i] > qd->name[i] ? 1 : -1;
>>> -	}
>>> -
>>> -	++i;
>>> -	goto loop;
>>> +	*matched = i;
>>> +	/* See comments in __d_alloc on the terminating NUL character */
>>> +	return qn->name[i] == '\0' ? 0 : 1;
>>>  }
>>>  
>>> -static struct erofs_dirent *find_target_dirent(
>>> -	struct qstr *name,
>>> -	u8 *data, int maxsize)
>>> +#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
>>> +
>>> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
>>> +					       u8 *data,
>>> +					       unsigned int dirblksize,
>>> +					       const int ndirents)
>>>  {
>>> -	unsigned int ndirents, head, back;
>>> +	int head, back;
>>>  	unsigned int startprfx, endprfx;
>>>  	struct erofs_dirent *const de = (struct erofs_dirent *)data;
>>>  
>>> -	/* make sure that maxsize is valid */
>>> -	BUG_ON(maxsize < sizeof(struct erofs_dirent));
>>> -
>>> -	ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
>>> -
>>> -	/* corrupted dir (may be unnecessary...) */
>>> -	BUG_ON(!ndirents);
>>> -
>>> -	head = 0;
>>> +	/* since the 1st dirent has been evaluated previously */
>>> +	head = 1;
>>>  	back = ndirents - 1;
>>>  	startprfx = endprfx = 0;
>>>  
>>>  	while (head <= back) {
>>> -		unsigned int mid = head + (back - head) / 2;
>>> -		unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> +		const int mid = head + (back - head) / 2;
>>> +		const int nameoff = nameoff_from_disk(de[mid].nameoff,
>>> +						      dirblksize);
>>>  		unsigned int matched = min(startprfx, endprfx);
>>> -
>>> -		struct qstr dname = QSTR_INIT(data + nameoff,
>>> -			unlikely(mid >= ndirents - 1) ?
>>> -				maxsize - nameoff :
>>> -				le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>> +		struct erofs_qstr dname = {
>>> +			.name = data + nameoff,
>>> +			.end = unlikely(mid >= ndirents - 1) ?
>>> +				data + dirblksize :
>>> +				data + nameoff_from_disk(de[mid + 1].nameoff,
>>> +							 dirblksize)
>>> +		};
>>>  
>>>  		/* string comparison without already matched prefix */
>>>  		int ret = dirnamecmp(name, &dname, &matched);
>>>  
>>> -		if (unlikely(!ret))
>>> +		if (unlikely(!ret)) {
>>>  			return de + mid;
>>> -		else if (ret > 0) {
>>> +		} else if (ret > 0) {
>>>  			head = mid + 1;
>>>  			startprfx = matched;
>>> -		} else if (unlikely(mid < 1))	/* fix "mid" overflow */
>>> -			break;
>>> -		else {
>>> +		} else {
>>>  			back = mid - 1;
>>>  			endprfx = matched;
>>>  		}
>>> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>>>  	return ERR_PTR(-ENOENT);
>>>  }
>>>  
>>> -static struct page *find_target_block_classic(
>>> -	struct inode *dir,
>>> -	struct qstr *name, int *_diff)
>>> +static struct page *find_target_block_classic(struct inode *dir,
>>> +					      struct erofs_qstr *name,
>>> +					      int *_ndirents)
>>>  {
>>>  	unsigned int startprfx, endprfx;
>>> -	unsigned int head, back;
>>> +	int head, back;
>>>  	struct address_space *const mapping = dir->i_mapping;
>>>  	struct page *candidate = ERR_PTR(-ENOENT);
>>>  
>>> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>>>  	back = inode_datablocks(dir) - 1;
>>>  
>>>  	while (head <= back) {
>>> -		unsigned int mid = head + (back - head) / 2;
>>> +		const int mid = head + (back - head) / 2;
>>>  		struct page *page = read_mapping_page(mapping, mid, NULL);
>>>  
>>> -		if (IS_ERR(page)) {
>>> -exact_out:
>>> -			if (!IS_ERR(candidate)) /* valid candidate */
>>> -				put_page(candidate);
>>> -			return page;
>>> -		} else {
>>> -			int diff;
>>> -			unsigned int ndirents, matched;
>>> -			struct qstr dname;
>>> +		if (!IS_ERR(page)) {
>>>  			struct erofs_dirent *de = kmap_atomic(page);
>>> -			unsigned int nameoff = le16_to_cpu(de->nameoff);
>>> -
>>> -			ndirents = nameoff / sizeof(*de);
>>> +			const int nameoff = nameoff_from_disk(de->nameoff,
>>> +							      EROFS_BLKSIZ);
>>> +			const int ndirents = nameoff / sizeof(*de);
>>> +			int diff;
>>> +			unsigned int matched;
>>> +			struct erofs_qstr dname;
>>>  
>>> -			/* corrupted dir (should have one entry at least) */
>>> -			BUG_ON(!ndirents || nameoff > PAGE_SIZE);
>>> +			if (unlikely(!ndirents)) {
>>> +				DBG_BUGON(1);
>>> +				kunmap_atomic(de);
>>> +				put_page(page);
>>> +				page = ERR_PTR(-EIO);
>>> +				goto out;
>>> +			}
>>>  
>>>  			matched = min(startprfx, endprfx);
>>>  
>>>  			dname.name = (u8 *)de + nameoff;
>>> -			dname.len = ndirents == 1 ?
>>> -				/* since the rest of the last page is 0 */
>>> -				EROFS_BLKSIZ - nameoff
>>> -				: le16_to_cpu(de[1].nameoff) - nameoff;
>>> +			if (ndirents == 1)
>>> +				dname.end = (u8 *)de + EROFS_BLKSIZ;
>>> +			else
>>> +				dname.end = (u8 *)de +
>>> +					nameoff_from_disk(de[1].nameoff,
>>> +							  EROFS_BLKSIZ);
>>>  
>>>  			/* string comparison without already matched prefix */
>>>  			diff = dirnamecmp(name, &dname, &matched);
>>>  			kunmap_atomic(de);
>>>  
>>>  			if (unlikely(!diff)) {
>>> -				*_diff = 0;
>>> -				goto exact_out;
>>> +				*_ndirents = 0;
>>> +				goto out;
>>>  			} else if (diff > 0) {
>>>  				head = mid + 1;
>>>  				startprfx = matched;
>>> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>>>  				if (likely(!IS_ERR(candidate)))
>>>  					put_page(candidate);
>>>  				candidate = page;
>>> +				*_ndirents = ndirents;
>>>  			} else {
>>>  				put_page(page);
>>>  
>>> -				if (unlikely(mid < 1))	/* fix "mid" overflow */
>>> -					break;
>>> -
>>>  				back = mid - 1;
>>>  				endprfx = matched;
>>>  			}
>>> +			continue;
>>>  		}
>>> +out:		/* free if the candidate is valid */
>>> +		if (!IS_ERR(candidate))
>>> +			put_page(candidate);
>>> +		return page;
>>>  	}
>>> -	*_diff = 1;
>>>  	return candidate;
>>>  }
>>>  
>>>  int erofs_namei(struct inode *dir,
>>> -	struct qstr *name,
>>> -	erofs_nid_t *nid, unsigned int *d_type)
>>> +		struct qstr *name,
>>> +		erofs_nid_t *nid, unsigned int *d_type)
>>>  {
>>> -	int diff;
>>> +	int ndirents;
>>>  	struct page *page;
>>> -	u8 *data;
>>> +	void *data;
>>>  	struct erofs_dirent *de;
>>> +	struct erofs_qstr qn;
>>>  
>>>  	if (unlikely(!dir->i_size))
>>>  		return -ENOENT;
>>>  
>>> -	diff = 1;
>>> -	page = find_target_block_classic(dir, name, &diff);
>>> +	qn.name = name->name;
>>> +	qn.end = name->name + name->len;
>>> +
>>> +	ndirents = 0;
>>> +	page = find_target_block_classic(dir, &qn, &ndirents);
>>>  
>>> -	if (unlikely(IS_ERR(page)))
>>> +	if (IS_ERR(page))
>>>  		return PTR_ERR(page);
>>>  
>>>  	data = kmap_atomic(page);
>>>  	/* the target page has been mapped */
>>> -	de = likely(diff) ?
>>> -		/* since the rest of the last page is 0 */
>>> -		find_target_dirent(name, data, EROFS_BLKSIZ) :
>>> -		(struct erofs_dirent *)data;
>>> +	if (ndirents)
>>> +		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
>>
>> If we want to support different blksize during dirent lookup, how about
>> adding parameter for find_target_block_classic() as find_target_dirent()?
> 
> Yes, you are right. However, find_target_dirent is now totally designed in page unit
> because of get_meta_page usage, but find_target_dirent doesn't.
> 
> I think if we support sub-page feature later (by using iomap or someelse), it
> could be added later.

No problem, it's not a big deal. :)

Thanks,

> 
> Thanks,
> Gao Xiang
> 
> 
>>
>> Thanks,
>>
>>> +	else
>>> +		de = (struct erofs_dirent *)data;
>>>  
>>> -	if (likely(!IS_ERR(de))) {
>>> +	if (!IS_ERR(de)) {
>>>  		*nid = le64_to_cpu(de->nid);
>>>  		*d_type = de->file_type;
>>>  	}
>>>
>>
> 
> .
>