Re: [PATCH v2] ceph: invalidate pages when doing DIO in encrypted inodes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Xiubo Li <xiubli@xxxxxxxxxx> writes:

> Hi Luis,
>
> Please try the following patch, to see could it resolve your issue:

No, this seems to deadlock when running my test.  I'm attaching the code
I'm using to test it; it's part of generic/647 but I've removed all the
other test cases that were passing.  I simply mount the filesystem with
test_dummy_encryption and run this test.  With your patch it'll hang;
without it it'll show "pwrite (O_DIRECT) is broken".  The extra
invalidate_inode_pages2_range() will make it pass.

Cheers,
-- 
Luís

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2021 Red Hat, Inc.  All Rights Reserved.
 * Written by Andreas Gruenbacher (agruenba@xxxxxxxxxx)
 */

/* Trigger page faults in the same file during read and write. */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* to get definition of O_DIRECT flag. */
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <err.h>

char *filename;
unsigned int page_size;
void *page;
char *addr;
int fd;
ssize_t ret;

/*
 * Leave a hole at the beginning of the test file and initialize a block of
 * @page_size bytes at offset @page_size to @c.  Then, reopen the file and
 * mmap the first two pages.
 */
void init(char c, int flags)
{
	fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY | O_DIRECT, 0666);
	if (fd == -1)
		goto fail;
	memset(page, c, page_size);
	ret = pwrite(fd, page, page_size, page_size);
	if (ret != page_size)
		goto fail;
	if (close(fd))
		goto fail;

	fd = open(filename, flags);
	if (fd == -1)
		goto fail;
	addr = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (addr == MAP_FAILED)
		err(1, NULL);
	return;

fail:
	err(1, "%s", filename);
}

static ssize_t do_write(int fd, const void *buf, size_t count, off_t offset)
{
	ssize_t count2 = 0, ret;

	do {
		ret = pwrite(fd, buf, count, offset);
		if (ret == -1) {
			if (errno == EINTR)
				continue;
			break;
		}
		if (ret == 0)
			break;
		count2 += ret;
		buf += ret;
		count -= ret;
	} while (count);
	return count2;
}

int main(int argc, char *argv[])
{
	if (argc != 2)
		errx(1, "no test filename argument given");
	filename = argv[1];

	page_size = ret = sysconf(_SC_PAGE_SIZE);
	if (ret == -1)
		err(1, NULL);

	ret = posix_memalign(&page, page_size, page_size);
	if (ret) {
		errno = ENOMEM;
		err(1, NULL);
	}
	init('d', O_RDWR | O_DIRECT);
	ret = do_write(fd, addr + page_size, page_size, 0);
	if (ret != page_size)
		err(1, "pwrite %s (O_DIRECT): %ld != %u", filename, ret, page_size);
	if (memcmp(addr, page, page_size))
		errx(1, "pwrite (O_DIRECT) is broken");

	if (fsync(fd))
		errx(1, "fsync");
	if (close(fd))
		errx(1, "close");

	if (unlink(filename))
		err(1, "unlink %s", filename);

	return 0;
}
>
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 5d39d8e54273..3507e4066de4 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2011,6 +2011,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct
> iov_iter *to)
>                      ceph_cap_string(got));
>
>                 if (ci->i_inline_version == CEPH_INLINE_NONE) {
> + filemap_invalidate_lock(inode->i_mapping);
>                         if (!retry_op &&
>                             (iocb->ki_flags & IOCB_DIRECT) &&
>                             !IS_ENCRYPTED(inode)) {
> @@ -2021,6 +2022,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct
> iov_iter *to)
>                         } else {
>                                 ret = ceph_sync_read(iocb, to, &retry_op);
>                         }
> + filemap_invalidate_unlock(inode->i_mapping);
>                 } else {
>                         retry_op = READ_INLINE;
>                 }
> @@ -2239,11 +2241,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb,
> struct iov_iter *from)
>
>                 /* we might need to revert back to that point */
>                 data = *from;
> +               filemap_invalidate_lock(inode->i_mapping);
>                 if ((iocb->ki_flags & IOCB_DIRECT) && !IS_ENCRYPTED(inode))
>                         written = ceph_direct_read_write(iocb, &data, snapc,
> &prealloc_cf);
>                 else
>                         written = ceph_sync_write(iocb, &data, pos, snapc);
> +               filemap_invalidate_unlock(inode->i_mapping);
>                 if (direct_lock)
>                         ceph_end_io_direct(inode);
>                 else
>
>
>
> On 4/1/22 9:32 PM, Luís Henriques wrote:
>> When doing DIO on an encrypted node, we need to invalidate the page cache in
>> the range being written to, otherwise the cache will include invalid data.
>>
>> Signed-off-by: Luís Henriques <lhenriques@xxxxxxx>
>> ---
>>   fs/ceph/file.c | 11 ++++++++++-
>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> Changes since v1:
>> - Replaced truncate_inode_pages_range() by invalidate_inode_pages2_range
>> - Call fscache_invalidate with FSCACHE_INVAL_DIO_WRITE if we're doing DIO
>>
>> Note: I'm not really sure this last change is required, it doesn't really
>> affect generic/647 result, but seems to be the most correct.
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 5072570c2203..b2743c342305 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -1605,7 +1605,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
>>   	if (ret < 0)
>>   		return ret;
>>   -	ceph_fscache_invalidate(inode, false);
>> +	ceph_fscache_invalidate(inode, (iocb->ki_flags & IOCB_DIRECT));
>>   	ret = invalidate_inode_pages2_range(inode->i_mapping,
>>   					    pos >> PAGE_SHIFT,
>>   					    (pos + count - 1) >> PAGE_SHIFT);
>> @@ -1895,6 +1895,15 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
>>   		req->r_inode = inode;
>>   		req->r_mtime = mtime;
>>   +		if (IS_ENCRYPTED(inode) && (iocb->ki_flags & IOCB_DIRECT)) {
>> +			ret = invalidate_inode_pages2_range(
>> +				inode->i_mapping,
>> +				write_pos >> PAGE_SHIFT,
>> +				(write_pos + write_len - 1) >> PAGE_SHIFT);
>> +			if (ret < 0)
>> +				dout("invalidate_inode_pages2_range returned %d\n", ret);
>> +		}
>> +
>>   		/* Set up the assertion */
>>   		if (rmw) {
>>   			/*
>>
>

[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux