Xiubo Li <xiubli@xxxxxxxxxx> writes: > Hi Luis, > > Please try the following patch, to see could it resolve your issue: No, this seems to deadlock when running my test. I'm attaching the code I'm using to test it; it's part of generic/647 but I've removed all the other test cases that were passing. I simply mount the filesystem with test_dummy_encryption and run this test. With your patch it'll hang; without it it'll show "pwrite (O_DIRECT) is broken". The extra invalidate_inode_pages2_range() will make it pass. Cheers, -- Luís
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2021 Red Hat, Inc. All Rights Reserved. * Written by Andreas Gruenbacher (agruenba@xxxxxxxxxx) */ /* Trigger page faults in the same file during read and write. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* to get definition of O_DIRECT flag. */ #endif #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <errno.h> #include <err.h> char *filename; unsigned int page_size; void *page; char *addr; int fd; ssize_t ret; /* * Leave a hole at the beginning of the test file and initialize a block of * @page_size bytes at offset @page_size to @c. Then, reopen the file and * mmap the first two pages. */ void init(char c, int flags) { fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY | O_DIRECT, 0666); if (fd == -1) goto fail; memset(page, c, page_size); ret = pwrite(fd, page, page_size, page_size); if (ret != page_size) goto fail; if (close(fd)) goto fail; fd = open(filename, flags); if (fd == -1) goto fail; addr = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) err(1, NULL); return; fail: err(1, "%s", filename); } static ssize_t do_write(int fd, const void *buf, size_t count, off_t offset) { ssize_t count2 = 0, ret; do { ret = pwrite(fd, buf, count, offset); if (ret == -1) { if (errno == EINTR) continue; break; } if (ret == 0) break; count2 += ret; buf += ret; count -= ret; } while (count); return count2; } int main(int argc, char *argv[]) { if (argc != 2) errx(1, "no test filename argument given"); filename = argv[1]; page_size = ret = sysconf(_SC_PAGE_SIZE); if (ret == -1) err(1, NULL); ret = posix_memalign(&page, page_size, page_size); if (ret) { errno = ENOMEM; err(1, NULL); } init('d', O_RDWR | O_DIRECT); ret = do_write(fd, addr + page_size, page_size, 0); if (ret != page_size) err(1, "pwrite %s (O_DIRECT): %ld != %u", filename, ret, page_size); if (memcmp(addr, page, page_size)) errx(1, "pwrite (O_DIRECT) is broken"); if (fsync(fd)) errx(1, "fsync"); if (close(fd)) errx(1, "close"); if (unlink(filename)) err(1, "unlink %s", filename); return 0; }
> > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 5d39d8e54273..3507e4066de4 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -2011,6 +2011,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct > iov_iter *to) > ceph_cap_string(got)); > > if (ci->i_inline_version == CEPH_INLINE_NONE) { > + filemap_invalidate_lock(inode->i_mapping); > if (!retry_op && > (iocb->ki_flags & IOCB_DIRECT) && > !IS_ENCRYPTED(inode)) { > @@ -2021,6 +2022,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct > iov_iter *to) > } else { > ret = ceph_sync_read(iocb, to, &retry_op); > } > + filemap_invalidate_unlock(inode->i_mapping); > } else { > retry_op = READ_INLINE; > } > @@ -2239,11 +2241,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, > struct iov_iter *from) > > /* we might need to revert back to that point */ > data = *from; > + filemap_invalidate_lock(inode->i_mapping); > if ((iocb->ki_flags & IOCB_DIRECT) && !IS_ENCRYPTED(inode)) > written = ceph_direct_read_write(iocb, &data, snapc, > &prealloc_cf); > else > written = ceph_sync_write(iocb, &data, pos, snapc); > + filemap_invalidate_unlock(inode->i_mapping); > if (direct_lock) > ceph_end_io_direct(inode); > else > > > > On 4/1/22 9:32 PM, Luís Henriques wrote: >> When doing DIO on an encrypted node, we need to invalidate the page cache in >> the range being written to, otherwise the cache will include invalid data. >> >> Signed-off-by: Luís Henriques <lhenriques@xxxxxxx> >> --- >> fs/ceph/file.c | 11 ++++++++++- >> 1 file changed, 10 insertions(+), 1 deletion(-) >> >> Changes since v1: >> - Replaced truncate_inode_pages_range() by invalidate_inode_pages2_range >> - Call fscache_invalidate with FSCACHE_INVAL_DIO_WRITE if we're doing DIO >> >> Note: I'm not really sure this last change is required, it doesn't really >> affect generic/647 result, but seems to be the most correct. >> >> diff --git a/fs/ceph/file.c b/fs/ceph/file.c >> index 5072570c2203..b2743c342305 100644 >> --- a/fs/ceph/file.c >> +++ b/fs/ceph/file.c >> @@ -1605,7 +1605,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, >> if (ret < 0) >> return ret; >> - ceph_fscache_invalidate(inode, false); >> + ceph_fscache_invalidate(inode, (iocb->ki_flags & IOCB_DIRECT)); >> ret = invalidate_inode_pages2_range(inode->i_mapping, >> pos >> PAGE_SHIFT, >> (pos + count - 1) >> PAGE_SHIFT); >> @@ -1895,6 +1895,15 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, >> req->r_inode = inode; >> req->r_mtime = mtime; >> + if (IS_ENCRYPTED(inode) && (iocb->ki_flags & IOCB_DIRECT)) { >> + ret = invalidate_inode_pages2_range( >> + inode->i_mapping, >> + write_pos >> PAGE_SHIFT, >> + (write_pos + write_len - 1) >> PAGE_SHIFT); >> + if (ret < 0) >> + dout("invalidate_inode_pages2_range returned %d\n", ret); >> + } >> + >> /* Set up the assertion */ >> if (rmw) { >> /* >> >