On Fri, 13 Aug 2021, David Howells wrote:
> Trying to use a swapfile on NFS results in every DIO write failing with
> ETXTBSY because generic_write_checks(), as called by nfs_direct_write()
> from nfs_direct_IO(), forbids writes to swapfiles.

Could we just remove this check from generic_write_checks(), and instead
call deny_write_access() in swap_on? Then user-space wouldn't be able to
open a swap-file for write, so there would be no need to check on every
single write.

NeilBrown

>
> Fix this by introducing a new kiocb flag, IOCB_SWAP, that's set by the swap
> code to indicate that the swapper is doing this operation and so overrule
> the check in generic_write_checks().
>
> Without this patch, the following is seen:
>
>     Write error on dio swapfile (3800334336)
>
> Altering __swap_writepage() to show the error shows:
>
>     Write error (-26) on dio swapfile (3800334336)
>
> Tested by swapping off all swap partitions and then swapping on a prepared
> NFS file (CONFIG_NFS_SWAP=y is also needed). 
Enough copies of the > following program then need to be run to force swapping to occur (at least > one per gigabyte of RAM): > > #include <stdbool.h> > #include <stdio.h> > #include <stdlib.h> > #include <unistd.h> > #include <sys/mman.h> > int main() > { > unsigned int pid = getpid(), iterations = 0; > size_t i, j, size = 1024 * 1024 * 1024; > char *p; > bool mismatch; > p = malloc(size); > if (!p) { > perror("malloc"); > exit(1); > } > srand(pid); > for (i = 0; i < size; i += 4) > *(unsigned int *)(p + i) = rand(); > do { > for (j = 0; j < 16; j++) { > for (i = 0; i < size; i += 4096) > *(unsigned int *)(p + i) += 1; > iterations++; > } > mismatch = false; > srand(pid); > for (i = 0; i < size; i += 4) { > unsigned int r = rand(); > unsigned int v = *(unsigned int *)(p + i); > if (i % 4096 == 0) > v -= iterations; > if (v != r) { > fprintf(stderr, "mismatch %zx: %x != %x (diff %x)\n", > i, v, r, v - r); > mismatch = true; > } > } > } while (!mismatch); > exit(1); > } > > > Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files") > Signed-off-by: David Howells <dhowells@xxxxxxxxxx> > cc: Darrick J. 
Wong <darrick.wong@xxxxxxxxxx> > cc: Christoph Hellwig <hch@xxxxxx> > cc: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > cc: linux-nfs@xxxxxxxxxxxxxxx > --- > > fs/read_write.c | 2 +- > include/linux/fs.h | 1 + > mm/page_io.c | 7 ++++--- > 3 files changed, 6 insertions(+), 4 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 9db7adf160d2..daef721ca67e 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1646,7 +1646,7 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) > loff_t count; > int ret; > > - if (IS_SWAPFILE(inode)) > + if (IS_SWAPFILE(inode) && !(iocb->ki_flags & IOCB_SWAP)) > return -ETXTBSY; > > if (!iov_iter_count(from)) > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 640574294216..b3e6a20f28ef 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -319,6 +319,7 @@ enum rw_hint { > /* iocb->ki_waitq is valid */ > #define IOCB_WAITQ (1 << 19) > #define IOCB_NOIO (1 << 20) > +#define IOCB_SWAP (1 << 21) /* This is a swap request */ > > struct kiocb { > struct file *ki_filp; > diff --git a/mm/page_io.c b/mm/page_io.c > index d597bc6e6e45..edb72bf624d2 100644 > --- a/mm/page_io.c > +++ b/mm/page_io.c > @@ -303,7 +303,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, > > iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); > init_sync_kiocb(&kiocb, swap_file); > - kiocb.ki_pos = page_file_offset(page); > + kiocb.ki_pos = page_file_offset(page); > + kiocb.ki_flags = IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP; > > set_page_writeback(page); > unlock_page(page); > @@ -324,8 +325,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, > */ > set_page_dirty(page); > ClearPageReclaim(page); > - pr_err_ratelimited("Write error on dio swapfile (%llu)\n", > - page_file_offset(page)); > + pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n", > + ret, page_file_offset(page)); > } > end_page_writeback(page); > return ret; > > >