Hi Trond:
If old writeback (such as -ERESTARTSYS or -EINTR, etc.) exist in struct
address_space->wb_err, nfs_file_write() will always return the
unexpected error. filemap_check_wb_err() will return the old error
even if there is no new writeback error between filemap_sample_wb_err()
and filemap_check_wb_err().
```c
since = filemap_sample_wb_err() = 0 // never be error
errseq_sample
if (!(old & ERRSEQ_SEEN)) // nobody see the error
return 0;
nfs_wb_all // no new error
error = filemap_check_wb_err(..., since) != 0 // unexpected error
```
By the way, the following process have redundant code in nfs_file_write():
```c
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
result = filemap_fdatawait_range(file->f_mapping,
iocb->ki_pos - written,
iocb->ki_pos - 1);
if (result < 0)
goto out;
}
```
filemap_fdatawait_range() will always return 0, since patch 6c984083ec24
("NFS: Use of mapping_set_error() results in spurious errors") do not
save the error in struct address_space->flags:
filemap_fdatawait_range(file->f_mapping, ...) = 0
filemap_check_errors(mapping) = 0
test_bit(..., &mapping->flags) // flags always is 0
So the return value result is always 0, `if (result < 0)` is redundant
在 2022/5/14 22:27, trondmy@xxxxxxxxxx 写道:
From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
Any errors reported by the write() system call need to be cleared from
the file descriptor's error tracking. The current call to nfs_wb_all()
causes the error to be reported, but since it doesn't call
file_check_and_advance_wb_err(), we can end up reporting the same error
a second time when the application calls fsync().
Note that since Linux 4.13, the rule is that EIO may be reported for
write(), but it must be reported by a subsequent fsync(), so let's just
drop reporting it in write.
The check for nfs_ctx_key_to_expire() is just a duplicate to the one
already in nfs_write_end(), so let's drop that too.
Reported-by: ChenXiaoSong <chenxiaosong2@xxxxxxxxxx>
Fixes: ce368536dd61 ("nfs: nfs_file_write() should check for writeback errors")
Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
---
fs/nfs/file.c | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7c380e555224..87e4cd5e8fe2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -598,18 +598,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
.page_mkwrite = nfs_vm_page_mkwrite,
};
-static int nfs_need_check_write(struct file *filp, struct inode *inode,
- int error)
-{
- struct nfs_open_context *ctx;
-
- ctx = nfs_file_open_context(filp);
- if (nfs_error_is_fatal_on_server(error) ||
- nfs_ctx_key_to_expire(ctx, inode))
- return 1;
- return 0;
-}
-
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@@ -637,7 +625,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
- goto out;
+ return result;
}
nfs_clear_invalid_mapping(file->f_mapping);
@@ -656,6 +644,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
written = result;
iocb->ki_pos += written;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
result = filemap_fdatawrite_range(file->f_mapping,
@@ -673,17 +662,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
}
result = generic_write_sync(iocb, written);
if (result < 0)
- goto out;
+ return result;
+out:
/* Return error values */
error = filemap_check_wb_err(file->f_mapping, since);
- if (nfs_need_check_write(file, inode, error)) {
- int err = nfs_wb_all(inode);
- if (err < 0)
- result = err;
+ switch (error) {
+ default:
+ break;
+ case -EDQUOT:
+ case -EFBIG:
+ case -ENOSPC:
+ nfs_wb_all(inode);
+ error = file_check_and_advance_wb_err(file);
+ if (error < 0)
+ result = error;
}
- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
-out:
return result;
out_swapfile: