> Subject: Re: [Patch v2 14/15] CIFS: Add support for direct I/O write > > On 6/26/2018 12:39 AM, Long Li wrote: > >> Subject: Re: [Patch v2 14/15] CIFS: Add support for direct I/O write > >> > >> On 5/30/2018 3:48 PM, Long Li wrote: > >>> From: Long Li <longli@xxxxxxxxxxxxx> > >>> > >>> Implement the function for direct I/O write. It doesn't support AIO, > >>> which will be implemented in a follow up patch. > >>> > >>> Signed-off-by: Long Li <longli@xxxxxxxxxxxxx> > >>> --- > >>> fs/cifs/cifsfs.h | 1 + > >>> fs/cifs/file.c | 165 > >> +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > >>> 2 files changed, 166 insertions(+) > >>> > >>> diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index > >>> 7fba9aa..e9c5103 100644 > >>> --- a/fs/cifs/cifsfs.h > >>> +++ b/fs/cifs/cifsfs.h > >>> @@ -105,6 +105,7 @@ extern ssize_t cifs_user_readv(struct kiocb > >>> *iocb, > >> struct iov_iter *to); > >>> extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); > >>> extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); > >>> extern ssize_t cifs_user_writev(struct kiocb *iocb, struct > >>> iov_iter *from); > >>> +extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct > >>> +iov_iter *from); > >>> extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter > *from); > >>> extern int cifs_lock(struct file *, int, struct file_lock *); > >>> extern int cifs_fsync(struct file *, loff_t, loff_t, int); diff > >>> --git a/fs/cifs/file.c b/fs/cifs/file.c index e6e6f24..8c385b1 > >>> 100644 > >>> --- a/fs/cifs/file.c > >>> +++ b/fs/cifs/file.c > >>> @@ -2461,6 +2461,35 @@ cifs_uncached_writedata_release(struct kref > >>> *refcount) > >>> > >>> static void collect_uncached_write_data(struct cifs_aio_ctx > >>> *ctx); > >>> > >>> +static void cifs_direct_writedata_release(struct kref *refcount) { > >>> + int i; > >>> + struct cifs_writedata *wdata = container_of(refcount, > >>> + struct cifs_writedata, refcount); > >>> + > >>> + for (i = 0; i < wdata->nr_pages; i++) > >>> + put_page(wdata->pages[i]); > >>> + > >>> + cifs_writedata_release(refcount); > >>> +} > >>> + > >>> +static void cifs_direct_writev_complete(struct work_struct *work) { > >>> + struct cifs_writedata *wdata = container_of(work, > >>> + struct cifs_writedata, work); > >>> + struct inode *inode = d_inode(wdata->cfile->dentry); > >>> + struct cifsInodeInfo *cifsi = CIFS_I(inode); > >>> + > >>> + spin_lock(&inode->i_lock); > >>> + cifs_update_eof(cifsi, wdata->offset, wdata->bytes); > >>> + if (cifsi->server_eof > inode->i_size) > >>> + i_size_write(inode, cifsi->server_eof); > >>> + spin_unlock(&inode->i_lock); > >>> + > >>> + complete(&wdata->done); > >>> + kref_put(&wdata->refcount, cifs_direct_writedata_release); } > >>> + > >>> static void > >>> cifs_uncached_writev_complete(struct work_struct *work) > >>> { > >>> @@ -2703,6 +2732,142 @@ static void > >>> collect_uncached_write_data(struct > >> cifs_aio_ctx *ctx) > >>> complete(&ctx->done); > >>> } > >>> > >>> +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter > >>> +*from) { > >>> + struct file *file = iocb->ki_filp; > >>> + ssize_t total_written = 0; > >>> + struct cifsFileInfo *cfile; > >>> + struct cifs_tcon *tcon; > >>> + struct cifs_sb_info *cifs_sb; > >>> + struct TCP_Server_Info *server; > >>> + pid_t pid; > >>> + unsigned long nr_pages; > >>> + loff_t offset = iocb->ki_pos; > >>> + size_t len = iov_iter_count(from); > >>> + int rc; > >>> + struct cifs_writedata *wdata; > >>> + > >>> + /* > >>> + * iov_iter_get_pages_alloc doesn't work with ITER_KVEC. > >>> + * In this case, fall back to non-direct write function. > >>> + */ > >>> + if (from->type & ITER_KVEC) { > >>> + cifs_dbg(FYI, "use non-direct cifs_user_writev for kvec > >> I/O\n"); > >>> + return cifs_user_writev(iocb, from); > >>> + } > >>> + > >>> + rc = generic_write_checks(iocb, from); > >>> + if (rc <= 0) > >>> + return rc; > >>> + > >>> + cifs_sb = CIFS_FILE_SB(file); > >>> + cfile = file->private_data; > >>> + tcon = tlink_tcon(cfile->tlink); > >>> + server = tcon->ses->server; > >>> + > >>> + if (!server->ops->async_writev) > >>> + return -ENOSYS; > >>> + > >>> + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) > >>> + pid = cfile->pid; > >>> + else > >>> + pid = current->tgid; > >>> + > >>> + do { > >>> + unsigned int wsize, credits; > >>> + struct page **pagevec; > >>> + size_t start; > >>> + ssize_t cur_len; > >>> + > >>> + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, > >>> + &wsize, &credits); > >>> + if (rc) > >>> + break; > >>> + > >>> + cur_len = iov_iter_get_pages_alloc( > >>> + from, &pagevec, wsize, &start); > >>> + if (cur_len < 0) { > >>> + cifs_dbg(VFS, > >>> + "direct_writev couldn't get user pages " > >>> + "(rc=%zd) iter type %d iov_offset %lu count" > >>> + " %lu\n", > >>> + cur_len, from->type, > >>> + from->iov_offset, from->count); > >>> + dump_stack(); > >>> + break; > >>> + } > >>> + if (cur_len < 0) > >>> + break; > >> > >> This cur_len < 0 test is redundant with the prior if(), delete. > >>> + > >>> + nr_pages = (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; > >> > >> Am I misreading, or will this return be one more page than needed? If > >> start (the first byte offset) is > 0, nr_pages will already be one. > >> And if cur_len is 4KB, even if start is 0, nr_pages will be two. > > > > I think the calculation is correct, assuming cur_len > 0. (which > > should be the case if we reach here) > > Err, cur_len could possibly be zero, the prior line only breaks the loop if > cur_len < 0. I will look into this. It's a good idea to return error on 0 since we have no way to proceed. > > > If cur_len is 4kb and start is 0, nr_pages will be 1. > > Hmm, I guess. But again, it seems as if this page handling is all being coded > inline, in subtly different ways. I'm just concerned about clarity and > robustness. To me, if it's hard to review, it's even harder to maintain. I will address this in an updated patch. > > Tom. > > > > >> > >>> + > >>> + wdata = cifs_writedata_direct_alloc(pagevec, > >>> + cifs_direct_writev_complete); > >>> + if (!wdata) { > >>> + rc = -ENOMEM; > >>> + add_credits_and_wake_if(server, credits, 0); > >>> + break; > >>> + } > >>> + > >>> + wdata->nr_pages = nr_pages; > >>> + wdata->page_offset = start; > >>> + wdata->pagesz = PAGE_SIZE; > >>> + wdata->tailsz = > >>> + nr_pages > 1 ? > >>> + cur_len - (PAGE_SIZE - start) - > >>> + (nr_pages - 2) * PAGE_SIZE : > >>> + cur_len; > >>> + > >>> + wdata->sync_mode = WB_SYNC_ALL; > >>> + wdata->offset = (__u64)offset; > >>> + wdata->cfile = cifsFileInfo_get(cfile); > >>> + wdata->pid = pid; > >>> + wdata->bytes = cur_len; > >>> + wdata->credits = credits; > >>> + > >>> + rc = 0; > >>> + if (wdata->cfile->invalidHandle) > >>> + rc = cifs_reopen_file(wdata->cfile, false); > >>> + > >>> + if (!rc) > >>> + rc = server->ops->async_writev(wdata, > >>> + cifs_direct_writedata_release); > >>> + > >>> + if (rc) { > >>> + add_credits_and_wake_if(server, wdata->credits, 0); > >>> + kref_put(&wdata->refcount, > >>> + cifs_direct_writedata_release); > >>> + if (rc == -EAGAIN) > >>> + continue; > >>> + break; > >>> + } > >> > >> Same comments as for previous patch re the if (rc) ladder, and the > >> break/continues both being better expressed as careful goto's. > >> > >>> + > >>> + wait_for_completion(&wdata->done); > >>> + if (wdata->result) { > >>> + rc = wdata->result; > >>> + kref_put(&wdata->refcount, > >>> + cifs_direct_writedata_release); > >>> + if (rc == -EAGAIN) > >>> + continue; > >>> + break; > >>> + } > >>> + > >>> + kref_put(&wdata->refcount, cifs_direct_writedata_release); > >>> + > >>> + iov_iter_advance(from, cur_len); > >>> + total_written += cur_len; > >>> + offset += cur_len; > >>> + len -= cur_len; > >>> + } while (len); > >>> + > >>> + if (unlikely(!total_written)) > >>> + return rc; > >>> + > >>> + iocb->ki_pos += total_written; > >>> + return total_written; > >>> + > >>> +} > >>> + > >>> ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) > >>> { > >>> struct file *file = iocb->ki_filp; > >>> ��.n��������+%������w��{.n�����{���fk��ܨ}���Ơz�j:+v�����w����ޙ��&�)ߡ�a����z�ޗ���ݢj��w�f