2011/4/12 Jeff Layton <jlayton@xxxxxxxxxx>: > On Tue, 12 Apr 2011 13:43:07 +0400 > Pavel Shilovsky <piastryyy@xxxxxxxxx> wrote: > >> 2011/4/2 Jeff Layton <jlayton@xxxxxxxxxx>: >> > Add the ability for CIFS to do an asynchronous write. The kernel will >> > set the frame up as it would for a "normal" SMBWrite2 request, and use >> > cifs_call_async to send it. The mid callback will then be configured to >> > handle the result. >> > >> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> >> > --- >> > fs/cifs/cifsproto.h | 19 +++++ >> > fs/cifs/cifssmb.c | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++ >> > 2 files changed, 238 insertions(+), 0 deletions(-) >> >> Looks good too, except two small notes. >> > > Thanks for looking. I respun this yesterday actually and the new patch > is a bit different. With the new patchset, we'll no longer be > constrained by the current wsize limit of 14 pages. It should be able > to handle an arbitrary wsize, which should also help performance. > > >> > >> > diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h >> > index e255a2b..7b964df 100644 >> > --- a/fs/cifs/cifsproto.h >> > +++ b/fs/cifs/cifsproto.h >> > @@ -21,6 +21,7 @@ >> > #ifndef _CIFSPROTO_H >> > #define _CIFSPROTO_H >> > #include <linux/nls.h> >> > +#include <linux/pagevec.h> >> > >> > struct statfs; >> > struct smb_vol; >> > @@ -433,4 +434,22 @@ extern int mdfour(unsigned char *, unsigned char *, int); >> > extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); >> > extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, >> > unsigned char *p24); >> > + >> > +/* asynchronous write support */ >> > +struct cifs_writedata { >> > + struct list_head pending; >> > + struct kref refcount; >> > + struct completion completion; >> > + struct work_struct work; >> > + struct cifsFileInfo *cfile; >> > + __u64 offset; >> > + unsigned int bytes; >> > + int result; >> > + struct page *pages[PAGEVEC_SIZE]; >> > +}; >> > + >> > +int cifs_async_writev(struct cifs_writedata *wdata); >> > +struct cifs_writedata *cifs_writedata_alloc(void); >> > +void cifs_writedata_release(struct kref *refcount); >> > + >> > #endif /* _CIFSPROTO_H */ >> > diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c >> > index 8de7e79..b308605 100644 >> > --- a/fs/cifs/cifssmb.c >> > +++ b/fs/cifs/cifssmb.c >> > @@ -32,6 +32,7 @@ >> > #include <linux/vfs.h> >> > #include <linux/slab.h> >> > #include <linux/posix_acl_xattr.h> >> > +#include <linux/pagemap.h> >> > #include <asm/uaccess.h> >> > #include "cifspdu.h" >> > #include "cifsglob.h" >> > @@ -1604,6 +1605,224 @@ CIFSSMBWrite(const int xid, struct cifs_tcon *tcon, >> > return rc; >> > } >> > >> > +void >> > +cifs_writedata_release(struct kref *refcount) >> > +{ >> > + struct cifs_writedata *wdata = container_of(refcount, >> > + struct cifs_writedata, refcount); >> > + struct address_space *mapping; >> > + >> > + if (wdata->cfile) { >> > + mapping = wdata->cfile->dentry->d_inode->i_mapping; >> > + >> > + spin_lock(&mapping->private_lock); >> > + list_del(&wdata->pending); >> > + spin_unlock(&mapping->private_lock); >> > + >> > + cifsFileInfo_put(wdata->cfile); >> > + } >> > + >> > + kfree(wdata); >> > +} >> > + >> > +static void >> > +cifs_writev_complete(struct work_struct *work) >> > +{ >> > + struct cifs_writedata *wdata = container_of(work, >> > + struct cifs_writedata, work); >> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >> ^^^ this variable is unused. >> > Not exactly. > >> > + struct inode *inode = wdata->cfile->dentry->d_inode; >> > + struct page *page; >> > + int i = 0; >> > + >> > + if (wdata->result == 0) { >> > + cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); >> > + cifs_stats_bytes_written(tcon, wdata->bytes); > ^^^^^ > The tcon is used here Strange, the compiler (gcc-4.4.4-1ubuntu2) reports it: /home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: In function ‘cifs_writev_complete’: /home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c:1633: warning: unused variable ‘tcon’ but you are right - it is used. >> > + } >> > + >> > + for (i = 0; i < ARRAY_SIZE(wdata->pages) && (page = wdata->pages[i]); >> > + i++) { >> > + if (wdata->result == -EAGAIN) >> > + __set_page_dirty_nobuffers(page); >> > + else if (wdata->result < 0) >> > + SetPageError(page); >> > + end_page_writeback(page); >> > + page_cache_release(page); >> > + } >> > + if (wdata->result != -EAGAIN) >> > + mapping_set_error(inode->i_mapping, wdata->result); >> > + complete(&wdata->completion); >> > + kref_put(&wdata->refcount, cifs_writedata_release); >> > +} >> > + >> > +struct cifs_writedata * >> > +cifs_writedata_alloc(void) >> > +{ >> > + struct cifs_writedata *wdata; >> > + >> > + wdata = kzalloc(sizeof(*wdata), GFP_NOFS); >> > + if (wdata != NULL) { >> > + INIT_LIST_HEAD(&wdata->pending); >> > + INIT_WORK(&wdata->work, cifs_writev_complete); >> > + kref_init(&wdata->refcount); >> > + init_completion(&wdata->completion); >> > + } >> > + return wdata; >> > +} >> > + >> > +/* >> > + * Check the midState and signature on received buffer (if any), and queue the >> > + * workqueue completion task. >> > + */ >> > +static void >> > +cifs_writev_callback(struct mid_q_entry *mid) >> > +{ >> > + struct cifs_writedata *wdata = mid->callback_data; >> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >> > + unsigned int written; >> > + WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; >> > + >> > + switch(mid->midState) { >> > + case MID_RESPONSE_RECEIVED: >> > + wdata->result = cifs_check_receive(mid, tcon->ses->server, 0); >> > + if (wdata->result != 0) >> > + break; >> > + >> > + written = le16_to_cpu(smb->CountHigh); >> > + written <<= 16; >> > + written += le16_to_cpu(smb->Count); >> > + /* >> > + * Mask off high 16 bits when bytes written as returned >> > + * by the server is greater than bytes requested by the >> > + * client. OS/2 servers are known to set incorrect >> > + * CountHigh values. >> > + */ >> > + if (written > wdata->bytes) >> > + written &= 0xFFFF; >> > + >> > + if (written < wdata->bytes) >> > + wdata->result = -ENOSPC; >> > + else >> > + wdata->bytes = written; >> > + break; >> > + case MID_REQUEST_SUBMITTED: >> > + case MID_RETRY_NEEDED: >> > + wdata->result = -EAGAIN; >> > + break; >> > + default: >> > + wdata->result = -EIO; >> > + break; >> > + } >> > + >> > + queue_work(system_nrt_wq, &wdata->work); >> > + DeleteMidQEntry(mid); >> > + atomic_dec(&tcon->ses->server->inFlight); >> > + wake_up(&tcon->ses->server->request_q); >> > +} >> > + >> > +/* cifs_async_writev - send an async write, and set up mid to handle result */ >> > +int >> > +cifs_async_writev(struct cifs_writedata *wdata) >> > +{ >> > + int i, rc = -EACCES; >> > + WRITE_REQ *smb = NULL; >> > + int wct; >> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >> > + struct inode *inode = wdata->cfile->dentry->d_inode; >> > + unsigned int npages = ARRAY_SIZE(wdata->pages); >> > + struct kvec *iov = NULL; >> > + >> > + cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); >> ^^^^ bytes is >> zero here. You calculate it next - I suggest to move this info message >> after you get right bytes value. >> > > Good catch. Will fix. > >> > + >> > + if (tcon->ses->capabilities & CAP_LARGE_FILES) { >> > + wct = 14; >> > + } else { >> > + wct = 12; >> > + if (wdata->offset >> 32 > 0) { >> > + /* can not handle big offset for old srv */ >> > + return -EIO; >> > + } >> > + } >> > + >> > + rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&smb); >> > + if (rc) >> > + goto async_writev_out; >> > + >> > + /* FIXME: only allocate number of kvecs we need */ >> > + iov = kzalloc((npages + 1) * sizeof(*iov), GFP_NOFS); >> > + if (iov == NULL) { >> > + rc = -ENOMEM; >> > + goto async_writev_out; >> > + } >> > + >> > + smb->AndXCommand = 0xFF; /* none */ >> > + smb->Fid = wdata->cfile->netfid; >> > + smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); >> > + if (wct == 14) >> > + smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); >> > + smb->Reserved = 0xFFFFFFFF; >> > + smb->WriteMode = 0; >> > + smb->Remaining = 0; >> > + >> > + smb->DataOffset = >> > + cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); >> > + >> > + /* 4 for RFC1001 length + 1 for BCC */ >> > + iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; >> > + iov[0].iov_base = smb; >> > + >> > + /* marshal up the pages into iov array */ >> > + npages = 0; >> > + wdata->bytes = 0; >> > + for (i = 0; i < ARRAY_SIZE(wdata->pages) && wdata->pages[i]; i++) { >> > + if (wdata->bytes + PAGE_CACHE_SIZE <= >> > + CIFS_SB(inode->i_sb)->wsize) { >> > + iov[i + 1].iov_len = min(inode->i_size - >> > + page_offset(wdata->pages[i]), >> > + (loff_t)PAGE_CACHE_SIZE); >> > + iov[i + 1].iov_base = kmap(wdata->pages[i]); >> > + wdata->bytes += iov[i + 1].iov_len; >> > + npages++; >> > + } else { >> > + /* if we're beyond wsize, then re-mark page dirty */ >> > + __set_page_dirty_nobuffers(wdata->pages[i]); >> > + } >> > + } >> > + >> > + smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF); >> > + smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16); >> > + >> > + if (wct == 14) { >> > + inc_rfc1001_len(&smb->hdr, wdata->bytes + 1); >> > + put_bcc(wdata->bytes + 1, &smb->hdr); >> > + } else { >> > + /* wct == 12 */ >> > + struct smb_com_writex_req *smbw = >> > + (struct smb_com_writex_req *)smb; >> > + inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); >> > + put_bcc(wdata->bytes + 5, &smbw->hdr); >> > + iov[0].iov_len += 4; /* pad bigger by four bytes */ >> > + } >> > + >> > + kref_get(&wdata->refcount); >> > + rc = cifs_call_async(tcon->ses->server, iov, npages + 1, >> > + cifs_writev_callback, wdata); >> > + >> > + if (rc == 0) >> > + cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); >> > + else >> > + kref_put(&wdata->refcount, cifs_writedata_release); >> > + >> > + /* send is done, unmap pages */ >> > + for (i = 0; i < npages; i++) >> > + kunmap(wdata->pages[i]); >> > + >> > +async_writev_out: >> > + cifs_small_buf_release(smb); >> > + kfree(iov); >> > + return rc; >> > +} >> > + >> > int >> > CIFSSMBWrite2(const int xid, struct cifs_tcon *tcon, >> > const int netfid, const unsigned int count, >> > -- >> > 1.7.4 >> > >> > -- >> > To unsubscribe from this list: send the line "unsubscribe linux-cifs" in >> > the body of a message to majordomo@xxxxxxxxxxxxxxx >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> > >> >> Anyway, it's good. You can my "Reviewed-by: Pavel Shilovsky >> <piastry@xxxxxxxxxxx>" tag when you fix the notes above. >> > > > -- > Jeff Layton <jlayton@xxxxxxxxxx> > -- Best regards, Pavel Shilovsky. -- To unsubscribe from this list: send the line "unsubscribe linux-cifs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html