2011/4/12 Pavel Shilovsky <piastryyy@xxxxxxxxx>: > 2011/4/12 Jeff Layton <jlayton@xxxxxxxxxx>: >> On Tue, 12 Apr 2011 13:43:07 +0400 >> Pavel Shilovsky <piastryyy@xxxxxxxxx> wrote: >> >>> 2011/4/2 Jeff Layton <jlayton@xxxxxxxxxx>: >>> > Add the ability for CIFS to do an asynchronous write. The kernel will >>> > set the frame up as it would for a "normal" SMBWrite2 request, and use >>> > cifs_call_async to send it. The mid callback will then be configured to >>> > handle the result. >>> > >>> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> >>> > --- >>> > fs/cifs/cifsproto.h | 19 +++++ >>> > fs/cifs/cifssmb.c | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++ >>> > 2 files changed, 238 insertions(+), 0 deletions(-) >>> >>> Looks good too, except two small notes. >>> >> >> Thanks for looking. I respun this yesterday actually and the new patch >> is a bit different. With the new patchset, we'll no longer be >> constrained by the current wsize limit of 14 pages. It should be able >> to handle an arbitrary wsize, which should also help performance. >> >> >>> > >>> > diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h >>> > index e255a2b..7b964df 100644 >>> > --- a/fs/cifs/cifsproto.h >>> > +++ b/fs/cifs/cifsproto.h >>> > @@ -21,6 +21,7 @@ >>> > #ifndef _CIFSPROTO_H >>> > #define _CIFSPROTO_H >>> > #include <linux/nls.h> >>> > +#include <linux/pagevec.h> >>> > >>> > struct statfs; >>> > struct smb_vol; >>> > @@ -433,4 +434,22 @@ extern int mdfour(unsigned char *, unsigned char *, int); >>> > extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); >>> > extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, >>> > unsigned char *p24); >>> > + >>> > +/* asynchronous write support */ >>> > +struct cifs_writedata { >>> > + struct list_head pending; >>> > + struct kref refcount; >>> > + struct completion completion; >>> > + struct work_struct work; >>> > + struct cifsFileInfo *cfile; >>> > + __u64 offset; >>> > + unsigned int bytes; >>> > + int result; >>> > + struct page *pages[PAGEVEC_SIZE]; >>> > +}; >>> > + >>> > +int cifs_async_writev(struct cifs_writedata *wdata); >>> > +struct cifs_writedata *cifs_writedata_alloc(void); >>> > +void cifs_writedata_release(struct kref *refcount); >>> > + >>> > #endif /* _CIFSPROTO_H */ >>> > diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c >>> > index 8de7e79..b308605 100644 >>> > --- a/fs/cifs/cifssmb.c >>> > +++ b/fs/cifs/cifssmb.c >>> > @@ -32,6 +32,7 @@ >>> > #include <linux/vfs.h> >>> > #include <linux/slab.h> >>> > #include <linux/posix_acl_xattr.h> >>> > +#include <linux/pagemap.h> >>> > #include <asm/uaccess.h> >>> > #include "cifspdu.h" >>> > #include "cifsglob.h" >>> > @@ -1604,6 +1605,224 @@ CIFSSMBWrite(const int xid, struct cifs_tcon *tcon, >>> > return rc; >>> > } >>> > >>> > +void >>> > +cifs_writedata_release(struct kref *refcount) >>> > +{ >>> > + struct cifs_writedata *wdata = container_of(refcount, >>> > + struct cifs_writedata, refcount); >>> > + struct address_space *mapping; >>> > + >>> > + if (wdata->cfile) { >>> > + mapping = wdata->cfile->dentry->d_inode->i_mapping; >>> > + >>> > + spin_lock(&mapping->private_lock); >>> > + list_del(&wdata->pending); >>> > + spin_unlock(&mapping->private_lock); >>> > + >>> > + cifsFileInfo_put(wdata->cfile); >>> > + } >>> > + >>> > + kfree(wdata); >>> > +} >>> > + >>> > +static void >>> > +cifs_writev_complete(struct work_struct *work) >>> > +{ >>> > + struct cifs_writedata *wdata = container_of(work, >>> > + struct cifs_writedata, work); >>> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >>> ^^^ this variable is unused. >>> >> Not exactly. >> >>> > + struct inode *inode = wdata->cfile->dentry->d_inode; >>> > + struct page *page; >>> > + int i = 0; >>> > + >>> > + if (wdata->result == 0) { >>> > + cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); >>> > + cifs_stats_bytes_written(tcon, wdata->bytes); >> ^^^^^ >> The tcon is used here > > Strange, the compiler (gcc-4.4.4-1ubuntu2) reports it: > /home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: In function > ‘cifs_writev_complete’: > /home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c:1633: warning: unused > variable ‘tcon’ > > but you are right - it is used. figured it out: I compiled module without CONFIG_CIFS_STATS. > >>> > + } >>> > + >>> > + for (i = 0; i < ARRAY_SIZE(wdata->pages) && (page = wdata->pages[i]); >>> > + i++) { >>> > + if (wdata->result == -EAGAIN) >>> > + __set_page_dirty_nobuffers(page); >>> > + else if (wdata->result < 0) >>> > + SetPageError(page); >>> > + end_page_writeback(page); >>> > + page_cache_release(page); >>> > + } >>> > + if (wdata->result != -EAGAIN) >>> > + mapping_set_error(inode->i_mapping, wdata->result); >>> > + complete(&wdata->completion); >>> > + kref_put(&wdata->refcount, cifs_writedata_release); >>> > +} >>> > + >>> > +struct cifs_writedata * >>> > +cifs_writedata_alloc(void) >>> > +{ >>> > + struct cifs_writedata *wdata; >>> > + >>> > + wdata = kzalloc(sizeof(*wdata), GFP_NOFS); >>> > + if (wdata != NULL) { >>> > + INIT_LIST_HEAD(&wdata->pending); >>> > + INIT_WORK(&wdata->work, cifs_writev_complete); >>> > + kref_init(&wdata->refcount); >>> > + init_completion(&wdata->completion); >>> > + } >>> > + return wdata; >>> > +} >>> > + >>> > +/* >>> > + * Check the midState and signature on received buffer (if any), and queue the >>> > + * workqueue completion task. >>> > + */ >>> > +static void >>> > +cifs_writev_callback(struct mid_q_entry *mid) >>> > +{ >>> > + struct cifs_writedata *wdata = mid->callback_data; >>> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >>> > + unsigned int written; >>> > + WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; >>> > + >>> > + switch(mid->midState) { >>> > + case MID_RESPONSE_RECEIVED: >>> > + wdata->result = cifs_check_receive(mid, tcon->ses->server, 0); >>> > + if (wdata->result != 0) >>> > + break; >>> > + >>> > + written = le16_to_cpu(smb->CountHigh); >>> > + written <<= 16; >>> > + written += le16_to_cpu(smb->Count); >>> > + /* >>> > + * Mask off high 16 bits when bytes written as returned >>> > + * by the server is greater than bytes requested by the >>> > + * client. OS/2 servers are known to set incorrect >>> > + * CountHigh values. >>> > + */ >>> > + if (written > wdata->bytes) >>> > + written &= 0xFFFF; >>> > + >>> > + if (written < wdata->bytes) >>> > + wdata->result = -ENOSPC; >>> > + else >>> > + wdata->bytes = written; >>> > + break; >>> > + case MID_REQUEST_SUBMITTED: >>> > + case MID_RETRY_NEEDED: >>> > + wdata->result = -EAGAIN; >>> > + break; >>> > + default: >>> > + wdata->result = -EIO; >>> > + break; >>> > + } >>> > + >>> > + queue_work(system_nrt_wq, &wdata->work); >>> > + DeleteMidQEntry(mid); >>> > + atomic_dec(&tcon->ses->server->inFlight); >>> > + wake_up(&tcon->ses->server->request_q); >>> > +} >>> > + >>> > +/* cifs_async_writev - send an async write, and set up mid to handle result */ >>> > +int >>> > +cifs_async_writev(struct cifs_writedata *wdata) >>> > +{ >>> > + int i, rc = -EACCES; >>> > + WRITE_REQ *smb = NULL; >>> > + int wct; >>> > + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); >>> > + struct inode *inode = wdata->cfile->dentry->d_inode; >>> > + unsigned int npages = ARRAY_SIZE(wdata->pages); >>> > + struct kvec *iov = NULL; >>> > + >>> > + cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); >>> ^^^^ bytes is >>> zero here. You calculate it next - I suggest to move this info message >>> after you get right bytes value. >>> >> >> Good catch. Will fix. >> >>> > + >>> > + if (tcon->ses->capabilities & CAP_LARGE_FILES) { >>> > + wct = 14; >>> > + } else { >>> > + wct = 12; >>> > + if (wdata->offset >> 32 > 0) { >>> > + /* can not handle big offset for old srv */ >>> > + return -EIO; >>> > + } >>> > + } >>> > + >>> > + rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&smb); >>> > + if (rc) >>> > + goto async_writev_out; >>> > + >>> > + /* FIXME: only allocate number of kvecs we need */ >>> > + iov = kzalloc((npages + 1) * sizeof(*iov), GFP_NOFS); >>> > + if (iov == NULL) { >>> > + rc = -ENOMEM; >>> > + goto async_writev_out; >>> > + } >>> > + >>> > + smb->AndXCommand = 0xFF; /* none */ >>> > + smb->Fid = wdata->cfile->netfid; >>> > + smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); >>> > + if (wct == 14) >>> > + smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); >>> > + smb->Reserved = 0xFFFFFFFF; >>> > + smb->WriteMode = 0; >>> > + smb->Remaining = 0; >>> > + >>> > + smb->DataOffset = >>> > + cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); >>> > + >>> > + /* 4 for RFC1001 length + 1 for BCC */ >>> > + iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; >>> > + iov[0].iov_base = smb; >>> > + >>> > + /* marshal up the pages into iov array */ >>> > + npages = 0; >>> > + wdata->bytes = 0; >>> > + for (i = 0; i < ARRAY_SIZE(wdata->pages) && wdata->pages[i]; i++) { >>> > + if (wdata->bytes + PAGE_CACHE_SIZE <= >>> > + CIFS_SB(inode->i_sb)->wsize) { >>> > + iov[i + 1].iov_len = min(inode->i_size - >>> > + page_offset(wdata->pages[i]), >>> > + (loff_t)PAGE_CACHE_SIZE); >>> > + iov[i + 1].iov_base = kmap(wdata->pages[i]); >>> > + wdata->bytes += iov[i + 1].iov_len; >>> > + npages++; >>> > + } else { >>> > + /* if we're beyond wsize, then re-mark page dirty */ >>> > + __set_page_dirty_nobuffers(wdata->pages[i]); >>> > + } >>> > + } >>> > + >>> > + smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF); >>> > + smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16); >>> > + >>> > + if (wct == 14) { >>> > + inc_rfc1001_len(&smb->hdr, wdata->bytes + 1); >>> > + put_bcc(wdata->bytes + 1, &smb->hdr); >>> > + } else { >>> > + /* wct == 12 */ >>> > + struct smb_com_writex_req *smbw = >>> > + (struct smb_com_writex_req *)smb; >>> > + inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); >>> > + put_bcc(wdata->bytes + 5, &smbw->hdr); >>> > + iov[0].iov_len += 4; /* pad bigger by four bytes */ >>> > + } >>> > + >>> > + kref_get(&wdata->refcount); >>> > + rc = cifs_call_async(tcon->ses->server, iov, npages + 1, >>> > + cifs_writev_callback, wdata); >>> > + >>> > + if (rc == 0) >>> > + cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); >>> > + else >>> > + kref_put(&wdata->refcount, cifs_writedata_release); >>> > + >>> > + /* send is done, unmap pages */ >>> > + for (i = 0; i < npages; i++) >>> > + kunmap(wdata->pages[i]); >>> > + >>> > +async_writev_out: >>> > + cifs_small_buf_release(smb); >>> > + kfree(iov); >>> > + return rc; >>> > +} >>> > + >>> > int >>> > CIFSSMBWrite2(const int xid, struct cifs_tcon *tcon, >>> > const int netfid, const unsigned int count, >>> > -- >>> > 1.7.4 >>> > >>> > -- >>> > To unsubscribe from this list: send the line "unsubscribe linux-cifs" in >>> > the body of a message to majordomo@xxxxxxxxxxxxxxx >>> > More majordomo info at http://vger.kernel.org/majordomo-info.html >>> > >>> >>> Anyway, it's good. You can my "Reviewed-by: Pavel Shilovsky >>> <piastry@xxxxxxxxxxx>" tag when you fix the notes above. >>> >> >> >> -- >> Jeff Layton <jlayton@xxxxxxxxxx> >> > > > > -- > Best regards, > Pavel Shilovsky. > -- Best regards, Pavel Shilovsky. -- To unsubscribe from this list: send the line "unsubscribe linux-cifs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html