On Tue, 08 May 2007 20:44:11 +0100
David Howells <dhowells@xxxxxxxxxx> wrote:

> Implement support for writing to regular AFS files, including:
>
>  (1) write
>
>  (2) truncate
>
>  (3) fsync, fdatasync
>
>  (4) chmod, chown, chgrp, utime.
>
> AFS writeback attempts to batch writes into chunks as large as it can
> manage, up to the point that it writes back 65535 pages in one chunk or
> it meets a locked page.
>
> Furthermore, if a page has been written to using a particular key, then
> should another write to that page use some other key, the first write will
> be flushed before the second is allowed to take place.  If the first write
> fails due to a security error, then the page will be scrapped and reread
> before the second write takes place.
>
> If a page is dirty and the callback on it is broken by the server, then the
> dirty data is not discarded (same behaviour as NFS).
>
> Shared-writable mappings are not supported by this patch.

The below isn't a review - it's some random cherry-picking.

> ...
>
> +int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
> +		      pgoff_t first, pgoff_t last,
> +		      unsigned offset, unsigned to,
> +		      const struct afs_wait_mode *wait_mode)
> +{
> +	struct afs_vnode *vnode = wb->vnode;
> +	struct afs_call *call;
> +	loff_t size, pos, i_size;
> +	__be32 *bp;
> +
> +	_enter(",%x,{%x:%u},,",
> +	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
> +
> +	size = to - offset;
> +	if (first != last)
> +		size += (loff_t)(last - first) << PAGE_SHIFT;
> +	pos = (loff_t)first << PAGE_SHIFT;
> +	pos += offset;
> +
> +	i_size = i_size_read(&vnode->vfs_inode);
> +	if (pos + size > i_size)
> +		i_size = size + pos;
> +
> +	_debug("size %llx, at %llx, i_size %llx",
> +	       (unsigned long long) size, (unsigned long long) pos,
> +	       (unsigned long long) i_size);
> +
> +	BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store

You're sure this isn't user-triggerable?

> +static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
> +			    struct key *key, unsigned offset, unsigned to)
> +{
> +	unsigned eof, tail, start, stop, len;
> +	loff_t i_size, pos;
> +	void *p;
> +	int ret;
> +
> +	_enter("");
> +
> +	if (offset == 0 && to == PAGE_SIZE)
> +		return 0;
> +
> +	p = kmap(page);
> +
> +	i_size = i_size_read(&vnode->vfs_inode);
> +	pos = (loff_t) page->index << PAGE_SHIFT;
> +	if (pos >= i_size) {
> +		/* partial write, page beyond EOF */
> +		_debug("beyond");
> +		if (offset > 0)
> +			memset(p, 0, offset);
> +		if (to < PAGE_SIZE)
> +			memset(p + to, 0, PAGE_SIZE - to);
> +		kunmap(page);
> +		return 0;
> +	}
> +
> +	if (i_size - pos >= PAGE_SIZE) {
> +		/* partial write, page entirely before EOF */
> +		_debug("before");
> +		tail = eof = PAGE_SIZE;
> +	} else {
> +		/* partial write, page overlaps EOF */
> +		eof = i_size - pos;
> +		_debug("overlap %u", eof);
> +		tail = max(eof, to);
> +		if (tail < PAGE_SIZE)
> +			memset(p + tail, 0, PAGE_SIZE - tail);
> +		if (offset > eof)
> +			memset(p + eof, 0, PAGE_SIZE - eof);
> +	}
> +
> +	kunmap(page);

kmap_atomic() could be used here, and is better.  We have this
zero_user_page() thing heading in which could perhaps be used here also.
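For instance, the beyond-EOF branch could become something like this
(untested sketch, and it assumes zero_user_page() goes in with the
(page, offset, size, km_type) signature that's currently proposed):

	if (pos >= i_size) {
		/* partial write, page beyond EOF: just zero the bits
		 * which won't be written over - no kmap() needed */
		_debug("beyond");
		if (offset > 0)
			zero_user_page(page, 0, offset, KM_USER0);
		if (to < PAGE_SIZE)
			zero_user_page(page, to, PAGE_SIZE - to,
				       KM_USER0);
		return 0;
	}

That gets rid of the long-held kmap() for this branch, and the
overlaps-EOF branch could be converted the same way.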
> +	ret = 0;
> +	if (offset > 0 || eof > to) {
> +		/* need to fill one or two bits that aren't going to be written
> +		 * (cover both fillers in one read if there are two) */
> +		start = (offset > 0) ? 0 : to;
> +		stop = (eof > to) ? eof : offset;
> +		len = stop - start;
> +		_debug("wr=%u-%u av=0-%u rd=%u@%u",
> +		       offset, to, eof, start, len);
> +		ret = afs_fill_page(vnode, key, start, len, page);
> +	}
> +
> +	_leave(" = %d", ret);
> +	return ret;
> +}
> +
>
> ...
>
> +	ASSERTRANGE(wb->first, <=, index, <=, wb->last);

wow.

> +}
> +
> +/*
> + * finalise part of a write to a page
> + */
> +int afs_commit_write(struct file *file, struct page *page,
> +		     unsigned offset, unsigned to)
> +{
> +	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
> +	loff_t i_size, maybe_i_size;
> +
> +	_enter("{%x:%u},{%lx},%u,%u",
> +	       vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
> +
> +	maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
> +	maybe_i_size += to;
> +
> +	i_size = i_size_read(&vnode->vfs_inode);
> +	if (maybe_i_size > i_size) {
> +		spin_lock(&vnode->writeback_lock);
> +		i_size = i_size_read(&vnode->vfs_inode);
> +		if (maybe_i_size > i_size)
> +			i_size_write(&vnode->vfs_inode, maybe_i_size);
> +		spin_unlock(&vnode->writeback_lock);
> +	}
> +
> +	set_page_dirty(page);
> +
> +	if (PageDirty(page))
> +		_debug("dirtied");
> +
> +	return 0;
> +}

One would normally run mark_inode_dirty() after any i_size_write()?

> +/*
> + * kill all the pages in the given range
> + */

We can invalidate pages and we can truncate them and we can clean them.
But here we have a new operation, "killing".  I wonder what that is.

> +static void afs_kill_pages(struct afs_vnode *vnode, bool error,
> +			   pgoff_t first, pgoff_t last)
> +{
> +	struct pagevec pv;
> +	unsigned count, loop;
> +
> +	_enter("{%x:%u},%lx-%lx",
> +	       vnode->fid.vid, vnode->fid.vnode, first, last);
> +
> +	pagevec_init(&pv, 0);
> +
> +	do {
> +		_debug("kill %lx-%lx", first, last);
> +
> +		count = last - first + 1;
> +		if (count > PAGEVEC_SIZE)
> +			count = PAGEVEC_SIZE;
> +		pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
> +					      first, count, pv.pages);
> +		ASSERTCMP(pv.nr, ==, count);
> +
> +		for (loop = 0; loop < count; loop++) {
> +			ClearPageUptodate(pv.pages[loop]);
> +			if (error)
> +				SetPageError(pv.pages[loop]);
> +			end_page_writeback(pv.pages[loop]);
> +		}
> +
> +		first += count;
> +		__pagevec_release(&pv);
> +	} while (first < last);
> +
> +	_leave("");
> +}
> +
> +/*
> + * write a page back to the server
> + * - the caller locked the page for us
> + */
> +int afs_writepage(struct page *page, struct writeback_control *wbc)
> +{
> +	struct backing_dev_info *bdi = page->mapping->backing_dev_info;
> +	struct afs_writeback *wb;
> +	int ret;
> +
> +	_enter("{%lx},", page->index);
> +
> +	if (wbc->sync_mode != WB_SYNC_NONE)
> +		wait_on_page_writeback(page);

Didn't the VFS already do that?

> +	if (PageWriteback(page) || !PageDirty(page)) {
> +		unlock_page(page);
> +		return 0;
> +	}

And some of that?

> +	wb = (struct afs_writeback *) page_private(page);
> +	ASSERT(wb != NULL);
> +
> +	ret = afs_write_back_from_locked_page(wb, page);
> +	unlock_page(page);
> +	if (ret < 0) {
> +		_leave(" = %d", ret);
> +		return 0;
> +	}
> +
> +	wbc->nr_to_write -= ret;
> +	if (wbc->nonblocking && bdi_write_congested(bdi))
> +		wbc->encountered_congestion = 1;
> +
> +	_leave(" = 0");
> +	return 0;
> +}

I have this vague prehistoric memory that something can go wrong at the VFS
level if the address_space writes back more pages than it was asked to.  But
I forget what the issue was, and it would be silly to have an issue with
that anyway.  Something to keep an eye out for.
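If it does turn out to matter, the overshoot would be easy enough to bound
from here.  Purely illustrative - this assumes
afs_write_back_from_locked_page() grows a hypothetical page-count limit
argument:

	/* hypothetical: let the VM's request bound the batch size */
	ret = afs_write_back_from_locked_page(wb, page,
					      wbc->nr_to_write);

i.e. the batching loop would stop extending the chunk once it covers
nr_to_write pages, rather than always going for the full 65535.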