On Tue, 16 Dec 2008 17:22:37 +0200 Boaz Harrosh <bharrosh@xxxxxxxxxxx> wrote: > > OK Now we start to read and write from osd-objects, page-by-page. > The page index is the object's offset. > > > ... > > +/* > + * Callback function when writepage finishes. Check for errors, unlock, clean > + * up, etc. > + */ > +void writepage_done(struct osd_request *req, void *p) > +{ > + int ret; > + struct page *page = (struct page *)p; unneeded cast > + struct inode *inode = page->mapping->host; > + struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; > + > + ret = check_ok(req); > + free_osd_req(req); > + atomic_dec(&sbi->s_curr_pending); > + > + if (ret) { > + if (ret == -ENOSPC) > + set_bit(AS_ENOSPC, &page->mapping->flags); > + else > + set_bit(AS_EIO, &page->mapping->flags); > + > + SetPageError(page); > + } > + > + end_page_writeback(page); > + unlock_page(page); > +} > + > +/* > + * Write a page to disk. page->index gives us the page number. The page is > + * locked before this function is called. We write asynchronously and then the > + * callback function (writepage_done) is called. We signify that the operation > + * has completed by unlocking the page and calling end_page_writeback(). > + */ > +static int exofs_writepage(struct page *page, struct writeback_control *wbc) > +{ > + struct inode *inode = page->mapping->host; > + struct exofs_i_info *oi = EXOFS_I(inode); > + loff_t i_size = i_size_read(inode); > + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; > + unsigned offset = 0; > + struct osd_request *req = NULL; > + struct exofs_sb_info *sbi; > + uint64_t start; > + uint64_t len = PAGE_CACHE_SIZE; > + unsigned char *kaddr; > + int ret = 0; > + > + if (!PageLocked(page)) > + BUG(); Could use BUG_ON(!PageLocked) > + /* if the object has not been created, and we are not in sync mode, > + * just return. otherwise, wait. */ > + if (!ObjCreated(oi)) { > + if (!Obj2BCreated(oi)) > + BUG(); BUG_ON()? > + if (wbc->sync_mode == WB_SYNC_NONE) { > + redirty_page_for_writepage(wbc, page); > + unlock_page(page); > + ret = 0; > + goto out; > + } else { > + wait_event(oi->i_wq, ObjCreated(oi)); > + } > + } > + > + /* in this case, the page is within the limits of the file */ > + if (page->index < end_index) > + goto do_it; > + > + offset = i_size & (PAGE_CACHE_SIZE - 1); > + len = offset; > + > + /*in this case, the page is outside the limits (truncate in progress)*/ > + if (page->index >= end_index + 1 || !offset) { > + unlock_page(page); > + goto out; > + } > + > +do_it: > + BUG_ON(PageWriteback(page)); > + set_page_writeback(page); > + start = page->index << PAGE_CACHE_SHIFT; > + sbi = inode->i_sb->s_fs_info; > + > + kaddr = page_address(page); > + > + req = prepare_osd_write(sbi->s_dev, sbi->s_pid, > + inode->i_ino + EXOFS_OBJ_OFF, len, start, 0, > + kaddr); Does prepare_osd_write() modify the memory at *kaddr? If so, does it do the needed flush_dcache_page()? > + > + if (!req) { > + printk(KERN_ERR "ERROR: writepage failed.\n"); > + ret = -ENOMEM; > + goto fail; > + } > + > + oi->i_commit_size = min_t(uint64_t, oi->i_commit_size, len + start); > + > + ret = exofs_async_op(req, writepage_done, (void *)page, oi->i_cred); > + if (ret) { > + free_osd_req(req); > + goto fail; > + } > + atomic_inc(&sbi->s_curr_pending); > +out: > + return ret; > +fail: > + set_bit(AS_EIO, &page->mapping->flags); > + end_page_writeback(page); > + unlock_page(page); > + goto out; > +} > + > +/* > + * Callback for readpage > + */ > +int __readpage_done(struct osd_request *req, void *p, int unlock) > +{ > + struct page *page = (struct page *)p; unneeded cast. > + struct inode *inode = page->mapping->host; > + struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; > + int ret; > + > + ret = check_ok(req); > + free_osd_req(req); > + atomic_dec(&sbi->s_curr_pending); > + > + if (ret == 0) { > + > + /* Everything is OK */ > + SetPageUptodate(page); > + if (PageError(page)) > + ClearPageError(page); > + } else if (ret == -EFAULT) { > + char *kaddr; > + > + /* In this case we were trying to read something that wasn't on > + * disk yet - return a page full of zeroes. This should be OK, > + * because the object should be empty (if there was a write > + * before this read, the read would be waiting with the page > + * locked */ > + kaddr = page_address(page); > + memset(kaddr, 0, PAGE_CACHE_SIZE); There is I think a missing flsh_dcache_page() here. Use of the (somewhat misnamed) zero_user() would be an appropriate fix and cleanup. > + SetPageUptodate(page); > + if (PageError(page)) > + ClearPageError(page); > + } else /* Error */ > + SetPageError(page); > + > + if (unlock) > + unlock_page(page); > + > + return ret; > +} > + > +void readpage_done(struct osd_request *req, void *p) > +{ > + __readpage_done(req, p, true); > +} > + > +/* > + * Read a page from the OSD > + */ > +static int __readpage_filler(struct page *page, bool is_async_unlock) > +{ > + struct osd_request *req = NULL; > + struct inode *inode = page->mapping->host; > + struct exofs_i_info *oi = EXOFS_I(inode); > + ino_t ino = inode->i_ino; > + loff_t i_size = i_size_read(inode); > + loff_t i_start = page->index << PAGE_CACHE_SHIFT; > + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; Using pgoff_t for this would have some small documentation benefit. > + struct super_block *sb = inode->i_sb; > + struct exofs_sb_info *sbi = sb->s_fs_info; > + uint64_t amount; > + unsigned char *kaddr; > + int ret = 0; > + > + if (!PageLocked(page)) > + BUG(); BUG_ON? > + if (PageUptodate(page)) > + goto out; > + > + if (page->index < end_index) > + amount = PAGE_CACHE_SIZE; > + else > + amount = i_size & (PAGE_CACHE_SIZE - 1); > + > + /* this will be out of bounds, or doesn't exist yet */ > + if ((page->index >= end_index + 1) || !ObjCreated(oi) || !amount > + /*|| (i_start >= oi->i_commit_size)*/) { > + kaddr = kmap_atomic(page, KM_USER0); > + memset(kaddr, 0, PAGE_CACHE_SIZE); > + flush_dcache_page(page); > + kunmap_atomic(page, KM_USER0); There's a flush_dcache_page() ;) Could use clear_highpage() here. > + SetPageUptodate(page); > + if (PageError(page)) > + ClearPageError(page); > + if (is_async_unlock) > + unlock_page(page); > + goto out; > + } > + > + if (amount != PAGE_CACHE_SIZE) { > + kaddr = kmap_atomic(page, KM_USER0); > + memset(kaddr + amount, 0, PAGE_CACHE_SIZE - amount); > + flush_dcache_page(page); > + kunmap_atomic(page, KM_USER0); Use zero_user()? > + } > + > + kaddr = page_address(page); > + > + req = prepare_osd_read(sbi->s_dev, sbi->s_pid, ino + EXOFS_OBJ_OFF, > + amount, i_start, 0, kaddr); flush_dcache_page()? > + if (!req) { > + printk(KERN_ERR "ERROR: readpage failed.\n"); > + ret = -ENOMEM; > + unlock_page(page); > + goto out; > + } > + > + atomic_inc(&sbi->s_curr_pending); > + if (!is_async_unlock) { > + exofs_sync_op(req, sbi->s_timeout, oi->i_cred); > + ret = __readpage_done(req, page, false); > + } else { > + ret = exofs_async_op(req, readpage_done, page, oi->i_cred); > + if (ret) { > + free_osd_req(req); > + unlock_page(page); > + atomic_dec(&sbi->s_curr_pending); > + } > + } > + > +out: > + return ret; > +} > + > +static int readpage_filler(struct page *page) > +{ > + int ret = __readpage_filler(page, true); > + > + return ret; > +} > + > +/* > + * We don't need the file > + */ > +static int exofs_readpage(struct file *file, struct page *page) > +{ > + return readpage_filler(page); > +} > + > +/* > + * We don't need the data > + */ > +static int readpage_strip(void *data, struct page *page) > +{ > + return readpage_filler(page); > +} > + > +/* > + * read a bunch of pages - usually for readahead > + */ > +static int exofs_readpages(struct file *file, struct address_space *mapping, > + struct list_head *pages, unsigned nr_pages) > +{ > + return read_cache_pages(mapping, pages, readpage_strip, NULL); > +} > + > +struct address_space_operations exofs_aops = { const. > + .readpage = exofs_readpage, > + .readpages = exofs_readpages, > + .writepage = exofs_writepage, > + .write_begin = exofs_write_begin_export, > + .write_end = simple_write_end, > + .writepages = generic_writepages, > +}; -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html