On Sat, Aug 24, 2019 at 10:44 AM wangyan <wangyan122@xxxxxxxxxx> wrote:

> According to the result, for "-size=1G", it may exceed the dirty pages'
> upper limit, and it frequently triggered pdflush for write-back. And for
> "-size=700M", it may not have exceeded the dirty pages' upper limit, so no
> extra pdflush was triggered.
>
> But for 9p using "-size=1G", the latency is 3.94 usec, and the bandwidth is
> 2305.5MB/s. It is better than virtiofs using "-size=1G". It seems that
> it is not affected by the dirty pages' upper limit.

I tried to reproduce these results, but failed to get decent (>100MB/s) performance out of 9p. I don't have fscache set up; does that play a part in getting high-performance cached writes?

What you describe makes sense, and I have a new patch (attached), but I didn't see a drastic improvement in the performance of virtio-fs in my tests.

Thanks,
Miklos
--- fs/fuse/file.c | 111 +++++++++++++++++++++++++++++++++++++--------------- fs/fuse/fuse_i.h | 3 + fs/fuse/inode.c | 3 + fs/fuse/virtio_fs.c | 4 + 4 files changed, 90 insertions(+), 31 deletions(-) --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -891,6 +891,10 @@ static int virtio_fs_fill_super(struct s if (err < 0) goto err_free_init_req; + /* No strict accounting needed for virtio-fs */ + sb->s_bdi->capabilities = 0; + bdi_set_max_ratio(sb->s_bdi, 100); + fc = fs->vqs[VQ_REQUEST].fud->fc; /* TODO take fuse_mutex around this loop? */ --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -695,6 +695,9 @@ struct fuse_conn { /** cache READLINK responses in page cache */ unsigned cache_symlinks:1; + /** use temp pages for writeback */ + unsigned writeback_tmp:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1244,6 +1244,9 @@ static int fuse_fill_super(struct super_ err = fuse_fill_super_common(sb, &d); if (err < 0) goto err_free_init_req; + + get_fuse_conn_super(sb)->writeback_tmp = 1; + /* * atomic_dec_and_test() in fput() provides the necessary * memory barrier for file->private_data to be visible on all --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -363,11 +363,16 @@ static bool fuse_range_is_writeback(stru pgoff_t idx_to) { struct fuse_inode *fi = get_fuse_inode(inode); - bool found; + struct fuse_conn *fc = get_fuse_conn(inode); + bool found = false; - spin_lock(&fi->lock); - found = fuse_find_writeback(fi, idx_from, idx_to); - spin_unlock(&fi->lock); + if (fc->writeback_tmp) { + spin_lock(&fi->lock); + found = fuse_find_writeback(fi, idx_from, idx_to); + spin_unlock(&fi->lock); + } else { + WARN_ON(!list_empty(&fi->writepages)); + } return found; } @@ -1514,7 +1519,7 @@ static void fuse_writepage_free(struct f int i; for (i = 0; i < req->num_pages; i++) - __free_page(req->pages[i]); + put_page(req->pages[i]); if (req->ff) 
fuse_file_put(req->ff, false, false); @@ -1527,11 +1532,19 @@ static void fuse_writepage_finish(struct struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - list_del(&req->writepages_entry); + if (fc->writeback_tmp) + list_del(&req->writepages_entry); + else + WARN_ON(!list_empty(&req->writepages_entry)); + for (i = 0; i < req->num_pages; i++) { - dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP); - wb_writeout_inc(&bdi->wb); + if (fc->writeback_tmp) { + dec_wb_stat(&bdi->wb, WB_WRITEBACK); + dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP); + wb_writeout_inc(&bdi->wb); + } else { + end_page_writeback(req->pages[i]); + } } wake_up(&fi->page_waitq); } @@ -1616,6 +1629,10 @@ static void fuse_writepage_end(struct fu struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_req *next = req->misc.write.next; + + if (WARN_ON(!fc->writeback_tmp)) + break; + req->misc.write.next = next->misc.write.next; next->misc.write.next = NULL; next->ff = fuse_file_get(req->ff); @@ -1709,9 +1726,16 @@ static int fuse_writepage_locked(struct /* writeback always goes to bg_queue */ __set_bit(FR_BACKGROUND, &req->flags); - tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); - if (!tmp_page) - goto err_free; + + + if (fc->writeback_tmp) { + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto err_free; + } else { + tmp_page = page; + get_page(tmp_page); + } error = -EIO; req->ff = fuse_write_file_get(fc, fi); @@ -1720,7 +1744,8 @@ static int fuse_writepage_locked(struct fuse_write_fill(req, req->ff, page_offset(page), 0); - copy_highpage(tmp_page, page); + if (fc->writeback_tmp) + copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->misc.write.next = NULL; req->in.argpages = 1; @@ -1731,21 +1756,27 @@ static int fuse_writepage_locked(struct req->end = fuse_writepage_end; req->inode = inode; - inc_wb_stat(&inode_to_bdi(inode)->wb, 
WB_WRITEBACK); - inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); + if (fc->writeback_tmp) { + inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); + inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); + } spin_lock(&fi->lock); - list_add(&req->writepages_entry, &fi->writepages); + if (fc->writeback_tmp) + list_add(&req->writepages_entry, &fi->writepages); + else + INIT_LIST_HEAD(&req->writepages_entry); list_add_tail(&req->list, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fi->lock); - end_page_writeback(page); + if (fc->writeback_tmp) + end_page_writeback(page); return 0; err_nofile: - __free_page(tmp_page); + put_page(tmp_page); err_free: fuse_request_free(req); err: @@ -1788,6 +1819,7 @@ static void fuse_writepages_send(struct struct fuse_req *req = data->req; struct inode *inode = data->inode; struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); int num_pages = req->num_pages; int i; @@ -1797,8 +1829,10 @@ static void fuse_writepages_send(struct fuse_flush_writepages(inode); spin_unlock(&fi->lock); - for (i = 0; i < num_pages; i++) - end_page_writeback(data->orig_pages[i]); + if (fc->writeback_tmp) { + for (i = 0; i < num_pages; i++) + end_page_writeback(data->orig_pages[i]); + } } /* @@ -1816,6 +1850,9 @@ static bool fuse_writepage_in_flight(str struct fuse_req *tmp; struct fuse_req *old_req; + if (WARN_ON(!fc->writeback_tmp)) + return false; + WARN_ON(new_req->num_pages != 0); spin_lock(&fi->lock); @@ -1901,10 +1938,15 @@ static int fuse_writepages_fill(struct p } } - err = -ENOMEM; - tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); - if (!tmp_page) - goto out_unlock; + if (fc->writeback_tmp) { + err = -ENOMEM; + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto out_unlock; + } else { + tmp_page = page; + get_page(tmp_page); + } /* * The page must not be redirtied until the writeout is completed @@ -1925,7 +1967,7 @@ static int fuse_writepages_fill(struct p err = -ENOMEM; req = 
fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES); if (!req) { - __free_page(tmp_page); + put_page(tmp_page); goto out_unlock; } @@ -1938,21 +1980,28 @@ static int fuse_writepages_fill(struct p req->end = fuse_writepage_end; req->inode = inode; - spin_lock(&fi->lock); - list_add(&req->writepages_entry, &fi->writepages); - spin_unlock(&fi->lock); + if (fc->writeback_tmp) { + spin_lock(&fi->lock); + list_add(&req->writepages_entry, &fi->writepages); + spin_unlock(&fi->lock); + } else { + INIT_LIST_HEAD(&req->writepages_entry); + } data->req = req; } set_page_writeback(page); - copy_highpage(tmp_page, page); + if (fc->writeback_tmp) + copy_highpage(tmp_page, page); req->pages[req->num_pages] = tmp_page; req->page_descs[req->num_pages].offset = 0; req->page_descs[req->num_pages].length = PAGE_SIZE; - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); - inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); + if (fc->writeback_tmp) { + inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); + inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); + } err = 0; if (is_writeback && fuse_writepage_in_flight(req, page)) {