Hello, Using the start offset rather than map->br_startoff to calculate the starting page index could get more accurate data offset in page cache probe routine. With this refinement, the old max_t() could be able to remove too. Thanks Mark for pointing this out! -Jeff Cc: Mark Tinguely <tinguely@xxxxxxx> Signed-off-by: Jie Liu <jeff.liu@xxxxxxxxxx> --- fs/xfs/xfs_file.c | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 150 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9f7ec15..113190b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -36,6 +36,7 @@ #include <linux/dcache.h> #include <linux/falloc.h> +#include <linux/pagevec.h> static const struct vm_operations_struct xfs_file_vm_ops; @@ -966,6 +967,108 @@ xfs_vm_page_mkwrite( return block_page_mkwrite(vma, vmf, xfs_get_blocks); } +/* + * Probe the data buffer offset in page cache for unwritten extents. + * Iterate each page to find out if a buffer head state has BH_Unwritten + * or BH_Uptodate set. + */ +STATIC bool +xfs_has_unwritten_buffer( + struct inode *inode, + struct xfs_bmbt_irec *map, + loff_t *offset) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct pagevec pvec; + pgoff_t index; + pgoff_t end; + bool found = false; + + pagevec_init(&pvec, 0); + + index = XFS_FSB_TO_B(mp, XFS_B_TO_FSBT(mp, *offset)) + >> PAGE_CACHE_SHIFT; + end = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount) + >> PAGE_CACHE_SHIFT; + do { + unsigned int i; + unsigned nr_pages; + int want = min_t(pgoff_t, end - index, + (pgoff_t)PAGEVEC_SIZE - 1) + 1; + nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, + want); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + struct buffer_head *bh; + struct buffer_head *head; + xfs_fileoff_t last; + + /* + * There is no need to check the following pages + * if the current page offset is out of range. + */ + if (page->index > end) + goto out; + + if (!trylock_page(page)) + goto out; + + if (!page_has_buffers(page)) { + unlock_page(page); + continue; + } + + last = XFS_B_TO_FSBT(mp, + page->index << PAGE_CACHE_SHIFT); + bh = head = page_buffers(page); + do { + /* + * An extent in XFS_EXT_UNWRITTEN has disk + * blocks already mapped to it, but no data + * has been committed to them yet. If it has + * dirty data in the page cache it can be + * identified by having BH_Unwritten set in + * each buffers. Also, the buffer head state + * might be in BH_Uptodate too if the buffer + * writeback procedure was fired, we need to + * examine it as well. + */ + if ((buffer_unwritten(bh) || + buffer_uptodate(bh)) && + *offset <= XFS_FSB_TO_B(mp, last)) { + found = true; + *offset = XFS_FSB_TO_B(mp, last); + unlock_page(page); + goto out; + } + last++; + } while ((bh = bh->b_this_page) != head); + unlock_page(page); + } + + /* + * If the number of probed pages less than our desired, + * there should no more pages mapped, search done. + */ + if (nr_pages < want) + break; + + index = pvec.pages[i - 1]->index + 1; + pagevec_release(&pvec); + } while (index < end); + +out: + pagevec_release(&pvec); + if (!found) + *offset = 0; + + return found; +} + STATIC loff_t xfs_seek_data( struct file *file, @@ -1009,19 +1112,61 @@ xfs_seek_data( * Treat unwritten extent as data extent since it might * contains dirty data in page cache. */ - if (map[0].br_startblock != HOLESTARTBLOCK) { - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[0].br_startoff)); + if (map[0].br_state == XFS_EXT_NORM && + !isnullstartblock(map[0].br_startblock)) { + offset = start; } else { + /* + * Landed in an unwritten extent, try to find out + * the data buffer offset from page cache firstly. + * Treat it as a hole if nothing was found, and + * skip to check the next extent. + */ + if (map[0].br_startblock == DELAYSTARTBLOCK || + map[0].br_state == XFS_EXT_UNWRITTEN) { + /* Probing page cache start from offset */ + offset = start; + if (xfs_has_unwritten_buffer(inode, &map[0], + &offset)) + goto out; + } + + /* + * Found a hole in map[0] and nothing in map[1]. + * Probably means that we are reading after EOF. + */ if (nmap == 1) { error = ENXIO; goto out_unlock; } - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[1].br_startoff)); + /* + * We have two mappings, and need to check map[1] to + * see if there is data or not. + */ + if (map[1].br_state == XFS_EXT_NORM && + !isnullstartblock(map[1].br_startblock)) { + offset = XFS_FSB_TO_B(mp, map[1].br_startoff); + } else { + if (map[1].br_startblock == DELAYSTARTBLOCK || + map[1].br_state == XFS_EXT_UNWRITTEN) { + /* Probing page cache start from offset */ + offset = start; + if (xfs_has_unwritten_buffer(inode, &map[1], + &offset)) + goto out; + } + /* + * xfs_bmapi_read() can handle repeated hole regions, + * hence it should not return two extents both are + * holes. If the 2nd extent is unwritten, there must + * have data buffer resides in page cache. + */ + BUG(); + } } +out: if (offset != file->f_pos) file->f_pos = offset; -- 1.7.9 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs