tmpfs already supports PMD-sized large folios, but the tmpfs read
operation still performs copying at PAGE_SIZE granularity, which is
suboptimal. This patch changes the read path to copy data at the folio
granularity, which can improve the read performance, as well as
converting it to use folio-related functions.
Using 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, I can
see about a 20% performance improvement, and no regression with bs=4k.
Before the patch:
READ: bw=10.0GiB/s
After the patch:
READ: bw=12.0GiB/s
Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
mm/shmem.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index edab02a26aac..7e79b6a96da0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3108,13 +3108,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
ssize_t retval = 0;
index = iocb->ki_pos >> PAGE_SHIFT;
- offset = iocb->ki_pos & ~PAGE_MASK;
for (;;) {
struct folio *folio = NULL;
- struct page *page = NULL;
unsigned long nr, ret;
loff_t end_offset, i_size = i_size_read(inode);
+ size_t fsize;
if (unlikely(iocb->ki_pos >= i_size))
break;
@@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (folio) {
folio_unlock(folio);
- page = folio_file_page(folio, index);
- if (PageHWPoison(page)) {
+ if (folio_test_hwpoison(folio) ||
+ (folio_test_large(folio) &&
+ folio_test_has_hwpoisoned(folio))) {
folio_put(folio);
error = -EIO;
break;
@@ -3147,7 +3147,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
break;
}
end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
- nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
+ if (folio)
+ fsize = folio_size(folio);
+ else
+ fsize = PAGE_SIZE;
+ offset = iocb->ki_pos & (fsize - 1);
+ nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
if (folio) {
/*
@@ -3156,7 +3161,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
* before reading the page on the kernel side.
*/