On 05/17/23 16:09, Jiaqi Yan wrote: > When a hugepage contains HWPOISON pages, read() fails to read any byte > of the hugepage and returns -EIO, although many bytes in the HWPOISON > hugepage are readable. > > Improve this by allowing hugetlbfs_read_iter returns as many bytes as > possible. For a requested range [offset, offset + len) that contains > HWPOISON page, return [offset, first HWPOISON page addr); the next read > attempt will fail and return -EIO. > > Signed-off-by: Jiaqi Yan <jiaqiyan@xxxxxxxxxx> > --- > fs/hugetlbfs/inode.c | 62 +++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 56 insertions(+), 6 deletions(-) > > diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c > index ecfdfb2529a3..1baa08ec679f 100644 > --- a/fs/hugetlbfs/inode.c > +++ b/fs/hugetlbfs/inode.c > @@ -282,6 +282,46 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, > } > #endif > > +/* > + * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset. > + * Returns the maximum number of bytes one can read without touching the 1st raw > + * HWPOISON subpage. > + * > + * The implementation borrows the iteration logic from copy_page_to_iter*. > + */ > +static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes) > +{ > + size_t n = 0; > + size_t res = 0; > + struct folio *folio = page_folio(page); > + > + folio_lock(folio); What is the reason for taking folio_lock? > + > + /* First subpage to start the loop. */ > + page += offset / PAGE_SIZE; > + offset %= PAGE_SIZE; > + while (1) { > + if (find_raw_hwp_page(folio, page) != NULL) > + break; > + > + /* Safe to read n bytes without touching HWPOISON subpage. */ > + n = min(bytes, (size_t)PAGE_SIZE - offset); > + res += n; > + bytes -= n; > + if (!bytes || !n) > + break; > + offset += n; > + if (offset == PAGE_SIZE) { > + page++; > + offset = 0; > + } > + } > + > + folio_unlock(folio); > + > + return res; > +} > + > /* > * Support for read() - Find the page attached to f_mapping and copy out the > * data. This provides functionality similar to filemap_read(). > @@ -300,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) > > while (iov_iter_count(to)) { > struct page *page; > - size_t nr, copied; > + size_t nr, copied, want; > > /* nr is the maximum number of bytes to copy from this page */ > nr = huge_page_size(h); > @@ -328,16 +368,26 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) > } else { > unlock_page(page); > > - if (PageHWPoison(page)) { > - put_page(page); > - retval = -EIO; > - break; > + if (!PageHWPoison(page)) > + want = nr; > + else { > + /* > + * Adjust how many bytes safe to read without > + * touching the 1st raw HWPOISON subpage after > + * offset. > + */ > + want = adjust_range_hwpoison(page, offset, nr); > + if (want == 0) { > + put_page(page); > + retval = -EIO; > + break; > + } > } > > /* > * We have the page, copy it to user space buffer. > */ > - copied = copy_page_to_iter(page, offset, nr, to); > + copied = copy_page_to_iter(page, offset, want, to); > put_page(page); > } > offset += copied; > -- > 2.40.1.606.ga4b1b128d6-goog > Code looks fine, just wondering about that folio_lock. -- Mike Kravetz