Miaohe Lin <linmiaohe@xxxxxxxxxx> writes: > There is a bug in unuse_pte(): when swap page happens to be unreadable, > page filled with random data is mapped into user address space. In case > of error, a special swap entry indicating swap read fails is set to the > page table. So the swapcache page can be freed and the user won't end up > with a permanently mounted swap because a sector is bad. And if the page > is accessed later, the user process will be killed so that corrupted data > is never consumed. On the other hand, if the page is never accessed, the > user won't even notice it. Hi Miaohe, It seems we're not actually using the pfn that gets stored in the special swap entry here. Is my understanding correct? If so I think it would be better to use the new PTE markers Peter introduced[1] rather than adding another swap entry type. [1] - <https://lore.kernel.org/linux-mm/20220405014833.14015-1-peterx@xxxxxxxxxx/> > Signed-off-by: Miaohe Lin <linmiaohe@xxxxxxxxxx> > --- > v2: > use special swap entry to avoid permanently mounted swap > free the bad page in swapcache > --- > include/linux/swap.h | 7 ++++++- > include/linux/swapops.h | 10 ++++++++++ > mm/memory.c | 5 ++++- > mm/swapfile.c | 11 +++++++++++ > 4 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/include/linux/swap.h b/include/linux/swap.h > index d112434f85df..03c576111737 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -55,6 +55,10 @@ static inline int current_is_kswapd(void) > * actions on faults. > */ > > +#define SWAP_READ_ERROR_NUM 1 > +#define SWAP_READ_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ > + SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ > + SWP_PTE_MARKER_NUM) > /* > * PTE markers are used to persist information onto PTEs that are mapped with > * file-backed memories. As its name "PTE" hints, it should only be applied to > @@ -120,7 +124,8 @@ static inline int current_is_kswapd(void) > > #define MAX_SWAPFILES \ > ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ > - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM) > + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ > + SWP_PTE_MARKER_NUM - SWAP_READ_ERROR_NUM) > > /* > * Magic header for a swap area. The first part of the union is > diff --git a/include/linux/swapops.h b/include/linux/swapops.h > index fffbba0036f6..d1093384de9f 100644 > --- a/include/linux/swapops.h > +++ b/include/linux/swapops.h > @@ -108,6 +108,16 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) > return xa_mk_value(entry.val); > } > > +static inline swp_entry_t make_swapin_error_entry(struct page *page) > +{ > + return swp_entry(SWAP_READ_ERROR, page_to_pfn(page)); > +} > + > +static inline int is_swapin_error_entry(swp_entry_t entry) > +{ > + return swp_type(entry) == SWAP_READ_ERROR; > +} > + > #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) > static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) > { > diff --git a/mm/memory.c b/mm/memory.c > index e6434b824009..34d1d66a05bd 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -1476,7 +1476,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, > /* Only drop the uffd-wp marker if explicitly requested */ > if (!zap_drop_file_uffd_wp(details)) > continue; > - } else if (is_hwpoison_entry(entry)) { > + } else if (is_hwpoison_entry(entry) || > + is_swapin_error_entry(entry)) { > if (!should_zap_cows(details)) > continue; > } else { > @@ -3724,6 +3725,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); > } else if (is_hwpoison_entry(entry)) { > ret = VM_FAULT_HWPOISON; > + } else if (is_swapin_error_entry(entry)) { > + ret = VM_FAULT_SIGBUS; > } else if (is_pte_marker_entry(entry)) { > ret = handle_pte_marker(vmf); > } else { > diff --git a/mm/swapfile.c b/mm/swapfile.c > index 9398e915b36b..95b63f69f388 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -1797,6 +1797,17 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, > goto out; > } > > + if (unlikely(!PageUptodate(page))) { > + pte_t pteval; > + > + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); > + pteval = swp_entry_to_pte(make_swapin_error_entry(page)); > + set_pte_at(vma->vm_mm, addr, pte, pteval); > + swap_free(entry); > + ret = 0; > + goto out; > + } > + > /* See do_swap_page() */ > BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); > BUG_ON(PageAnon(page) && PageAnonExclusive(page));