Re: [PATCH 11/24] mm/swap: also handle swapcache lookup in swapin_readahead

Hi Kairui,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.7-rc2 next-20231117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kairui-Song/mm-swap-fix-a-potential-undefined-behavior-issue/20231120-035926
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20231119194740.94101-12-ryncsn%40gmail.com
patch subject: [PATCH 11/24] mm/swap: also handle swapcache lookup in swapin_readahead
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20231120/202311200813.x056cCRJ-lkp@xxxxxxxxx/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231120/202311200813.x056cCRJ-lkp@xxxxxxxxx/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@xxxxxxxxx>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311200813.x056cCRJ-lkp@xxxxxxxxx/

All errors/warnings (new ones prefixed by >>):

   In file included from mm/filemap.c:62:
>> mm/swap.h:101:52: warning: 'enum swap_cache_result' declared inside parameter list will not be visible outside of this definition or declaration
     101 |                         struct vm_fault *vmf, enum swap_cache_result *result)
         |                                                    ^~~~~~~~~~~~~~~~~
--
   In file included from mm/memory.c:93:
>> mm/swap.h:101:52: warning: 'enum swap_cache_result' declared inside parameter list will not be visible outside of this definition or declaration
     101 |                         struct vm_fault *vmf, enum swap_cache_result *result)
         |                                                    ^~~~~~~~~~~~~~~~~
   mm/memory.c: In function 'do_swap_page':
>> mm/memory.c:3790:32: error: storage size of 'cache_result' isn't known
    3790 |         enum swap_cache_result cache_result;
         |                                ^~~~~~~~~~~~
>> mm/memory.c:3857:37: error: 'SWAP_CACHE_HIT' undeclared (first use in this function); did you mean 'DQST_CACHE_HITS'?
    3857 |                 if (cache_result != SWAP_CACHE_HIT) {
         |                                     ^~~~~~~~~~~~~~
         |                                     DQST_CACHE_HITS
   mm/memory.c:3857:37: note: each undeclared identifier is reported only once for each function it appears in
>> mm/memory.c:3863:37: error: 'SWAP_CACHE_BYPASS' undeclared (first use in this function); did you mean 'SMP_CACHE_BYTES'?
    3863 |                 if (cache_result != SWAP_CACHE_BYPASS)
         |                                     ^~~~~~~~~~~~~~~~~
         |                                     SMP_CACHE_BYTES
>> mm/memory.c:3790:32: warning: unused variable 'cache_result' [-Wunused-variable]
    3790 |         enum swap_cache_result cache_result;
         |                                ^~~~~~~~~~~~
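
All four diagnostics share one root cause: in C, a tag such as
'enum swap_cache_result' that first appears inside a prototype's parameter
list gets prototype scope only, so it is a different, incomplete type as far
as the rest of the translation unit is concerned.  On x86_64-allnoconfig
(CONFIG_SWAP=n) the enum definition is evidently not visible at
mm/swap.h:101, so the stub prototype declares that private tag and every
later use of the type or its enumerators in do_swap_page() fails.  Below is
a minimal standalone sketch of the pattern and of the usual remedy (keeping
the enum definition visible unconditionally, ahead of any prototype that
mentions it).  The stub body, the simplified parameter types and the
SWAP_CACHE_MISS name are assumptions for illustration; only SWAP_CACHE_HIT
and SWAP_CACHE_BYPASS appear in the log.

/* demo.c -- standalone sketch, not mm/swap.h itself */

struct page;		/* opaque types are fine behind pointers */
struct vm_fault;

/*
 * If this definition only existed under #ifdef CONFIG_SWAP, the stub
 * prototype below would be the compiler's first sight of the tag on an
 * allnoconfig build, reproducing the "declared inside parameter list will
 * not be visible" warning and the follow-on "storage size isn't known" /
 * "undeclared" errors in callers.  Defining it unconditionally, before any
 * prototype that mentions it, avoids all of that.
 */
enum swap_cache_result {
	SWAP_CACHE_MISS,	/* assumed name, not shown in the log */
	SWAP_CACHE_HIT,
	SWAP_CACHE_BYPASS,
};

#ifndef CONFIG_SWAP
/* !CONFIG_SWAP stub; body and simplified parameter types are assumed. */
static inline struct page *swapin_readahead(unsigned long entry,
					    unsigned int gfp_mask,
					    struct vm_fault *vmf,
					    enum swap_cache_result *result)
{
	return (struct page *)0;
}
#endif

Whether the enum should live outside the CONFIG_SWAP guard or the stub
should stop taking the result pointer at all is a call for the series; the
sketch only illustrates why the current layout trips gcc on this config.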


vim +3790 mm/memory.c

  3777	
  3778	/*
  3779	 * We enter with non-exclusive mmap_lock (to exclude vma changes,
  3780	 * but allow concurrent faults), and pte mapped but not yet locked.
  3781	 * We return with pte unmapped and unlocked.
  3782	 *
  3783	 * We return with the mmap_lock locked or unlocked in the same cases
  3784	 * as does filemap_fault().
  3785	 */
  3786	vm_fault_t do_swap_page(struct vm_fault *vmf)
  3787	{
  3788		struct vm_area_struct *vma = vmf->vma;
  3789		struct folio *swapcache = NULL, *folio = NULL;
> 3790		enum swap_cache_result cache_result;
  3791		struct page *page;
  3792		struct swap_info_struct *si = NULL;
  3793		rmap_t rmap_flags = RMAP_NONE;
  3794		bool exclusive = false;
  3795		swp_entry_t entry;
  3796		pte_t pte;
  3797		vm_fault_t ret = 0;
  3798	
  3799		if (!pte_unmap_same(vmf))
  3800			goto out;
  3801	
  3802		entry = pte_to_swp_entry(vmf->orig_pte);
  3803		if (unlikely(non_swap_entry(entry))) {
  3804			if (is_migration_entry(entry)) {
  3805				migration_entry_wait(vma->vm_mm, vmf->pmd,
  3806						     vmf->address);
  3807			} else if (is_device_exclusive_entry(entry)) {
  3808				vmf->page = pfn_swap_entry_to_page(entry);
  3809				ret = remove_device_exclusive_entry(vmf);
  3810			} else if (is_device_private_entry(entry)) {
  3811				if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
  3812					/*
  3813					 * migrate_to_ram is not yet ready to operate
  3814					 * under VMA lock.
  3815					 */
  3816					vma_end_read(vma);
  3817					ret = VM_FAULT_RETRY;
  3818					goto out;
  3819				}
  3820	
  3821				vmf->page = pfn_swap_entry_to_page(entry);
  3822				vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  3823						vmf->address, &vmf->ptl);
  3824				if (unlikely(!vmf->pte ||
  3825					     !pte_same(ptep_get(vmf->pte),
  3826								vmf->orig_pte)))
  3827					goto unlock;
  3828	
  3829				/*
  3830				 * Get a page reference while we know the page can't be
  3831				 * freed.
  3832				 */
  3833				get_page(vmf->page);
  3834				pte_unmap_unlock(vmf->pte, vmf->ptl);
  3835				ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
  3836				put_page(vmf->page);
  3837			} else if (is_hwpoison_entry(entry)) {
  3838				ret = VM_FAULT_HWPOISON;
  3839			} else if (is_pte_marker_entry(entry)) {
  3840				ret = handle_pte_marker(vmf);
  3841			} else {
  3842				print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL);
  3843				ret = VM_FAULT_SIGBUS;
  3844			}
  3845			goto out;
  3846		}
  3847	
  3848		/* Prevent swapoff from happening to us. */
  3849		si = get_swap_device(entry);
  3850		if (unlikely(!si))
  3851			goto out;
  3852	
  3853		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
  3854					vmf, &cache_result);
  3855		if (page) {
  3856			folio = page_folio(page);
> 3857			if (cache_result != SWAP_CACHE_HIT) {
  3858				/* Had to read the page from swap area: Major fault */
  3859				ret = VM_FAULT_MAJOR;
  3860				count_vm_event(PGMAJFAULT);
  3861				count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
  3862			}
> 3863			if (cache_result != SWAP_CACHE_BYPASS)
  3864				swapcache = folio;
  3865			if (PageHWPoison(page)) {
  3866				/*
  3867				 * hwpoisoned dirty swapcache pages are kept for killing
  3868				 * owner processes (which may be unknown at hwpoison time)
  3869				 */
  3870				ret = VM_FAULT_HWPOISON;
  3871				goto out_release;
  3872			}
  3873		} else {
  3874			/*
  3875			 * Back out if somebody else faulted in this pte
  3876			 * while we released the pte lock.
  3877			 */
  3878			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  3879					vmf->address, &vmf->ptl);
  3880			if (likely(vmf->pte &&
  3881				   pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
  3882				ret = VM_FAULT_OOM;
  3883			goto unlock;
  3884		}
  3885	
  3886		ret |= folio_lock_or_retry(folio, vmf);
  3887		if (ret & VM_FAULT_RETRY)
  3888			goto out_release;
  3889	
  3890		if (swapcache) {
  3891			/*
  3892			 * Make sure folio_free_swap() or swapoff did not release the
  3893			 * swapcache from under us.  The page pin, and pte_same test
  3894			 * below, are not enough to exclude that.  Even if it is still
  3895			 * swapcache, we need to check that the page's swap has not
  3896			 * changed.
  3897			 */
  3898			if (unlikely(!folio_test_swapcache(folio) ||
  3899				     page_swap_entry(page).val != entry.val))
  3900				goto out_page;
  3901	
  3902			/*
  3903			 * KSM sometimes has to copy on read faults, for example, if
  3904			 * page->index of !PageKSM() pages would be nonlinear inside the
  3905			 * anon VMA -- PageKSM() is lost on actual swapout.
  3906			 */
  3907			page = ksm_might_need_to_copy(page, vma, vmf->address);
  3908			if (unlikely(!page)) {
  3909				ret = VM_FAULT_OOM;
  3910				goto out_page;
  3911			} else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
  3912				ret = VM_FAULT_HWPOISON;
  3913				goto out_page;
  3914			}
  3915			folio = page_folio(page);
  3916	
  3917			/*
  3918			 * If we want to map a page that's in the swapcache writable, we
  3919			 * have to detect via the refcount if we're really the exclusive
  3920			 * owner. Try removing the extra reference from the local LRU
  3921			 * caches if required.
  3922			 */
  3923			if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache &&
  3924			    !folio_test_ksm(folio) && !folio_test_lru(folio))
  3925				lru_add_drain();
  3926		}
  3927	
  3928		folio_throttle_swaprate(folio, GFP_KERNEL);
  3929	
  3930		/*
  3931		 * Back out if somebody else already faulted in this pte.
  3932		 */
  3933		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
  3934				&vmf->ptl);
  3935		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
  3936			goto out_nomap;
  3937	
  3938		if (unlikely(!folio_test_uptodate(folio))) {
  3939			ret = VM_FAULT_SIGBUS;
  3940			goto out_nomap;
  3941		}
  3942	
  3943		/*
  3944		 * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte
  3945		 * must never point at an anonymous page in the swapcache that is
  3946		 * PG_anon_exclusive. Sanity check that this holds and especially, that
  3947		 * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity
  3948		 * check after taking the PT lock and making sure that nobody
  3949		 * concurrently faulted in this page and set PG_anon_exclusive.
  3950		 */
  3951		BUG_ON(!folio_test_anon(folio) && folio_test_mappedtodisk(folio));
  3952		BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page));
  3953	
  3954		/*
  3955		 * Check under PT lock (to protect against concurrent fork() sharing
  3956		 * the swap entry concurrently) for certainly exclusive pages.
  3957		 */
  3958		if (!folio_test_ksm(folio)) {
  3959			exclusive = pte_swp_exclusive(vmf->orig_pte);
  3960			if (folio != swapcache) {
  3961				/*
  3962				 * We have a fresh page that is not exposed to the
  3963				 * swapcache -> certainly exclusive.
  3964				 */
  3965				exclusive = true;
  3966			} else if (exclusive && folio_test_writeback(folio) &&
  3967				  data_race(si->flags & SWP_STABLE_WRITES)) {
  3968				/*
  3969				 * This is tricky: not all swap backends support
  3970				 * concurrent page modifications while under writeback.
  3971				 *
  3972				 * So if we stumble over such a page in the swapcache
  3973				 * we must not set the page exclusive, otherwise we can
  3974				 * map it writable without further checks and modify it
  3975				 * while still under writeback.
  3976				 *
  3977				 * For these problematic swap backends, simply drop the
  3978				 * exclusive marker: this is perfectly fine as we start
  3979				 * writeback only if we fully unmapped the page and
  3980				 * there are no unexpected references on the page after
  3981				 * unmapping succeeded. After fully unmapped, no
  3982				 * further GUP references (FOLL_GET and FOLL_PIN) can
  3983				 * appear, so dropping the exclusive marker and mapping
  3984				 * it only R/O is fine.
  3985				 */
  3986				exclusive = false;
  3987			}
  3988		}
  3989	
  3990		/*
  3991		 * Some architectures may have to restore extra metadata to the page
  3992		 * when reading from swap. This metadata may be indexed by swap entry
  3993		 * so this must be called before swap_free().
  3994		 */
  3995		arch_swap_restore(entry, folio);
  3996	
  3997		/*
  3998		 * Remove the swap entry and conditionally try to free up the swapcache.
  3999		 * We're already holding a reference on the page but haven't mapped it
  4000		 * yet.
  4001		 */
  4002		swap_free(entry);
  4003		if (should_try_to_free_swap(folio, vma, vmf->flags))
  4004			folio_free_swap(folio);
  4005	
  4006		inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
  4007		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
  4008		pte = mk_pte(page, vma->vm_page_prot);
  4009	
  4010		/*
  4011		 * Same logic as in do_wp_page(); however, optimize for pages that are
  4012		 * certainly not shared either because we just allocated them without
  4013		 * exposing them to the swapcache or because the swap entry indicates
  4014		 * exclusivity.
  4015		 */
  4016		if (!folio_test_ksm(folio) &&
  4017		    (exclusive || folio_ref_count(folio) == 1)) {
  4018			if (vmf->flags & FAULT_FLAG_WRITE) {
  4019				pte = maybe_mkwrite(pte_mkdirty(pte), vma);
  4020				vmf->flags &= ~FAULT_FLAG_WRITE;
  4021			}
  4022			rmap_flags |= RMAP_EXCLUSIVE;
  4023		}
  4024		flush_icache_page(vma, page);
  4025		if (pte_swp_soft_dirty(vmf->orig_pte))
  4026			pte = pte_mksoft_dirty(pte);
  4027		if (pte_swp_uffd_wp(vmf->orig_pte))
  4028			pte = pte_mkuffd_wp(pte);
  4029		vmf->orig_pte = pte;
  4030	
  4031		/* ksm created a completely new copy */
  4032		if (unlikely(folio != swapcache && swapcache)) {
  4033			page_add_new_anon_rmap(page, vma, vmf->address);
  4034			folio_add_lru_vma(folio, vma);
  4035		} else {
  4036			page_add_anon_rmap(page, vma, vmf->address, rmap_flags);
  4037		}
  4038	
  4039		VM_BUG_ON(!folio_test_anon(folio) ||
  4040				(pte_write(pte) && !PageAnonExclusive(page)));
  4041		set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
  4042		arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
  4043	
  4044		folio_unlock(folio);
  4045		if (folio != swapcache && swapcache) {
  4046			/*
  4047			 * Hold the lock to avoid the swap entry to be reused
  4048			 * until we take the PT lock for the pte_same() check
  4049			 * (to avoid false positives from pte_same). For
  4050			 * further safety release the lock after the swap_free
  4051			 * so that the swap count won't change under a
  4052			 * parallel locked swapcache.
  4053			 */
  4054			folio_unlock(swapcache);
  4055			folio_put(swapcache);
  4056		}
  4057	
  4058		if (vmf->flags & FAULT_FLAG_WRITE) {
  4059			ret |= do_wp_page(vmf);
  4060			if (ret & VM_FAULT_ERROR)
  4061				ret &= VM_FAULT_ERROR;
  4062			goto out;
  4063		}
  4064	
  4065		/* No need to invalidate - it was non-present before */
  4066		update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
  4067	unlock:
  4068		if (vmf->pte)
  4069			pte_unmap_unlock(vmf->pte, vmf->ptl);
  4070	out:
  4071		if (si)
  4072			put_swap_device(si);
  4073		return ret;
  4074	out_nomap:
  4075		if (vmf->pte)
  4076			pte_unmap_unlock(vmf->pte, vmf->ptl);
  4077	out_page:
  4078		folio_unlock(folio);
  4079	out_release:
  4080		folio_put(folio);
  4081		if (folio != swapcache && swapcache) {
  4082			folio_unlock(swapcache);
  4083			folio_put(swapcache);
  4084		}
  4085		if (si)
  4086			put_swap_device(si);
  4087		return ret;
  4088	}
  4089	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki



