Hi Wei,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on mmotm/master]
[also build test ERROR on linus/master v5.5-rc3 next-20191220]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest using the '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:      https://github.com/0day-ci/linux/commits/Wei-Yang/mm-rmap-c-split-huge-pmd-when-it-really-is/20191225-023217
base:     git://git.cmpxchg.org/linux-mmotm.git master
config:   i386-tinyconfig (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to the linux build tree
        make ARCH=i386

If you fix the issue, kindly add the following tag
Reported-by: kbuild test robot <lkp@xxxxxxxxx>

All error/warnings (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:19:0,
                    from arch/x86/include/asm/bug.h:83,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/rmap.c:48:
   mm/rmap.c: In function 'try_to_unmap_one':
>> include/linux/compiler.h:350:38: error: call to '__compiletime_assert_1375' declared with attribute error: BUILD_BUG failed
     _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
                                         ^
   include/linux/kernel.h:37:48: note: in definition of macro 'IS_ALIGNED'
    #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
                                                    ^
>> include/linux/compiler.h:338:2: note: in expansion of macro '__compiletime_assert'
     __compiletime_assert(condition, msg, prefix, suffix)
     ^~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:350:2: note: in expansion of macro '_compiletime_assert'
     _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
     ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
    #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                        ^~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:59:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
    #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
                        ^~~~~~~~~~~~~~~~
>> include/linux/huge_mm.h:282:27: note: in expansion of macro 'BUILD_BUG'
    #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
                              ^~~~~~~~~
>> mm/rmap.c:1375:23: note: in expansion of macro 'HPAGE_PMD_SIZE'
      IS_ALIGNED(address, HPAGE_PMD_SIZE)) {
                          ^~~~~~~~~~~~~~

vim +/HPAGE_PMD_SIZE +1375 mm/rmap.c

  1336	
  1337	/*
  1338	 * @arg: enum ttu_flags will be passed to this argument
  1339	 */
  1340	static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
  1341			     unsigned long address, void *arg)
  1342	{
  1343		struct mm_struct *mm = vma->vm_mm;
  1344		struct page_vma_mapped_walk pvmw = {
  1345			.page = page,
  1346			.vma = vma,
  1347			.address = address,
  1348		};
  1349		pte_t pteval;
  1350		struct page *subpage;
  1351		bool ret = true;
  1352		struct mmu_notifier_range range;
  1353		enum ttu_flags flags = (enum ttu_flags)arg;
  1354	
  1355		/* munlock has nothing to gain from examining un-locked vmas */
  1356		if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
  1357			return true;
  1358	
  1359		if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
  1360		    is_zone_device_page(page) && !is_device_private_page(page))
  1361			return true;
  1362	
  1363		/*
  1364		 * There are two places set TTU_SPLIT_HUGE_PMD
  1365		 *
  1366		 *   unmap_page()
  1367		 *   shrink_page_list()
  1368		 *
  1369		 * In both cases, the "page" here is the PageHead() of the THP.
  1370		 *
  1371		 * If the page is not a real PMD huge page, e.g. after mremap(), it is
  1372		 * not necessary to split.
  1373		 */
  1374		if ((flags & TTU_SPLIT_HUGE_PMD) &&
> 1375		    IS_ALIGNED(address, HPAGE_PMD_SIZE)) {
  1376			split_huge_pmd_address(vma, address,
  1377					       flags & TTU_SPLIT_FREEZE, page);
  1378		}
  1379	
  1380		/*
  1381		 * For THP, we have to assume the worse case ie pmd for invalidation.
  1382		 * For hugetlb, it could be much worse if we need to do pud
  1383		 * invalidation in the case of pmd sharing.
  1384		 *
  1385		 * Note that the page can not be free in this function as call of
  1386		 * try_to_unmap() must hold a reference on the page.
  1387		 */
  1388		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
  1389					address,
  1390					min(vma->vm_end, address + page_size(page)));
  1391		if (PageHuge(page)) {
  1392			/*
  1393			 * If sharing is possible, start and end will be adjusted
  1394			 * accordingly.
  1395			 */
  1396			adjust_range_if_pmd_sharing_possible(vma, &range.start,
  1397						     &range.end);
  1398		}
  1399		mmu_notifier_invalidate_range_start(&range);
  1400	
  1401		while (page_vma_mapped_walk(&pvmw)) {
  1402	#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
  1403			/* PMD-mapped THP migration entry */
  1404			if (!pvmw.pte && (flags & TTU_MIGRATION)) {
  1405				VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
  1406	
  1407				set_pmd_migration_entry(&pvmw, page);
  1408				continue;
  1409			}
  1410	#endif
  1411	
  1412			/*
  1413			 * If the page is mlock()d, we cannot swap it out.
  1414			 * If it's recently referenced (perhaps page_referenced
  1415			 * skipped over this mm) then we should reactivate it.
  1416			 */
  1417			if (!(flags & TTU_IGNORE_MLOCK)) {
  1418				if (vma->vm_flags & VM_LOCKED) {
  1419					/* PTE-mapped THP are never mlocked */
  1420					if (!PageTransCompound(page)) {
  1421						/*
  1422						 * Holding pte lock, we do *not* need
  1423						 * mmap_sem here
  1424						 */
  1425						mlock_vma_page(page);
  1426					}
  1427					ret = false;
  1428					page_vma_mapped_walk_done(&pvmw);
  1429					break;
  1430				}
  1431				if (flags & TTU_MUNLOCK)
  1432					continue;
  1433			}
  1434	
  1435			/* Unexpected PMD-mapped THP? */
  1436			VM_BUG_ON_PAGE(!pvmw.pte, page);
  1437	
  1438			subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
  1439			address = pvmw.address;
  1440	
  1441			if (PageHuge(page)) {
  1442				if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
  1443					/*
  1444					 * huge_pmd_unshare unmapped an entire PMD
  1445					 * page. There is no way of knowing exactly
  1446					 * which PMDs may be cached for this mm, so
  1447					 * we must flush them all. start/end were
  1448					 * already adjusted above to cover this range.
  1449					 */
  1450					flush_cache_range(vma, range.start, range.end);
  1451					flush_tlb_range(vma, range.start, range.end);
  1452					mmu_notifier_invalidate_range(mm, range.start,
  1453								      range.end);
  1454	
  1455					/*
  1456					 * The ref count of the PMD page was dropped
  1457					 * which is part of the way map counting
  1458					 * is done for shared PMDs. Return 'true'
  1459					 * here. When there is no other sharing,
  1460					 * huge_pmd_unshare returns false and we will
  1461					 * unmap the actual page and drop map count
  1462					 * to zero.
  1463					 */
  1464					page_vma_mapped_walk_done(&pvmw);
  1465					break;
  1466				}
  1467			}
  1468	
  1469			if (IS_ENABLED(CONFIG_MIGRATION) &&
  1470			    (flags & TTU_MIGRATION) &&
  1471			    is_zone_device_page(page)) {
  1472				swp_entry_t entry;
  1473				pte_t swp_pte;
  1474	
  1475				pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
  1476	
  1477				/*
  1478				 * Store the pfn of the page in a special migration
  1479				 * pte. do_swap_page() will wait until the migration
  1480				 * pte is removed and then restart fault handling.
  1481				 */
  1482				entry = make_migration_entry(page, 0);
  1483				swp_pte = swp_entry_to_pte(entry);
  1484				if (pte_soft_dirty(pteval))
  1485					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1486				set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
  1487				/*
  1488				 * No need to invalidate here it will synchronize on
  1489				 * against the special swap migration pte.
  1490				 *
  1491				 * The assignment to subpage above was computed from a
  1492				 * swap PTE which results in an invalid pointer.
  1493				 * Since only PAGE_SIZE pages can currently be
  1494				 * migrated, just set it to page. This will need to be
  1495				 * changed when hugepage migrations to device private
  1496				 * memory are supported.
  1497				 */
  1498				subpage = page;
  1499				goto discard;
  1500			}
  1501	
  1502			if (!(flags & TTU_IGNORE_ACCESS)) {
  1503				if (ptep_clear_flush_young_notify(vma, address,
  1504							pvmw.pte)) {
  1505					ret = false;
  1506					page_vma_mapped_walk_done(&pvmw);
  1507					break;
  1508				}
  1509			}
  1510	
  1511			/* Nuke the page table entry. */
  1512			flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
  1513			if (should_defer_flush(mm, flags)) {
  1514				/*
  1515				 * We clear the PTE but do not flush so potentially
  1516				 * a remote CPU could still be writing to the page.
  1517				 * If the entry was previously clean then the
  1518				 * architecture must guarantee that a clear->dirty
  1519				 * transition on a cached TLB entry is written through
  1520				 * and traps if the PTE is unmapped.
  1521				 */
  1522				pteval = ptep_get_and_clear(mm, address, pvmw.pte);
  1523	
  1524				set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
  1525			} else {
  1526				pteval = ptep_clear_flush(vma, address, pvmw.pte);
  1527			}
  1528	
  1529			/* Move the dirty bit to the page. Now the pte is gone. */
  1530			if (pte_dirty(pteval))
  1531				set_page_dirty(page);
  1532	
  1533			/* Update high watermark before we lower rss */
  1534			update_hiwater_rss(mm);
  1535	
  1536			if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
  1537				pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
  1538				if (PageHuge(page)) {
  1539					hugetlb_count_sub(compound_nr(page), mm);
  1540					set_huge_swap_pte_at(mm, address,
  1541							     pvmw.pte, pteval,
  1542							     vma_mmu_pagesize(vma));
  1543				} else {
  1544					dec_mm_counter(mm, mm_counter(page));
  1545					set_pte_at(mm, address, pvmw.pte, pteval);
  1546				}
  1547	
  1548			} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
  1549				/*
  1550				 * The guest indicated that the page content is of no
  1551				 * interest anymore. Simply discard the pte, vmscan
  1552				 * will take care of the rest.
  1553				 * A future reference will then fault in a new zero
  1554				 * page. When userfaultfd is active, we must not drop
  1555				 * this page though, as its main user (postcopy
  1556				 * migration) will not expect userfaults on already
  1557				 * copied pages.
  1558				 */
  1559				dec_mm_counter(mm, mm_counter(page));
  1560				/* We have to invalidate as we cleared the pte */
  1561				mmu_notifier_invalidate_range(mm, address,
  1562							      address + PAGE_SIZE);
  1563			} else if (IS_ENABLED(CONFIG_MIGRATION) &&
  1564					(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
  1565				swp_entry_t entry;
  1566				pte_t swp_pte;
  1567	
  1568				if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  1569					set_pte_at(mm, address, pvmw.pte, pteval);
  1570					ret = false;
  1571					page_vma_mapped_walk_done(&pvmw);
  1572					break;
  1573				}
  1574	
  1575				/*
  1576				 * Store the pfn of the page in a special migration
  1577				 * pte. do_swap_page() will wait until the migration
  1578				 * pte is removed and then restart fault handling.
  1579				 */
  1580				entry = make_migration_entry(subpage,
  1581						pte_write(pteval));
  1582				swp_pte = swp_entry_to_pte(entry);
  1583				if (pte_soft_dirty(pteval))
  1584					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1585				set_pte_at(mm, address, pvmw.pte, swp_pte);
  1586				/*
  1587				 * No need to invalidate here it will synchronize on
  1588				 * against the special swap migration pte.
  1589				 */
  1590			} else if (PageAnon(page)) {
  1591				swp_entry_t entry = { .val = page_private(subpage) };
  1592				pte_t swp_pte;
  1593				/*
  1594				 * Store the swap location in the pte.
  1595				 * See handle_pte_fault() ...
  1596				 */
  1597			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
  1598				WARN_ON_ONCE(1);
  1599				ret = false;
  1600				/* We have to invalidate as we cleared the pte */
  1601				mmu_notifier_invalidate_range(mm, address,
  1602							address + PAGE_SIZE);
  1603				page_vma_mapped_walk_done(&pvmw);
  1604				break;
  1605			}
  1606	
  1607				/* MADV_FREE page check */
  1608				if (!PageSwapBacked(page)) {
  1609					if (!PageDirty(page)) {
  1610						/* Invalidate as we cleared the pte */
  1611						mmu_notifier_invalidate_range(mm,
  1612							address, address + PAGE_SIZE);
  1613						dec_mm_counter(mm, MM_ANONPAGES);
  1614						goto discard;
  1615					}
  1616	
  1617					/*
  1618					 * If the page was redirtied, it cannot be
  1619					 * discarded. Remap the page to page table.
  1620					 */
  1621					set_pte_at(mm, address, pvmw.pte, pteval);
  1622					SetPageSwapBacked(page);
  1623					ret = false;
  1624					page_vma_mapped_walk_done(&pvmw);
  1625					break;
  1626				}
  1627	
  1628				if (swap_duplicate(entry) < 0) {
  1629					set_pte_at(mm, address, pvmw.pte, pteval);
  1630					ret = false;
  1631					page_vma_mapped_walk_done(&pvmw);
  1632					break;
  1633				}
  1634				if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  1635					set_pte_at(mm, address, pvmw.pte, pteval);
  1636					ret = false;
  1637					page_vma_mapped_walk_done(&pvmw);
  1638					break;
  1639				}
  1640				if (list_empty(&mm->mmlist)) {
  1641					spin_lock(&mmlist_lock);
  1642					if (list_empty(&mm->mmlist))
  1643						list_add(&mm->mmlist, &init_mm.mmlist);
  1644					spin_unlock(&mmlist_lock);
  1645				}
  1646				dec_mm_counter(mm, MM_ANONPAGES);
  1647				inc_mm_counter(mm, MM_SWAPENTS);
  1648				swp_pte = swp_entry_to_pte(entry);
  1649				if (pte_soft_dirty(pteval))
  1650					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1651				set_pte_at(mm, address, pvmw.pte, swp_pte);
  1652				/* Invalidate as we cleared the pte */
  1653				mmu_notifier_invalidate_range(mm, address,
  1654							      address + PAGE_SIZE);
  1655			} else {
  1656				/*
  1657				 * This is a locked file-backed page, thus it cannot
  1658				 * be removed from the page cache and replaced by a new
  1659				 * page before mmu_notifier_invalidate_range_end, so no
  1660				 * concurrent thread might update its page table to
  1661				 * point at new page while a device still is using this
  1662				 * page.
  1663				 *
  1664				 * See Documentation/vm/mmu_notifier.rst
  1665				 */
  1666				dec_mm_counter(mm, mm_counter_file(page));
  1667			}
  1668	discard:
  1669			/*
  1670			 * No need to call mmu_notifier_invalidate_range() it has be
  1671			 * done above for all cases requiring it to happen under page
  1672			 * table lock before mmu_notifier_invalidate_range_end()
  1673			 *
  1674			 * See Documentation/vm/mmu_notifier.rst
  1675			 */
  1676			page_remove_rmap(subpage, PageHuge(page));
  1677			put_page(page);
  1678		}
  1679	
  1680		mmu_notifier_invalidate_range_end(&range);
  1681	
  1682		return ret;
  1683	}
  1684	
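For context on the failure mode: the trace above shows HPAGE_PMD_SIZE expanding to
({ BUILD_BUG(); 0; }), which is the definition include/linux/huge_mm.h uses when
CONFIG_TRANSPARENT_HUGEPAGE is not set, as with i386-tinyconfig. On such configs
split_huge_pmd_address() is stubbed out to a no-op, so the pre-patch code built
fine; the new IS_ALIGNED(address, HPAGE_PMD_SIZE) test at line 1375, however,
evaluates the constant unconditionally and therefore trips BUILD_BUG() on every
THP-less build. The fragment below is a minimal, untested sketch of one way to
keep HPAGE_PMD_SIZE confined to THP builds; it is an illustration only, not a
proposed final fix:

	/*
	 * Illustrative, untested fragment of try_to_unmap_one(): only THP
	 * builds may evaluate HPAGE_PMD_SIZE, so compute the alignment
	 * check under #ifdef and fall back to "not aligned" otherwise.
	 */
	if (flags & TTU_SPLIT_HUGE_PMD) {
		bool pmd_aligned = false;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		pmd_aligned = IS_ALIGNED(address, HPAGE_PMD_SIZE);
#endif
		/*
		 * split_huge_pmd_address() is a no-op stub when THP is
		 * compiled out, so skipping the call there keeps the
		 * pre-patch behaviour on !THP configs.
		 */
		if (pmd_aligned)
			split_huge_pmd_address(vma, address,
					       flags & TTU_SPLIT_FREEZE, page);
	}

With CONFIG_TRANSPARENT_HUGEPAGE=n the compiler never sees HPAGE_PMD_SIZE, which
is what the BUILD_BUG-based definition requires.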
---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@xxxxxxxxxxxx   Intel Corporation

Attachment: .config.gz
Description: application/gzip