On Wed, May 11, 2022 at 12:50:20PM -0700, Sultan Alsawaf wrote:
> On Wed, May 11, 2022 at 11:01:01AM -0700, Minchan Kim wrote:
> > On Sun, May 08, 2022 at 07:47:02PM -0700, Sultan Alsawaf wrote:
> > > Cc: stable@xxxxxxxxxxxxxxx
> > > Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
> >
> > Shouldn't the fix be Fixes: 77ff465799c6 ("zsmalloc: zs_page_migrate: skip
> > unnecessary loops but not return -EBUSY if zspage is not inuse")?
> > Because we didn't migrate ZS_EMPTY pages before.
>
> Hi,
>
> Yeah, 77ff465799c6 indeed seems like the commit that introduced the bug.
>
> > I couldn't get the point here. Why couldn't we simply lock zspage migration?
> >
> > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > index 9152fbde33b5..05ff2315b7b1 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -1987,7 +1987,10 @@ static void async_free_zspage(struct work_struct *work)
> >
> >  	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
> >  		list_del(&zspage->list);
> > +
> > +		migrate_read_lock(zspage);
> >  		lock_zspage(zspage);
> > +		migrate_read_unlock(zspage);
> >
> >  		get_zspage_mapping(zspage, &class_idx, &fullness);
> >  		VM_BUG_ON(fullness != ZS_EMPTY);
>
> There are two problems with this:
> 1. migrate_read_lock() is a rwlock and lock_page() can sleep.
> 2. This will cause a deadlock because it violates the lock ordering in
>    zs_page_migrate(), since zs_page_migrate() takes migrate_write_lock()
>    under the page lock.
>

That's true. Thanks!

Then, how about this?

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 9152fbde33b5..2f205c18aee4 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1716,12 +1716,31 @@ static enum fullness_group putback_zspage(struct size_class *class,
  * To prevent zspage destroy during migration, zspage freeing should
  * hold locks of all pages in the zspage.
  */
-static void lock_zspage(struct zspage *zspage)
+static void lock_zspage(struct zs_pool *pool, struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
-
+	struct page *page;
+	int nr_locked;
+	struct page *locked_pages[ZS_MAX_PAGES_PER_ZSPAGE];
+	struct address_space *mapping;
+retry:
+	nr_locked = 0;
+	memset(locked_pages, 0, sizeof(struct page *) * ARRAY_SIZE(locked_pages));
+	page = get_first_page(zspage);
 	do {
 		lock_page(page);
+		locked_pages[nr_locked++] = page;
+		/*
+		 * A subpage in the zspage could be migrated under us, so
+		 * verify it. Once that happens, retry the lock sequence.
+		 */
+		mapping = page_mapping(page);
+		if (mapping != pool->inode->i_mapping ||
+		    page_private(page) != (unsigned long)zspage) {
+			do {
+				unlock_page(locked_pages[--nr_locked]);
+			} while (nr_locked > 0);
+			goto retry;
+		}
 	} while ((page = get_next_page(page)) != NULL);
 }
 
@@ -1987,7 +2006,7 @@ static void async_free_zspage(struct work_struct *work)
 
 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
 		list_del(&zspage->list);
-		lock_zspage(zspage);
+		lock_zspage(pool, zspage);
 
 		get_zspage_mapping(zspage, &class_idx, &fullness);
 		VM_BUG_ON(fullness != ZS_EMPTY);
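
As an aside, for anyone less familiar with the ordering issue Sultan raises in
point (2) above: it is the classic ABBA inversion. Here is a minimal userspace
sketch, not kernel code; page_lock and migrate_lock are hypothetical stand-ins
for the page lock and the zspage migrate lock, and it does not model the
sleeping-vs-rwlock problem from point (1). migrate_path() mirrors
zs_page_migrate() (page lock, then migrate lock) and free_path() mirrors the
rejected hunk (migrate lock, then page lock); build with "gcc -pthread" and
the hang you almost always get is the deadlock.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t migrate_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Mirrors zs_page_migrate(): page lock first, then migrate lock. */
static void *migrate_path(void *arg)
{
	pthread_mutex_lock(&page_lock);		/* lock_page() */
	usleep(10000);				/* widen the race window */
	pthread_rwlock_wrlock(&migrate_lock);	/* migrate_write_lock() */
	puts("migrate path took both locks");
	pthread_rwlock_unlock(&migrate_lock);
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

/* Mirrors the rejected hunk: migrate lock first, then page lock. */
static void *free_path(void *arg)
{
	pthread_rwlock_rdlock(&migrate_lock);	/* migrate_read_lock() */
	usleep(10000);
	pthread_mutex_lock(&page_lock);		/* lock_zspage() -> lock_page() */
	puts("free path took both locks");
	pthread_mutex_unlock(&page_lock);
	pthread_rwlock_unlock(&migrate_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, migrate_path, NULL);
	pthread_create(&b, NULL, free_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}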