Re: regression caused by cgroups optimization in 3.17-rc2

Johannes Weiner <hannes@xxxxxxxxxxx> · Fri, 5 Sep 2014 10:47:23 -0400

On Fri, Sep 05, 2014 at 11:25:37AM +0200, Michal Hocko wrote:
> @@ -900,10 +900,10 @@ void lru_add_drain_all(void)
>   * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
>   * will free it.
>   */
> -void release_pages(struct page **pages, int nr, bool cold)
> +static void release_lru_pages(struct page **pages, int nr,
> +			      struct list_head *pages_to_free)
>  {
>  	int i;
> -	LIST_HEAD(pages_to_free);
>  	struct zone *zone = NULL;
>  	struct lruvec *lruvec;
>  	unsigned long uninitialized_var(flags);
> @@ -943,11 +943,26 @@ void release_pages(struct page **pages, int nr, bool cold)
>  		/* Clear Active bit in case of parallel mark_page_accessed */
>  		__ClearPageActive(page);
>  
> -		list_add(&page->lru, &pages_to_free);
> +		list_add(&page->lru, pages_to_free);
>  	}
>  	if (zone)
>  		spin_unlock_irqrestore(&zone->lru_lock, flags);
> +}
> +/*
> + * Batched page_cache_release(). Frees and uncharges all given pages
> + * for which the reference count drops to 0.
> + */
> +void release_pages(struct page **pages, int nr, bool cold)
> +{
> +	LIST_HEAD(pages_to_free);
>  
> +	while (nr) {
> +		int batch = min(nr, PAGEVEC_SIZE);
> +
> +		release_lru_pages(pages, batch, &pages_to_free);
> +		pages += batch;
> +		nr -= batch;
> +	}

We might be able to process a lot more pages in one go if nobody else
needs the lock or the CPU.  Can't we just cycle the lock or reschedule
if necessary?

diff --git a/mm/swap.c b/mm/swap.c
index 6b2dc3897cd5..ee0cf21dd521 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -944,6 +944,15 @@ void release_pages(struct page **pages, int nr, bool cold)
 		__ClearPageActive(page);
 
 		list_add(&page->lru, &pages_to_free);
+
+		if (should_resched() ||
+		    (zone && spin_needbreak(&zone->lru_lock))) {
+			if (zone) {
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
+				zone = NULL;
+			}
+			cond_resched();
+		}
 	}
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3e0ec83d000c..c487ca4682a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -262,19 +262,12 @@ void free_page_and_swap_cache(struct page *page)
  */
 void free_pages_and_swap_cache(struct page **pages, int nr)
 {
-	struct page **pagep = pages;
+	int i;
 
 	lru_add_drain();
-	while (nr) {
-		int todo = min(nr, PAGEVEC_SIZE);
-		int i;
-
-		for (i = 0; i < todo; i++)
-			free_swap_cache(pagep[i]);
-		release_pages(pagep, todo, false);
-		pagep += todo;
-		nr -= todo;
-	}
+	for (i = 0; i < nr; i++)
+		free_swap_cache(pages[i]);
+	release_pages(pages, nr, false);
 }
 
 /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>