Re: 5.7-rc0: kswapd eats cpu during a disk test?!

Hillf Danton <hdanton@xxxxxxxx> · Sat, 13 Jun 2020 12:47:38 +0800

On Sat, 13 Jun 2020 01:05:52 +0200 Pavel Machek wrote:
> > > +CC linux-mm
> > >
> > > On 5/31/20 12:34 PM, Pavel Machek wrote:
> > > > Hi!
> > > >
> > > > This is simple cat /dev/sda > /dev/zero... on thinkpad x60 (x86-32),
> > > > with spinning rust.
> > > >
> > > >   PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+  COMMAND
> > > >    1000 root      20   0       0      0      0 R  53.3  0.0  57:34.93  kswapd0
> > > >   27897 root      20   0    6976    580    536 R  44.5  0.0   1:44.53  cat
> > > >
> > > > It keeps both CPUs busy... and I don't think that's right.
> > >
> > > Does an older kernel behave differently here?
> >
> > Let me try on x220 (x86-64, first):
> >
> >   737 root      20   0    5404    744    680 R  31.2   0.0   0:09.98 cat       
> >  1024 root      20   0       0      0      0 S  21.4   0.0 165:22.68 kswapd0   
> >
> > That was with ssd, result with spinning rust is similar:
> >
> >   859 root      20   0    5404    740    672 D  21.1   0.0   0:03.33 cat       
> >  1024 root      20   0       0      0      0 R  11.8   0.0 165:33.07 kswapd0   
> >
> > 5.7-rc1+ kernel.
> >
> > Performance of spinning rust is down, too, on x60:
> >
> > pavel@amd:~/misc/hw/hdd1t$ sudo ddrescue --force /dev/sda1 /dev/null
> > GNU ddrescue 1.19
> > Press Ctrl-C to interrupt
> > rescued:     2147 MB,  errsize:       0 B,  current rate:    3080 kB/s
> >    ipos:     2147 MB,   errors:       0,    average rate:    5382 kB/s
> >    opos:     2147 MB, run time:    6.65 m,  successful read:       0 s ago
> > Finished
> > pavel@amd:~/misc/hw/hdd1t$ uname -a
> > Linux amd 5.7.0-next-20200611+ #123 SMP PREEMPT Thu Jun 11 15:41:22 CEST 2020 i686 GNU/Linux
> >
> > And there's something clearly wrong here:
> >
> >     966 root      20   0       0      0      0 R  94.4  0.0   8:18.82   kswapd0
> >   23933 root      20   0    4612   1112   1028 D  80.6  0.0   0:26.40   ddrescue
> >  
> 
> Same x60 under older kernel:
> 
> pavel@amd:/data/fast/pavel$ sudo ddrescue --force /dev/sda4 /dev/null
> GNU ddrescue 1.19
> Press Ctrl-C to interrupt
> rescued:     6593 MB,  errsize:       0 B,  current rate:   60424 kB/s
>    ipos:     6593 MB,   errors:       0,    average rate:   95563 kB/s
> 
>   3539 root      20   0    4616   1136   1048 D  21.4  0.0   0:15.63 ddrescue
>    865 root      20   0       0      0      0 S   6.9  0.0   0:04.91  kswapd0
> 
> Linux amd 4.6.0+ #172 SMP Sun Aug 14 11:25:34 CEST 2016 i686 GNU/Linux
> 
> These are more reasonable numbers.

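Treat referenced pages found on the active lists as reclaim cost: in
shrink_active_list(), charge rotated file pages to sc->file_cost and
referenced anon pages to sc->anon_cost.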

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2020,6 +2020,7 @@ static void shrink_active_list(unsigned
 	struct page *page;
 	unsigned nr_deactivate, nr_activate;
 	unsigned nr_rotated = 0;
+	unsigned nr_refered = 0;
 	int file = is_file_lru(lru);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
@@ -2070,7 +2071,8 @@ static void shrink_active_list(unsigned
 				nr_rotated += hpage_nr_pages(page);
 				list_add(&page->lru, &l_active);
 				continue;
-			}
+			} else if (!file)
+				nr_refered++;
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
@@ -2098,6 +2100,14 @@ static void shrink_active_list(unsigned
 	free_unref_page_list(&l_active);
 	trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
 			nr_deactivate, nr_rotated, sc->priority, file);
+	if (file)
+		sc->file_cost += nr_rotated;
+	else
+		/*
+		 * add cost to avoid swapin in the near future which incurs IO
+		 * on top of reclaim
+		 */
+		sc->anon_cost += nr_refered;
 }
 
 unsigned long reclaim_pages(struct list_head *page_list)
@@ -2311,11 +2321,13 @@ static void get_scan_count(struct lruvec
 	file_cost = total_cost + sc->file_cost;
 	total_cost = anon_cost + file_cost;
 
-	ap = swappiness * (total_cost + 1);
-	ap /= anon_cost + 1;
-
-	fp = (200 - swappiness) * (total_cost + 1);
-	fp /= file_cost + 1;
+	ap = swappiness * total_cost;
+	if (anon_cost)
+		ap /= anon_cost;
+
+	fp = (200 - swappiness) * total_cost;
+	if (file_cost)
+		fp /= file_cost;
 
 	fraction[0] = ap;
 	fraction[1] = fp;