improve ps performance

From da7a8b539bc82cb3f455465718221c09b53ac256 Mon Sep 17 00:00:00 2001
From: panfengyun <panfy.fnst@xxxxxxxxxxxxxx>
Date: Mon, 25 Aug 2014 15:36:27 +0800
Subject: [PATCH] improve ps performance

When the ps command is run on a system that contains a huge number of tasks,
especially on RHEL 7, it takes far too long to gather the per-task data.

We collect the output of the ps command as part of the summary information for a
crash dump, and in catastrophic cases the dump can contain a huge number of tasks
because of the very bug that caused the system to panic. It is a real problem if
the ps command then needs several hours to finish.

After investigation, we found that because of the following kernel commit and a
fix introduced in crash 7.0.4, most of the time is spent waiting for the ps
command to calculate RSS.

kernel commit:
<cut>
commit 34e55232e59f7b19050267a05ff1226e5cd122a5
Author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Date:   Fri Mar 5 13:41:40 2010 -0800

    mm: avoid false sharing of mm_counter
...
<cut>

crash fix:
<cut>
7.0.4    - Fix for the "ps" command's display of per-task RSS and %MEM values
           in Linux 2.6.34 and later kernels in which SPLIT_RSS_COUNTING is
           enabled.  Without the patch, the values are only taken from each
           task's mm_struct.rss_stat structure, which may contain stale values
           because they may not be synchronized with the RSS values stored
           in each per-thread task_struct.rss_stat structure; this may lead
           to invalid or slightly low RSS values, and worst-case, the %MEM
           value may show garbage percentage values.
           (vinayakm.list@xxxxxxxxx)
<cut>
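
To make the cost concrete: with SPLIT_RSS_COUNTING enabled, each thread carries
private rss_stat deltas, so the 7.0.4 fix makes get_task_mem_usage() walk the
whole context array to find every thread that shares the task's tgid. Since ps
calls get_task_mem_usage() once per task, the total work grows quadratically with
the number of tasks. Below is a simplified, self-contained sketch of that pattern;
the names and types (struct ctx, sync_rss_for) are illustrative stand-ins, not the
actual crash sources:

#include <stdio.h>

/* Illustrative stand-in for crash's per-task context (not the real struct). */
struct ctx {
	unsigned long task;
	unsigned long tgid;
	long file_delta;	/* per-thread MM_FILEPAGES delta */
	long anon_delta;	/* per-thread MM_ANONPAGES delta */
};

/* What the 7.0.4 fix effectively does for ONE task: scan every context. */
static long
sync_rss_for(struct ctx *all, long ntasks, unsigned long tgid)
{
	long i, rss = 0;

	for (i = 0; i < ntasks; i++) {
		if (all[i].tgid != tgid)
			continue;
		rss += all[i].file_delta + all[i].anon_delta;
	}
	return rss;
}

int
main(void)
{
	struct ctx all[] = {
		{ 1, 100, 2, 3 }, { 2, 200, 1, 1 }, { 3, 100, 4, 0 },
	};
	long n = sizeof(all) / sizeof(all[0]), i, total = 0;

	/* ps does this once per task, so the overall cost is O(n^2). */
	for (i = 0; i < n; i++)
		total += sync_rss_for(all, n, all[i].tgid);

	printf("%ld tasks, %ld full scans, %ld pages summed\n", n, n, total);
	return 0;
}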

So:
1. We fix the RSS calculation in get_task_mem_usage() to improve the performance
   of the ps command.
2. We also cache the MM_FILEPAGES and MM_ANONPAGES enumerator values instead of
   looking them up for every task, which speeds up the ps command further.

The first fix accounts for most of the performance improvement; a standalone
sketch of the approach is shown below.
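
The sketch shows the idea behind the first fix in isolation: build an array of
(tgid, context) pairs once, sort it by tgid, and then, for any given task, sum
only the contiguous run of entries that share its tgid, which is what
set_tgid_context_array() and the new get_task_mem_usage() loop in this patch do.
The names here (struct entry, cmp_tgid, rss_for) are illustrative only, not taken
from the crash sources:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for struct tgid_task_context (not the real one). */
struct entry {
	unsigned long tgid;
	long rss_delta;		/* per-thread RSS contribution */
};

static int
cmp_tgid(const void *a, const void *b)
{
	const struct entry *e1 = a, *e2 = b;

	return (e1->tgid < e2->tgid) ? -1 : (e1->tgid > e2->tgid) ? 1 : 0;
}

/* Sum only the contiguous run of entries sharing entries[idx].tgid. */
static long
rss_for(struct entry *entries, long count, long idx)
{
	long start = idx, end = idx, rss = 0;

	while (start > 0 && entries[start - 1].tgid == entries[idx].tgid)
		start--;
	while (end < count - 1 && entries[end + 1].tgid == entries[idx].tgid)
		end++;
	for (; start <= end; start++)
		rss += entries[start].rss_delta;
	return rss;
}

int
main(void)
{
	struct entry entries[] = {
		{ 100, 1 }, { 200, 2 }, { 100, 3 }, { 200, 4 }, { 100, 5 },
	};
	long count = sizeof(entries) / sizeof(entries[0]);

	/* Sort once up front (what set_tgid_context_array() does) ... */
	qsort(entries, count, sizeof(entries[0]), cmp_tgid);

	/* ... then each lookup touches only one run of equal tgids. */
	printf("rss for tgid %lu: %ld\n", entries[0].tgid,
	    rss_for(entries, count, 0));
	return 0;
}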

We also ran some tests; the results are shown below:

OS           process number     without fix         fix all 3 parts
---------------------------------------------------------------------
RHEL6            ~100k             ~180s                 ~70s
RHEL7            ~100k          ~51870s(~14h)            ~63s

Signed-off-by: panfengyun <panfy.fnst@xxxxxxxxxxxxxx>
---
 crash-7.0.7/defs.h   |   11 +++++++
 crash-7.0.7/main.c   |    1 +
 crash-7.0.7/memory.c |   65 ++++++++++++++++++++++++++++++++++++--------
 crash-7.0.7/task.c   |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/crash-7.0.7/defs.h b/crash-7.0.7/defs.h
index 44df6ae..5ff3bf7 100755
--- a/crash-7.0.7/defs.h
+++ b/crash-7.0.7/defs.h
@@ -758,13 +758,23 @@ struct task_context {                     /* context stored for each task */
 	int processor;
 	ulong ptask;
 	ulong mm_struct;
+	ulong tgid;
+	ulong tgid_task_context_index;
 	struct task_context *tc_next;
 };
 
+struct tgid_task_context {             /* context and tgid stored for each task */
+	ulong tgid;
+	struct task_context *tc;
+};
+
 struct task_table {                      /* kernel/local task table data */
 	struct task_context *current;
 	struct task_context *context_array;
+	struct tgid_task_context *ttc_array;
 	void (*refresh_task_table)(void);
+	long filepages;
+	long anonpages;
 	ulong flags;
         ulong task_start;
 	ulong task_end;
@@ -4834,6 +4844,7 @@ ulong generic_get_stackbase(ulong);
 ulong generic_get_stacktop(ulong);
 void dump_task_table(int);
 void sort_context_array(void);
+void set_tgid_context_array(void);
 int in_irq_ctx(ulonglong, int, ulong);
 
 /*
diff --git a/crash-7.0.7/main.c b/crash-7.0.7/main.c
index b0524d2..0728d74 100755
--- a/crash-7.0.7/main.c
+++ b/crash-7.0.7/main.c
@@ -773,6 +773,7 @@ reattempt:
 			} else if (!(pc->flags & MINIMAL_MODE)) {
 				tt->refresh_task_table();
 				sort_context_array();
+				set_tgid_context_array();	
 			}
 		}
                 if (!STREQ(pc->curcmd, pc->program_name))
diff --git a/crash-7.0.7/memory.c b/crash-7.0.7/memory.c
index c97dd39..0abaaad 100755
--- a/crash-7.0.7/memory.c
+++ b/crash-7.0.7/memory.c
@@ -225,6 +225,7 @@ static int next_module_vaddr(ulong, ulong *);
 static int next_identity_mapping(ulong, ulong *);
 static int vm_area_page_dump(ulong, ulong, ulong, ulong, ulong,
 	struct reference *);
+void set_pages(void);
 static int dump_swap_info(ulong, ulong *, ulong *);
 static void swap_info_init(void);
 static char *get_swapdev(ulong, char *);
@@ -1066,6 +1067,7 @@ vm_init(void)
 
 	page_flags_init();
 
+	set_pages();
 	vt->flags |= VM_INIT;
 }
 
@@ -4072,6 +4074,31 @@ in_user_stack(ulong task, ulong vaddr)
 }
 
 /*
+ * Cache the enumerator values of MM_FILEPAGES and MM_ANONPAGES
+ * so that they only have to be looked up once.
+ */
+void
+set_pages(void)
+{
+	long anonpages, filepages;
+	if (VALID_MEMBER(mm_struct_rss))
+		return;
+	if (VALID_MEMBER(mm_struct_rss_stat)) 
+	{
+		if (!enumerator_value("MM_FILEPAGES", &filepages) ||
+		!enumerator_value("MM_ANONPAGES", &anonpages)) 
+		{
+			filepages = 0;
+			anonpages = 1;
+		}
+		tt->filepages = filepages;
+		tt->anonpages = anonpages;
+	} else {
+		return;
+	}
+}
+
+/*
  *  Fill in the task_mem_usage structure with the RSS, virtual memory size,
  *  percent of physical memory being used, and the mm_struct address.
  */
@@ -4108,11 +4135,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 		if (VALID_MEMBER(mm_struct_rss_stat)) {
 			long anonpages, filepages;
 
-			if (!enumerator_value("MM_FILEPAGES", &filepages) ||
-			    !enumerator_value("MM_ANONPAGES", &anonpages)) {
-				filepages = 0;
-				anonpages = 1;
-			}
+			anonpages = tt->anonpages;
+			filepages = tt->filepages;
 			rss += LONG(tt->mm_struct +
 				OFFSET(mm_struct_rss_stat) +
 				OFFSET(mm_rss_stat_count) +
@@ -4125,16 +4149,30 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 
 		/* Check whether SPLIT_RSS_COUNTING is enabled */
 		if (VALID_MEMBER(task_struct_rss_stat)) {
-			int i, sync_rss;
+			int sync_rss;
 			ulong tgid;
 			struct task_context *tc1;
+			struct tgid_task_context *ttc_array, *ttc, *start, *end;
 
-			tgid = task_tgid(task);
-
-			tc1 = FIRST_CONTEXT();
-			for (i = 0; i < RUNNING_TASKS(); i++, tc1++) {
-				if (task_tgid(tc1->task) != tgid)
-					continue;
+			ttc_array = tt->ttc_array;
+			tgid = tc->tgid;
+			ttc = ttc_array + tc->tgid_task_context_index;
+			start = ttc;
+			if (tc->tgid_task_context_index > 0)
+			{
+				while ((start > ttc_array) && ((start - 1)->tgid == start->tgid))
+					start--;
+			}
+			end = ttc;
+			if (tc->tgid_task_context_index < RUNNING_TASKS())
+			{
+				while ((end < (ttc_array + (RUNNING_TASKS() - 1))) &&
+					(end->tgid == (end + 1)->tgid))
+					end++;
+			}
+			for (;start <= end;)
+			{
+				tc1 = start->tc;
 
 				/* count 0 -> filepages */
 				if (!readmem(tc1->task +
@@ -4160,6 +4198,9 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 						continue;
 
 				rss += sync_rss;
+				if (start == (ttc_array + (RUNNING_TASKS() - 1)))
+					break;
+				start++;
 			}
 		}
 
diff --git a/crash-7.0.7/task.c b/crash-7.0.7/task.c
index 75b1964..5016afb 100755
--- a/crash-7.0.7/task.c
+++ b/crash-7.0.7/task.c
@@ -19,6 +19,7 @@
 
 static ulong get_panic_context(void);
 static int sort_by_pid(const void *, const void *);
+static int sort_by_tgid(const void *, const void *);
 static void show_ps(ulong, struct psinfo *);
 static struct task_context *panic_search(void);
 static void allocate_task_space(int);
@@ -493,6 +494,7 @@ task_init(void)
 	}
 
 	sort_context_array();
+	set_tgid_context_array();
 
 	if (pc->flags & SILENT)
 		initialize_task_state();
@@ -639,6 +641,11 @@ allocate_task_space(int cnt)
                     malloc(cnt * sizeof(struct task_context))))
                         error(FATAL, "cannot malloc context array (%d tasks)",
                                 cnt);
+		if (!(tt->ttc_array = (struct tgid_task_context *)
+                    malloc(cnt * sizeof(struct tgid_task_context))))
+                        error(FATAL, "cannot malloc ttc array (%d tasks)",
+                                cnt);
+
 	} else {
                 if (!(tt->task_local = (void *)
 		    realloc(tt->task_local, cnt * sizeof(void *)))) 
@@ -652,6 +659,13 @@ allocate_task_space(int cnt)
                         error(FATAL,
                             "%scannot realloc context array (%d tasks)",
 	                	(pc->flags & RUNTIME) ? "" : "\n", cnt);
+
+		 if (!(tt->ttc_array = (struct tgid_task_context *)
+                    realloc(tt->ttc_array, 
+		    cnt * sizeof(struct tgid_task_context)))) 
+                        error(FATAL,
+                            "%scannot realloc ttc array (%d tasks)",
+	                	(pc->flags & RUNTIME) ? "" : "\n", cnt);
 	}
 }
 
@@ -2281,6 +2295,7 @@ store_context(struct task_context *tc, ulong task, char *tp)
         int *processor_addr;
         ulong *parent_addr;
         ulong *mm_addr;
+	ulong *tgid;
         int has_cpu;
 	int do_verify;
 
@@ -2320,6 +2335,7 @@ store_context(struct task_context *tc, ulong task, char *tp)
 	else
         	parent_addr = (ulong *)(tp + OFFSET(task_struct_parent));
         mm_addr = (ulong *)(tp + OFFSET(task_struct_mm));
+	tgid = (ulong *)(tp + OFFSET(task_struct_tgid));
         has_cpu = task_has_cpu(task, tp);
 
         tc->pid = (ulong)(*pid_addr);
@@ -2327,6 +2343,7 @@ store_context(struct task_context *tc, ulong task, char *tp)
         tc->processor = *processor_addr;
         tc->ptask = *parent_addr;
         tc->mm_struct = *mm_addr;
+	tc->tgid = *tgid;
         tc->task = task;
         tc->tc_next = NULL;
 
@@ -2445,6 +2462,54 @@ sort_context_array_by_last_run(void)
 }
 
 /*
+ *  Fill and sort the tgid_task_context array by tgid.
+ */
+void
+set_tgid_context_array(void)
+{
+	ulong i;
+	struct task_context *tc;
+	struct tgid_task_context *ttc;
+	
+	if (VALID_MEMBER(mm_struct_rss) || (!VALID_MEMBER(task_struct_rss_stat)))
+		return;
+	/*
+	 * Initialize the tgid_task_context array from the task context array.
+	 */
+	tc = FIRST_CONTEXT();
+	ttc = tt->ttc_array;
+	for (i = 0; i < tt->running_tasks; i++, tc++, ttc++)
+	{
+		ttc->tgid = tc->tgid;
+		ttc->tc = tc;
+	}
+
+	qsort((void *)tt->ttc_array, (size_t)tt->running_tasks,
+        	sizeof(struct tgid_task_context), sort_by_tgid);
+
+	/*
+	 * record the index of tc in tgid task context array
+	 */
+	ttc = tt->ttc_array;
+	for (i = 0; i < tt->running_tasks; i++, ttc++)
+	{
+		ttc->tc->tgid_task_context_index = i;
+	}
+}
+
+static int
+sort_by_tgid(const void *arg1, const void *arg2)
+{
+	struct tgid_task_context *t1, *t2;
+
+	t1 = (struct tgid_task_context *)arg1;
+	t2 = (struct tgid_task_context *)arg2;
+
+	return (t1->tgid < t2->tgid ? -1 :
+		t1->tgid == t2->tgid ? 0 : 1);
+}
+
+/*
  *  Keep a stash of the last task_struct accessed.  Chances are it will
  *  be hit several times before the next task is accessed.
  */
@@ -2916,7 +2981,10 @@ cmd_ps(void)
 		cmd_usage(pc->curcmd, SYNOPSIS);
 
 	if (flag & (PS_LAST_RUN|PS_MSECS))
+	{
 		sort_context_array_by_last_run();
+		set_tgid_context_array();
+	}
 	else if (psinfo.cpus) {
 		error(INFO, "-C option is only applicable with -l and -m\n");
 		goto bailout;
@@ -5966,7 +6034,10 @@ foreach(struct foreach_data *fd)
 				}
 			}
 			if (fd->flags & (FOREACH_l_FLAG|FOREACH_m_FLAG))
+			{
 				sort_context_array_by_last_run();
+				set_tgid_context_array();
+			}
 			if ((fd->flags & FOREACH_m_FLAG) && 
 			    INVALID_MEMBER(rq_timestamp))
 				option_not_supported('m');
@@ -6655,6 +6726,7 @@ dump_task_table(int verbose)
 	fprintf(fp, "        task_start: %lx\n",  tt->task_start);
 	fprintf(fp, "          task_end: %lx\n",  tt->task_end);
 	fprintf(fp, "        task_local: %lx\n",  (ulong)tt->task_local);
+	fprintf(fp, "         ttc_array: %lx\n",  (ulong)tt->ttc_array);
 	fprintf(fp, "         max_tasks: %d\n", tt->max_tasks);
 	fprintf(fp, "        nr_threads: %d\n", tt->nr_threads);
 	fprintf(fp, "     running_tasks: %ld\n", tt->running_tasks);
-- 
1.7.1
