improve ps performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello Dave,

I have modified the patch according to your suggestion.
Please check.

  Thanks,
    Pan
From 043f599d0fa02e9f81cd2187b27dff3bacfecdf6 Mon Sep 17 00:00:00 2001
From: panfengyun <panfy.fnst@xxxxxxxxxxxxxx>
Date: Tue, 9 Sep 2014 13:26:22 +0800
Subject: [PATCH] improve ps performance

When using the ps command on a system that contains numerous tasks, especially on RHEL 7,
it takes too much time to gather the task data.

We collect the output of the ps command as part of the summary information of a crash
dump, and in a catastrophic case a crash dump could contain a huge number of tasks due to
the bug that caused the system panic. It is a problem if the ps command needs to
spend several hours to finish execution.

After investigation, we found that, because of the following kernel commit
and a fix in crash 7.0.4, we have to wait a long time for the ps command to calculate
RSS.

kernel commit:
<cut>
commit 34e55232e59f7b19050267a05ff1226e5cd122a5
Author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Date:   Fri Mar 5 13:41:40 2010 -0800

    mm: avoid false sharing of mm_counter
...
<cut>

crash fix:
<cut>
7.0.4    - Fix for the "ps" command's display of per-task RSS and %MEM values
           in Linux 2.6.34 and later kernels in which SPLIT_RSS_COUNTING is
           enabled.  Without the patch, the values are only taken from each
           task's mm_struct.rss_stat structure, which may contain stale values
           because they may not be synchronized with the RSS values stored
           in each per-thread task_struct.rss_stat structure; this may lead
           to invalid or slightly low RSS values, and worst-case, the %MEM
           value may show garbage percentage values.
           (vinayakm.list@xxxxxxxxx)
<cut>

So:
1. We fix the RSS calculation in get_task_mem_usage() to improve the performance of the
   ps command.
2. We also fix the lookup of the enumerators MM_FILEPAGES and MM_ANONPAGES,
   which also speeds up the ps command.

The first fix accounts for most of the performance improvement of ps.

We also ran some tests; please check the following data:

OS           process number     without fix         fix all 2 parts
---------------------------------------------------------------------
RHEL6            ~100k             ~180s                 ~70s
RHEL7            ~100k          ~51870s(~14h)            ~58s

Signed-off-by: panfengyun <panfy.fnst@xxxxxxxxxxxxxx>
---
 crash-7.0.7/defs.h   |   10 ++++++
 crash-7.0.7/main.c   |    1 +
 crash-7.0.7/memory.c |   75 ++++++++++++++++++++++++++++++++++++++++----------
 crash-7.0.7/task.c   |   49 ++++++++++++++++++++++++++++++++
 4 files changed, 120 insertions(+), 15 deletions(-)

diff --git a/crash-7.0.7/defs.h b/crash-7.0.7/defs.h
index 44df6ae..2fb3a4c 100755
--- a/crash-7.0.7/defs.h
+++ b/crash-7.0.7/defs.h
@@ -761,10 +761,18 @@ struct task_context {                     /* context stored for each task */
 	struct task_context *tc_next;
 };
 
+struct tgid_task_context{               /* tgid and task stored for each task */
+        ulong tgid;
+	ulong task;
+};
+
 struct task_table {                      /* kernel/local task table data */
 	struct task_context *current;
 	struct task_context *context_array;
+	struct tgid_task_context *ttc_array;
 	void (*refresh_task_table)(void);
+	long filepages;
+	long anonpages;
 	ulong flags;
         ulong task_start;
 	ulong task_end;
@@ -4834,6 +4842,8 @@ ulong generic_get_stackbase(ulong);
 ulong generic_get_stacktop(ulong);
 void dump_task_table(int);
 void sort_context_array(void);
+void sort_tgid_task_context_array(void);
+int sort_by_tgid(const void *, const void *);
 int in_irq_ctx(ulonglong, int, ulong);
 
 /*
diff --git a/crash-7.0.7/main.c b/crash-7.0.7/main.c
index b0524d2..3d0485c 100755
--- a/crash-7.0.7/main.c
+++ b/crash-7.0.7/main.c
@@ -773,6 +773,7 @@ reattempt:
 			} else if (!(pc->flags & MINIMAL_MODE)) {
 				tt->refresh_task_table();
 				sort_context_array();
+				sort_tgid_task_context_array();	
 			}
 		}
                 if (!STREQ(pc->curcmd, pc->program_name))
diff --git a/crash-7.0.7/memory.c b/crash-7.0.7/memory.c
index c97dd39..49b86f7 100755
--- a/crash-7.0.7/memory.c
+++ b/crash-7.0.7/memory.c
@@ -225,6 +225,7 @@ static int next_module_vaddr(ulong, ulong *);
 static int next_identity_mapping(ulong, ulong *);
 static int vm_area_page_dump(ulong, ulong, ulong, ulong, ulong,
 	struct reference *);
+void set_pages(void);
 static int dump_swap_info(ulong, ulong *, ulong *);
 static void swap_info_init(void);
 static char *get_swapdev(ulong, char *);
@@ -1066,6 +1067,7 @@ vm_init(void)
 
 	page_flags_init();
 
+	set_pages();
 	vt->flags |= VM_INIT;
 }
 
@@ -4072,6 +4074,31 @@ in_user_stack(ulong task, ulong vaddr)
 }
 
 /*
+ * set the const value of filepages and anonpages 
+ * according to MM_FILEPAGES and MM_ANONPAGES.
+ */
+void 
+set_pages(void)
+{
+	long anonpages, filepages;
+	if (VALID_MEMBER(mm_struct_rss))
+		return;
+	if (VALID_MEMBER(mm_struct_rss_stat)) 
+	{
+		if (!enumerator_value("MM_FILEPAGES", &filepages) ||
+		!enumerator_value("MM_ANONPAGES", &anonpages)) 
+		{
+			filepages = 0;
+			anonpages = 1;
+		}
+		tt->filepages = filepages;
+		tt->anonpages = anonpages;
+	} else {
+		return;
+	}
+}
+
+/*
  *  Fill in the task_mem_usage structure with the RSS, virtual memory size,
  *  percent of physical memory being used, and the mm_struct address.
  */
@@ -4108,11 +4135,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 		if (VALID_MEMBER(mm_struct_rss_stat)) {
 			long anonpages, filepages;
 
-			if (!enumerator_value("MM_FILEPAGES", &filepages) ||
-			    !enumerator_value("MM_ANONPAGES", &anonpages)) {
-				filepages = 0;
-				anonpages = 1;
-			}
+			anonpages = tt->anonpages;
+			filepages = tt->filepages;
 			rss += LONG(tt->mm_struct +
 				OFFSET(mm_struct_rss_stat) +
 				OFFSET(mm_rss_stat_count) +
@@ -4125,19 +4149,36 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 
 		/* Check whether SPLIT_RSS_COUNTING is enabled */
 		if (VALID_MEMBER(task_struct_rss_stat)) {
-			int i, sync_rss;
-			ulong tgid;
-			struct task_context *tc1;
+			int sync_rss;
+			struct tgid_task_context *ttc_array, *ttc, *first, *last;
+
+			ttc_array = tt->ttc_array;
+			if (!(ttc = (struct tgid_task_context *)
+				malloc(sizeof(struct tgid_task_context))))
+				error(FATAL, "cannot malloc ttc at get_task_mem_usage()\n");
+			ttc->tgid = task_tgid(task);
+
+			ttc = (struct tgid_task_context *)bsearch(ttc, ttc_array, RUNNING_TASKS(), 
+				sizeof(struct tgid_task_context), sort_by_tgid);
+			if (NULL == ttc)
+				error(FATAL,
+				"cannot search the tgid_task_context int ttc_array : %lx\n",task);
 
-			tgid = task_tgid(task);
+			/* find the first element which have same tgid */
+			first = ttc;
+			while ((first > ttc_array) && ((first -1 )->tgid == first->tgid)) 
+				first--;
 
-			tc1 = FIRST_CONTEXT();
-			for (i = 0; i < RUNNING_TASKS(); i++, tc1++) {
-				if (task_tgid(tc1->task) != tgid)
-					continue;
+			/* find the last element which have same tgid */
+			last = ttc;
+			while ((last < (ttc_array + (RUNNING_TASKS() - 1))) && 
+				(last->tgid == (last + 1)->tgid))
+				last++;
 
+			while (first <= last)
+			{
 				/* count 0 -> filepages */
-				if (!readmem(tc1->task +
+				if (!readmem(first->task +
 					OFFSET(task_struct_rss_stat) +
 					OFFSET(task_rss_stat_count), KVADDR,
 					&sync_rss,
@@ -4149,7 +4190,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 				rss += sync_rss;
 
 				/* count 1 -> anonpages */
-				if (!readmem(tc1->task +
+				if (!readmem(first->task +
 					OFFSET(task_struct_rss_stat) +
 					OFFSET(task_rss_stat_count) +
 					sizeof(int),
@@ -4160,6 +4201,10 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 						continue;
 
 				rss += sync_rss;
+
+				if(first == last)
+					break;
+				first++;
 			}
 		}
 
diff --git a/crash-7.0.7/task.c b/crash-7.0.7/task.c
index 75b1964..ce7ca59 100755
--- a/crash-7.0.7/task.c
+++ b/crash-7.0.7/task.c
@@ -493,6 +493,7 @@ task_init(void)
 	}
 
 	sort_context_array();
+	sort_tgid_task_context_array();
 
 	if (pc->flags & SILENT)
 		initialize_task_state();
@@ -639,6 +640,11 @@ allocate_task_space(int cnt)
                     malloc(cnt * sizeof(struct task_context))))
                         error(FATAL, "cannot malloc context array (%d tasks)",
                                 cnt);
+		if (!(tt->ttc_array = (struct tgid_task_context *)
+                    malloc(cnt * sizeof(struct tgid_task_context))))
+                        error(FATAL, "cannot malloc ttc array (%d tasks)",
+                                cnt);
+
 	} else {
                 if (!(tt->task_local = (void *)
 		    realloc(tt->task_local, cnt * sizeof(void *)))) 
@@ -652,6 +658,13 @@ allocate_task_space(int cnt)
                         error(FATAL,
                             "%scannot realloc context array (%d tasks)",
 	                	(pc->flags & RUNTIME) ? "" : "\n", cnt);
+
+		 if (!(tt->ttc_array = (struct tgid_task_context *)
+                    realloc(tt->ttc_array, 
+		    cnt * sizeof(struct tgid_task_context)))) 
+                        error(FATAL,
+                            "%scannot realloc ttc array (%d tasks)",
+	                	(pc->flags & RUNTIME) ? "" : "\n", cnt);
 	}
 }
 
@@ -2281,8 +2294,10 @@ store_context(struct task_context *tc, ulong task, char *tp)
         int *processor_addr;
         ulong *parent_addr;
         ulong *mm_addr;
+	ulong *tgid;
         int has_cpu;
 	int do_verify;
+	struct tgid_task_context *ttc;
 
 	processor_addr = NULL;
 
@@ -2320,6 +2335,7 @@ store_context(struct task_context *tc, ulong task, char *tp)
 	else
         	parent_addr = (ulong *)(tp + OFFSET(task_struct_parent));
         mm_addr = (ulong *)(tp + OFFSET(task_struct_mm));
+	tgid = (ulong *)(tp + OFFSET(task_struct_tgid));
         has_cpu = task_has_cpu(task, tp);
 
         tc->pid = (ulong)(*pid_addr);
@@ -2330,6 +2346,14 @@ store_context(struct task_context *tc, ulong task, char *tp)
         tc->task = task;
         tc->tc_next = NULL;
 
+	/*
+	 *  Fill a tgid_task_context structure with the data from tc.
+	 */
+	ttc = tt->ttc_array + tt->running_tasks;
+	ttc->tgid = *tgid;
+	ttc->task = task;
+
+
         if (do_verify && !verify_task(tc, do_verify)) {
 		error(INFO, "invalid task address: %lx\n", tc->task);
                 BZERO(tc, sizeof(struct task_context));
@@ -2445,6 +2469,30 @@ sort_context_array_by_last_run(void)
 }
 
 /*
+ *  Set the tgid_task_context array by tgid number.
+ */
+void
+sort_tgid_task_context_array(void)
+{
+	if (VALID_MEMBER(mm_struct_rss) || (!VALID_MEMBER(task_struct_rss_stat)))
+		return;
+
+	qsort((void *)tt->ttc_array, (size_t)tt->running_tasks,
+        	sizeof(struct tgid_task_context), sort_by_tgid);
+}
+int
+sort_by_tgid(const void *arg1, const void *arg2)
+{
+	struct tgid_task_context *t1, *t2;
+
+	t1 = (struct tgid_task_context *)arg1;
+	t2 = (struct tgid_task_context *)arg2;
+
+	return (t1->tgid < t2->tgid ? -1 :
+		t1->tgid == t2->tgid ? 0 : 1);
+}
+
+/*
  *  Keep a stash of the last task_struct accessed.  Chances are it will
  *  be hit several times before the next task is accessed.
  */
@@ -6577,6 +6625,7 @@ dump_task_table(int verbose)
 		fprintf(fp, "          .tc_next: %lx\n", (ulong)tc->tc_next);
 	}
 	fprintf(fp, "     context_array: %lx\n",  (ulong)tt->context_array);
+	fprintf(fp, "         ttc_array: %lx\n",  (ulong)tt->ttc_array);
 	fprintf(fp, "refresh_task_table: ");
 	if (tt->refresh_task_table == refresh_fixed_task_table)
 		fprintf(fp, "refresh_fixed_task_table()\n");
-- 
1.7.1

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux