----- Original Message ----- > > > ----- Original Message ----- > > Hello Dave, > > > > In runq command, when dumping cfs and rt runqueues, > > it seems that we get the wrong nr_running values of rq > > and cfs_rq. > > > > Please refer to the attached patch. > > > > Thanks > > Zhang Yanfei > > Hello Zhang, > > I understand what you are trying to accomplish with this patch, but > none of my test dumpfiles can actually verify it because there is no > difference with or without your patch. What failure mode did you see > in your testing? I presume that it just showed "[no tasks queued]" > for the RT runqueue when there were actually tasks queued there? > > The reason I ask is that I'm thinking that a better solution would > be to simplify dump_CFS_runqueues() by *not* accessing and using > rq_nr_running, cfs_rq_nr_running or cfs_rq_h_nr_running. > > Those counters are only read to determine the "active" argument to > pass to dump_RT_prio_array(), which returns immediately if it is > FALSE. However, if we get rid of the "active" argument and simply > allow dump_RT_prio_array() to always check its queues every time, > it still works just fine. > > For example, I tested my set of sample dumpfiles with this patch: > > diff -u -r1.205 task.c > --- task.c 12 Jul 2012 20:04:00 -0000 1.205 > +++ task.c 22 Aug 2012 15:33:32 -0000 > @@ -7636,7 +7636,7 @@ > OFFSET(cfs_rq_tasks_timeline)); > } > > - dump_RT_prio_array(nr_running != cfs_rq_nr_running, > + dump_RT_prio_array(TRUE, > runq + OFFSET(rq_rt) + OFFSET(rt_rq_active), > &runqbuf[OFFSET(rq_rt) + > OFFSET(rt_rq_active)]); > > and the output is identical to testing with, and without, your patch. > > So the question is whether dump_CFS_runqueues() should be needlessly > complicated with all of the "nr_running" references? > > In fact, it also seems possible that a crash could happen at a point in > the scheduler code where those counters are not > valid/current/trustworthy. > > So unless you can convince me otherwise, I'd prefer to just remove > the "nr_running" business completely. Hello Zhang, Here's the patch I've got queued, which resolves the bug you encountered by simplifying things: --- task.c 12 Jul 2012 20:04:00 -0000 1.205 +++ task.c 24 Aug 2012 18:05:13 -0000 @@ -67,7 +67,7 @@ static int dump_tasks_in_cfs_rq(ulong); static void dump_on_rq_tasks(void); static void dump_CFS_runqueues(void); -static void dump_RT_prio_array(int, ulong, char *); +static void dump_RT_prio_array(ulong, char *); static void task_struct_member(struct task_context *,unsigned int, struct reference *); static void signal_reference(struct task_context *, ulong, struct reference *); static void do_sig_thread_group(ulong); @@ -7547,7 +7547,6 @@ char *runqbuf, *cfs_rq_buf; ulong tasks_timeline ATTRIBUTE_UNUSED; struct task_context *tc; - long nr_running, cfs_rq_nr_running; struct rb_root *root; struct syment *rq_sp, *init_sp; @@ -7622,22 +7621,15 @@ readmem(cfs_rq, KVADDR, cfs_rq_buf, SIZE(cfs_rq), "per-cpu cfs_rq", FAULT_ON_ERROR); - nr_running = LONG(cfs_rq_buf + OFFSET(rq_nr_running)); - cfs_rq_nr_running = ULONG(cfs_rq_buf + - OFFSET(cfs_rq_nr_running)); root = (struct rb_root *)(cfs_rq + OFFSET(cfs_rq_tasks_timeline)); } else { cfs_rq = runq + OFFSET(rq_cfs); - nr_running = LONG(runqbuf + OFFSET(rq_nr_running)); - cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) + - OFFSET(cfs_rq_nr_running)); root = (struct rb_root *)(runq + OFFSET(rq_cfs) + OFFSET(cfs_rq_tasks_timeline)); } - dump_RT_prio_array(nr_running != cfs_rq_nr_running, - runq + OFFSET(rq_rt) + OFFSET(rt_rq_active), + dump_RT_prio_array(runq + OFFSET(rq_rt) + OFFSET(rt_rq_active), &runqbuf[OFFSET(rq_rt) + OFFSET(rt_rq_active)]); fprintf(fp, " CFS RB_ROOT: %lx\n", (ulong)root); @@ -7657,7 +7649,7 @@ } static void -dump_RT_prio_array(int active, ulong k_prio_array, char *u_prio_array) +dump_RT_prio_array(ulong k_prio_array, char *u_prio_array) { int i, c, tot, cnt, qheads; ulong offset, kvaddr, uvaddr; @@ -7668,12 +7660,6 @@ fprintf(fp, " RT PRIO_ARRAY: %lx\n", k_prio_array); - if (!active) { - INDENT(5); - fprintf(fp, "[no tasks queued]\n"); - return; - } - qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ? i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));
--- task.c 12 Jul 2012 20:04:00 -0000 1.205 +++ task.c 24 Aug 2012 18:05:13 -0000 @@ -67,7 +67,7 @@ static int dump_tasks_in_cfs_rq(ulong); static void dump_on_rq_tasks(void); static void dump_CFS_runqueues(void); -static void dump_RT_prio_array(int, ulong, char *); +static void dump_RT_prio_array(ulong, char *); static void task_struct_member(struct task_context *,unsigned int, struct reference *); static void signal_reference(struct task_context *, ulong, struct reference *); static void do_sig_thread_group(ulong); @@ -7547,7 +7547,6 @@ char *runqbuf, *cfs_rq_buf; ulong tasks_timeline ATTRIBUTE_UNUSED; struct task_context *tc; - long nr_running, cfs_rq_nr_running; struct rb_root *root; struct syment *rq_sp, *init_sp; @@ -7622,22 +7621,15 @@ readmem(cfs_rq, KVADDR, cfs_rq_buf, SIZE(cfs_rq), "per-cpu cfs_rq", FAULT_ON_ERROR); - nr_running = LONG(cfs_rq_buf + OFFSET(rq_nr_running)); - cfs_rq_nr_running = ULONG(cfs_rq_buf + - OFFSET(cfs_rq_nr_running)); root = (struct rb_root *)(cfs_rq + OFFSET(cfs_rq_tasks_timeline)); } else { cfs_rq = runq + OFFSET(rq_cfs); - nr_running = LONG(runqbuf + OFFSET(rq_nr_running)); - cfs_rq_nr_running = ULONG(runqbuf + OFFSET(rq_cfs) + - OFFSET(cfs_rq_nr_running)); root = (struct rb_root *)(runq + OFFSET(rq_cfs) + OFFSET(cfs_rq_tasks_timeline)); } - dump_RT_prio_array(nr_running != cfs_rq_nr_running, - runq + OFFSET(rq_rt) + OFFSET(rt_rq_active), + dump_RT_prio_array(runq + OFFSET(rq_rt) + OFFSET(rt_rq_active), &runqbuf[OFFSET(rq_rt) + OFFSET(rt_rq_active)]); fprintf(fp, " CFS RB_ROOT: %lx\n", (ulong)root); @@ -7657,7 +7649,7 @@ } static void -dump_RT_prio_array(int active, ulong k_prio_array, char *u_prio_array) +dump_RT_prio_array(ulong k_prio_array, char *u_prio_array) { int i, c, tot, cnt, qheads; ulong offset, kvaddr, uvaddr; @@ -7668,12 +7660,6 @@ fprintf(fp, " RT PRIO_ARRAY: %lx\n", k_prio_array); - if (!active) { - INDENT(5); - fprintf(fp, "[no tasks queued]\n"); - return; - } - qheads = (i = ARRAY_LENGTH(rt_prio_array_queue)) ? i : get_array_length("rt_prio_array.queue", NULL, SIZE(list_head));
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility