Re: Question about core files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 6 Oct 2009, Manish Katiyar wrote:

On Tue, Oct 6, 2009 at 7:34 PM, Holger Kiehl <Holger.Kiehl@xxxxxx> wrote:
Hello

Most of the time I compile my application without the -g option for
performance reasons. The problem is that when it hits some bug and dumps
core, this is not very useful because there is hardly any information
in it. Is there some way to get some useful information out of
the core file?

Is it possible to post your code? At least the start_process()
function. Given that you have got a SIGSEGV, it is probably an invalid
pointer access.

The code is GPL so that is no problem. However it is long so I just
cut out start_process() which you will find below.

You can also try to print $eip (or $rip, since this is a 64-bit machine)
and look around the assembly. Output of "disas start_process" from
gdb will also help.

I tried those but I am not familiar with assembly:

   (gdb) print $eip
   $1 = void
   (gdb) print $rip
   $2 = (void (*)()) 0x404b5f <start_process+143>
   (gdb) where
   #0  0x000000304cc32215 in raise (sig=<value optimized out>)
       at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
   #1  0x000000304cc33d83 in abort () at abort.c:88
   #2  0x000000000040b174 in sig_segv ()
   #3  <signal handler called>
   #4  0x0000000000404b5f in start_process ()
   #5  0x0000000000407b9a in main ()
   (gdb) disas start_process
   Dump of assembler code for function start_process:
   0x0000000000404ad0 <start_process+0>:   movslq %esi,%rsi
   0x0000000000404ad3 <start_process+3>:   mov    %rbx,-0x30(%rsp)
   0x0000000000404ad8 <start_process+8>:   mov    %rbp,-0x28(%rsp)
   0x0000000000404add <start_process+13>:  mov    %rsi,%r11
   0x0000000000404ae0 <start_process+16>:  mov    $0x68,%esi
   0x0000000000404ae5 <start_process+21>:  mov    %r12,-0x20(%rsp)
   0x0000000000404aea <start_process+26>:  imul   %rsi,%r11
   0x0000000000404aee <start_process+30>:  mov    %r13,-0x18(%rsp)
   0x0000000000404af3 <start_process+35>:  mov    %r14,-0x10(%rsp)
   0x0000000000404af8 <start_process+40>:  mov    %r15,-0x8(%rsp)
   0x0000000000404afd <start_process+45>:  sub    $0x568,%rsp
   0x0000000000404b04 <start_process+52>:  mov    %rdx,%rbx
   0x0000000000404b07 <start_process+55>:  mov    %edi,0x24(%rsp)
   0x0000000000404b0b <start_process+59>:  mov    %r11,%rdi
   0x0000000000404b0e <start_process+62>:  add    0x225513(%rip),%rdi        # 0x62a028 <qb>
   0x0000000000404b15 <start_process+69>:  cmpb   $0x0,0x31(%rdi)
   0x0000000000404b19 <start_process+73>:  je     0x404ed8 <start_process+1032>
   0x0000000000404b1f <start_process+79>:  movslq 0x28(%rdi),%rax
   0x0000000000404b23 <start_process+83>:  lea    0x0(,%rax,8),%rdx
   0x0000000000404b2b <start_process+91>:  mov    %rax,%r8
   0x0000000000404b2e <start_process+94>:  shl    $0x6,%r8
   0x0000000000404b32 <start_process+98>:  sub    %rdx,%r8
   0x0000000000404b35 <start_process+101>: add    0x2259cc(%rip),%r8        # 0x62a508 <mdb>
   0x0000000000404b3c <start_process+108>: mov    0x2c(%r8),%r9d
   0x0000000000404b40 <start_process+112>: test   %r9d,%r9d
   0x0000000000404b43 <start_process+115>: jne    0x404d70 <start_process+672>
   0x0000000000404b49 <start_process+121>: movslq 0x24(%rsp),%rax
   0x0000000000404b4e <start_process+126>: imul   $0x8f8,%rax,%r14
   0x0000000000404b55 <start_process+133>: mov    %r14,%rax
   0x0000000000404b58 <start_process+136>: add    0x225441(%rip),%rax        # 0x629fa0 <fsa>
   0x0000000000404b5f <start_process+143>: mov    0xec(%rax),%edx
   0x0000000000404b65 <start_process+149>: test   $0x1,%dl
   0x0000000000404b68 <start_process+152>: jne    0x404d30 <start_process+608>
   0x0000000000404b6e <start_process+158>: dec    %ecx
   0x0000000000404b70 <start_process+160>: je     0x404bd0 <start_process+256>
   0x0000000000404b72 <start_process+162>: mov    0xf0(%rax),%ecx
   0x0000000000404b78 <start_process+168>: mov    $0x2,%esi
   0x0000000000404b7d <start_process+173>: test   %ecx,%ecx
   0x0000000000404b7f <start_process+175>: jne    0x404c88 <start_process+440>
   0x0000000000404b85 <start_process+181>: test   %dl,%dl
   0x0000000000404b87 <start_process+183>: jns    0x404bd0 <start_process+256>
   0x0000000000404b89 <start_process+185>: mov    0x104(%rax),%ecx
   0x0000000000404b8f <start_process+191>: movslq 0x28(%rdi),%rax
   0x0000000000404b93 <start_process+195>: mov    $0xffffffff,%esi
   0x0000000000404b98 <start_process+200>: mov    %r11,(%rsp)
   0x0000000000404b9c <start_process+204>: lea    0x0(,%rax,8),%rdx
   0x0000000000404ba4 <start_process+212>: shl    $0x6,%rax
   0x0000000000404ba8 <start_process+216>: sub    %rdx,%rax
   0x0000000000404bab <start_process+219>: mov    0x225956(%rip),%rdx        # 0x62a508 <mdb>
   0x0000000000404bb2 <start_process+226>: mov    0x28(%rdx,%rax,1),%edi
   0x0000000000404bb6 <start_process+230>: mov    %rbx,%rdx
   0x0000000000404bb9 <start_process+233>: callq  0x41ab00 <check_error_queue>
   0x0000000000404bbe <start_process+238>: test   %eax,%eax
   0x0000000000404bc0 <start_process+240>: mov    %eax,%esi
   0x0000000000404bc2 <start_process+242>: mov    (%rsp),%r11
   0x0000000000404bc6 <start_process+246>: jne    0x404c88 <start_process+440>
   0x0000000000404bcc <start_process+252>: nopl   0x0(%rax)
   0x0000000000404bd0 <start_process+256>: mov    %r14,%rcx
   0x0000000000404bd3 <start_process+259>: add    0x2253c6(%rip),%rcx        # 0x629fa0 <fsa>
   0x0000000000404bda <start_process+266>: cmpb   $0x5,0xba(%rcx)
   0x0000000000404be1 <start_process+273>: je     0x404f88 <start_process+1208>
   0x0000000000404be7 <start_process+279>: mov    0x225462(%rip),%rax        # 0x62a050 <p_afd_status>
   0x0000000000404bee <start_process+286>: mov    0x225194(%rip),%ecx        # 0x629d88 <max_connections>
   0x0000000000404bf4 <start_process+292>: cmp    %ecx,0x4f4(%rax)
   0x0000000000404bfa <start_process+298>: jge    0x404d30 <start_process+608>
   0x0000000000404c00 <start_process+304>: mov    %r14,%r8
   0x0000000000404c03 <start_process+307>: add    0x225396(%rip),%r8        # 0x629fa0 <fsa>
   0x0000000000404c0a <start_process+314>: mov    0x174(%r8),%edi
   0x0000000000404c11 <start_process+321>: cmp    %edi,0x170(%r8)
   0x0000000000404c18 <start_process+328>: jge    0x404d30 <start_process+608>
   0x0000000000404c1e <start_process+334>: test   %ecx,%ecx
   0x0000000000404c20 <start_process+336>: jle    0x404c5e <start_process+398>
   0x0000000000404c22 <start_process+338>: mov    0x2251ff(%rip),%rsi        # 0x62---Type <return> to continue, or q <return> to quit---q

So all I now know is that it happened with the assembly instruction:

   mov    0xec(%rax),%edx

But what does it tell me? In which part of my code could this be?

Thanks,
Holger

--------- code of start_process() ----------
/*
 * start_process() - Decide what to do with queue entry qb[qb_pos] for
 *                   host fsa[fsa_pos]: delete it, hand it to an already
 *                   running process, start a new process, or leave it
 *                   pending.
 *
 * Parameters:
 *   fsa_pos      - index into fsa[] of the host this job belongs to.
 *   qb_pos       - index into qb[] of the queue entry to process.
 *   current_time - time value used for the age limit and retry interval
 *                  comparisons.
 *   retry        - when YES, the error counter, error queue and retry
 *                  interval checks are bypassed; only STOP_TRANSFER_STAT
 *                  in host_status is still honoured.
 *
 * Returns:
 *   REMOVED            - the job exceeded its age limit and its files
 *                        were removed, or the entry has an empty
 *                        msg_name while DISABLE_RETRIEVE is set.
 *   qb[qb_pos].pid     - the job was handed over to an existing process
 *                        (pid copied from qb[exec_qb_pos]).
 *   pid of new process - make_process() returned a value > 0.
 *   PENDING            - nothing was started, or another waiting process
 *                        was signalled with SIGUSR1.
 *
 * NOTE(review): neither fsa_pos nor qb_pos is range checked in this
 *               function. Every fsa[fsa_pos], qb[qb_pos] and
 *               mdb[qb[qb_pos].pos] access relies on the caller passing
 *               valid indices and on the global pointers being valid.
 */
static pid_t
start_process(int fsa_pos, int qb_pos, time_t current_time, int retry)
{
   pid_t pid = PENDING;

   /*
    * Age limit handling: an entry with a message name whose job has an
    * age_limit > 0, whose host does not have DO_NOT_DELETE_DATA set and
    * whose age (current_time - creation_time) exceeds that limit is
    * deleted instead of being distributed.
    */
   if ((qb[qb_pos].msg_name[0] != '\0') &&
       (mdb[qb[qb_pos].pos].age_limit > 0) &&
       ((fsa[fsa_pos].host_status & DO_NOT_DELETE_DATA) == 0) &&
       (current_time > qb[qb_pos].creation_time) &&
       ((current_time - qb[qb_pos].creation_time) > mdb[qb[qb_pos].pos].age_limit))
   {
      char del_dir[MAX_PATH_LENGTH];

      if (fsa[fsa_pos].host_status & ERROR_QUEUE_SET)
      {
         remove_from_error_queue(mdb[qb[qb_pos].pos].job_id, &fsa[fsa_pos],
                                 fsa_pos, fsa_fd);
      }
      /*
       * NOTE(review): sprintf() is unbounded; this assumes the four
       *               concatenated parts always fit in MAX_PATH_LENGTH
       *               bytes - confirm.
       */
      (void)sprintf(del_dir, "%s%s%s/%s",
                    p_work_dir, AFD_FILE_DIR,
                    OUTGOING_DIR, qb[qb_pos].msg_name);
      extract_cus(qb[qb_pos].msg_name, dl.input_time, dl.split_job_counter,
                  dl.unique_number);
      remove_job_files(del_dir, fsa_pos, mdb[qb[qb_pos].pos].job_id,
                       FD, AGE_OUTPUT, -1);
      ABS_REDUCE(fsa_pos);
      pid = REMOVED;
   }
   else
   {
      /* Caches the result of check_error_queue() within the condition below. */
      int in_error_queue = NEITHER;

      /*
       * An entry with an empty msg_name is dropped when DISABLE_RETRIEVE
       * is set in the feature flag byte, which is read
       * AFD_FEATURE_FLAG_OFFSET_END bytes before the start of fsa.
       * (Further down an empty msg_name makes qb[qb_pos].pos index fra[]
       * instead of mdb[], so presumably it marks a retrieve job.)
       */
      if ((qb[qb_pos].msg_name[0] == '\0') &&
          (*(unsigned char *)((char *)fsa - AFD_FEATURE_FLAG_OFFSET_END) & DISABLE_RETRIEVE))
      {
         ABS_REDUCE(fsa_pos);

         return(REMOVED);
      }

      /*
       * Gate for doing any work at all. STOP_TRANSFER_STAT must not be
       * set and at least one of the following must hold:
       *   - retry == YES
       *   - error_counter == 0 and the host is either not in the error
       *     queue (ERROR_QUEUE_SET clear) or check_error_queue() says NO
       *     for this job (the result is stored in in_error_queue)
       *   - error_counter > 0, ERROR_QUEUE_SET is set, the retry interval
       *     has elapsed and the job is not in the error queue (using the
       *     cached in_error_queue, or calling check_error_queue() if it
       *     is still NEITHER)
       *   - no transfers are active and the retry interval has elapsed
       * The order of the sub-expressions matters: short-circuit
       * evaluation decides whether check_error_queue() is called and
       * whether in_error_queue gets assigned.
       */
      if (((fsa[fsa_pos].host_status & STOP_TRANSFER_STAT) == 0) &&
          ((retry == YES) ||
           ((fsa[fsa_pos].error_counter == 0) &&
            (((fsa[fsa_pos].host_status & ERROR_QUEUE_SET) == 0) ||
             ((fsa[fsa_pos].host_status & ERROR_QUEUE_SET) &&
              ((in_error_queue = check_error_queue(mdb[qb[qb_pos].pos].job_id,
                                                   -1, current_time,
                                                   fsa[fsa_pos].retry_interval)) == NO)))) ||
           ((fsa[fsa_pos].error_counter > 0) &&
            (fsa[fsa_pos].host_status & ERROR_QUEUE_SET) &&
            ((current_time - (fsa[fsa_pos].last_retry_time + fsa[fsa_pos].retry_interval)) >= 0) &&
            ((in_error_queue == NO) ||
             ((in_error_queue == NEITHER) &&
              (check_error_queue(mdb[qb[qb_pos].pos].job_id, -1, current_time,
                                 fsa[fsa_pos].retry_interval) == NO)))) ||
           ((fsa[fsa_pos].active_transfers == 0) &&
            ((current_time - (fsa[fsa_pos].last_retry_time + fsa[fsa_pos].retry_interval)) >= 0))))
      {
         /*
          * First lets try and take an existing process,
          * that is waiting for more data to come.
          */
         if ((fsa[fsa_pos].original_toggle_pos == NONE) &&
             ((fsa[fsa_pos].protocol_options & DISABLE_BURSTING) == 0) &&
             (fsa[fsa_pos].keep_connected > 0) &&
             (fsa[fsa_pos].active_transfers > 0) &&
             (fsa[fsa_pos].jobs_queued > 0) &&
             ((((fsa[fsa_pos].special_flag & KEEP_CON_NO_SEND) == 0) &&
               (qb[qb_pos].msg_name[0] != '\0')) ||
              (((fsa[fsa_pos].special_flag & KEEP_CON_NO_FETCH) == 0) &&
               (qb[qb_pos].msg_name[0] == '\0'))) &&
             ((qb[qb_pos].special_flag & HELPER_JOB) == 0))
         {
            /*
             * other_job_wait_pos[] / other_qb_pos[] collect the job slot
             * and queue position of candidate processes that could not
             * take this job directly; wait_counter is the number of
             * entries stored in both arrays.
             */
            int i,
                other_job_wait_pos[MAX_NO_PARALLEL_JOBS],
                other_qb_pos[MAX_NO_PARALLEL_JOBS],
                wait_counter = 0;

            /*
             * Look at every job slot of this host that has a process
             * (proc_id != -1) and whose unique_name[2] is 5.
             * NOTE(review): the value 5 appears to mark a process that is
             *               waiting for more data - confirm against the
             *               code that sets it.
             */
            for (i = 0; i < fsa[fsa_pos].allowed_transfers; i++)
            {
               if ((fsa[fsa_pos].job_status[i].proc_id != -1) &&
                   (fsa[fsa_pos].job_status[i].unique_name[2] == 5))
               {
                  int exec_qb_pos;

                  /* Find the queue entry that belongs to that process. */
                  qb_pos_pid(fsa[fsa_pos].job_status[i].proc_id, &exec_qb_pos);
                  if (exec_qb_pos != -1)
                  {
                     /*
                      * Both entries carry a message name and their jobs
                      * have the same type and port: give this job to the
                      * existing process.
                      */
                     if ((qb[qb_pos].msg_name[0] != '\0') &&
                         (qb[exec_qb_pos].msg_name[0] != '\0') &&
                         (mdb[qb[qb_pos].pos].type == mdb[qb[exec_qb_pos].pos].type) &&
                         (mdb[qb[qb_pos].pos].port == mdb[qb[exec_qb_pos].pos].port))
                     {
                        /*
                         * For a job that was already retried, store the
                         * retry count as text in file_name_in_use starting
                         * at offset 2, with byte 0 set to '\0' and byte 1
                         * set to 1.
                         */
                        if (qb[qb_pos].retries > 0)
                        {
                           fsa[fsa_pos].job_status[i].file_name_in_use[0] = '\0';
                           fsa[fsa_pos].job_status[i].file_name_in_use[1] = 1;
                           (void)sprintf(&fsa[fsa_pos].job_status[i].file_name_in_use[2],
                                         "%u", qb[qb_pos].retries);
                        }
                        fsa[fsa_pos].job_status[i].job_id = mdb[qb[qb_pos].pos].job_id;
                        mdb[qb[qb_pos].pos].last_transfer_time = mdb[qb[exec_qb_pos].pos].last_transfer_time = current_time;
                        (void)memcpy(fsa[fsa_pos].job_status[i].unique_name,
                                     qb[qb_pos].msg_name, MAX_MSG_NAME_LENGTH);
                        (void)memcpy(connection[qb[exec_qb_pos].connect_pos].msg_name,
                                     qb[qb_pos].msg_name, MAX_MSG_NAME_LENGTH);
                        /* This entry takes over pid and connection of the existing one. */
                        qb[qb_pos].pid = qb[exec_qb_pos].pid;
                        qb[qb_pos].connect_pos = qb[exec_qb_pos].connect_pos;
                        qb[qb_pos].special_flag |= BURST_REQUEUE;
                        connection[qb[exec_qb_pos].connect_pos].job_no = i;
                        /* Signal the existing process with SIGUSR1. */
                        if (qb[exec_qb_pos].pid > 0)
                        {
                           if (kill(qb[exec_qb_pos].pid, SIGUSR1) == -1)
                           {
                              system_log(DEBUG_SIGN, __FILE__, __LINE__,
                                         "Failed to send SIGUSR1 to %lld : %s",
                                         (pri_pid_t)qb[exec_qb_pos].pid, strerror(errno));
                           }
                           p_afd_status->burst2_counter++;
                        }
                        else
                        {
                           system_log(DEBUG_SIGN, __FILE__, __LINE__,
                                      "Hmmm, pid = %lld!!!", (pri_pid_t)qb[exec_qb_pos].pid);
                        }
                        if ((fsa[fsa_pos].transfer_rate_limit > 0) ||
                            (no_of_trl_groups > 0))
                        {
                           calc_trl_per_process(fsa_pos);
                        }
                        ABS_REDUCE(fsa_pos);
                        /* The old queue entry of that process is dropped. */
                        remove_msg(exec_qb_pos);

                        return(qb[qb_pos].pid);
                     }
                     else
                     {
                        /* Not directly usable; remember it for the second pass below. */
                        other_job_wait_pos[wait_counter] = i;
                        other_qb_pos[wait_counter] = exec_qb_pos;
                        wait_counter++;
                     }
                  }
                  else
                  {
                     system_log(DEBUG_SIGN, __FILE__, __LINE__,
                                "Unable to locate qb_pos for %lld [fsa_pos=%d].",
                                (pri_pid_t)fsa[fsa_pos].job_status[i].proc_id,
                                fsa_pos);
                  }
               }
            }
            /*
             * All allowed transfer slots are in use and at least one
             * remembered process exists: mark the first one that still
             * has unique_name[2] == 5 and a pid > 0 with the value 6 and
             * return PENDING. If its queue entry has a message name it is
             * signalled with SIGUSR1 first; when kill() fails the marker
             * is reset to 5 and the next candidate is tried.
             */
            if ((fsa[fsa_pos].active_transfers == fsa[fsa_pos].allowed_transfers) &&
                (wait_counter > 0))
            {
               for (i = 0; i < wait_counter; i++)
               {
                  if (fsa[fsa_pos].job_status[other_job_wait_pos[i]].unique_name[2] == 5)
                  {
                     if (qb[other_qb_pos[i]].pid > 0)
                     {
                        fsa[fsa_pos].job_status[other_job_wait_pos[i]].unique_name[2] = 6;
                        if (qb[other_qb_pos[i]].msg_name[0] == '\0')
                        {
                           return(PENDING);
                        }
                        else
                        {
                           if (kill(qb[other_qb_pos[i]].pid, SIGUSR1) == -1)
                           {
                              system_log(DEBUG_SIGN, __FILE__, __LINE__,
                                         "Failed to send SIGUSR1 to %lld : %s",
                                         (pri_pid_t)qb[other_qb_pos[i]].pid, strerror(errno));
                              fsa[fsa_pos].job_status[other_job_wait_pos[i]].unique_name[2] = 5;
                           }
                           else
                           {
                              return(PENDING);
                           }
                        }
                     }
                     else
                     {
                        system_log(DEBUG_SIGN, __FILE__, __LINE__,
                                   "Hmmm, pid = %lld!!!", (pri_pid_t)qb[other_qb_pos[i]].pid);
                     }
                  }
               }
            }
         }

         /*
          * No existing process took the job. Start a new one if both the
          * global limit (max_connections) and the per host limit
          * (allowed_transfers) leave room for it.
          */
         if ((p_afd_status->no_of_transfers < max_connections) &&
             (fsa[fsa_pos].active_transfers < fsa[fsa_pos].allowed_transfers))
         {
            int pos;

            if ((pos = get_free_connection()) == INCORRECT)
            {
               system_log(ERROR_SIGN, __FILE__, __LINE__,
                          "Failed to get free connection.");
            }
            else
            {
               /*
                * NOTE(review): when get_free_disp_pos() returns INCORRECT
                *               nothing is logged and connection[pos].job_no
                *               is left holding INCORRECT.
                */
               if ((connection[pos].job_no = get_free_disp_pos(fsa_pos)) != INCORRECT)
               {
                  /*
                   * Fill in the connection entry. With an empty msg_name
                   * qb[qb_pos].pos indexes fra[], otherwise it indexes
                   * mdb[].
                   */
                  if (qb[qb_pos].msg_name[0] == '\0')
                  {
                     connection[pos].fra_pos = qb[qb_pos].pos;
                     connection[pos].protocol = fra[qb[qb_pos].pos].protocol;
                     connection[pos].msg_name[0] = '\0';
                     (void)memcpy(connection[pos].dir_alias,
                                  fra[qb[qb_pos].pos].dir_alias,
                                  MAX_DIR_ALIAS_LENGTH + 1);
                  }
                  else
                  {
                     connection[pos].fra_pos = -1;
                     connection[pos].protocol = mdb[qb[qb_pos].pos].type;
                     (void)memcpy(connection[pos].msg_name, qb[qb_pos].msg_name,
                                  MAX_MSG_NAME_LENGTH);
                     connection[pos].dir_alias[0] = '\0';
                  }
                  if (qb[qb_pos].special_flag & RESEND_JOB)
                  {
                     connection[pos].resend = YES;
                  }
                  else
                  {
                     connection[pos].resend = NO;
                  }
                  connection[pos].temp_toggle = OFF;
                  (void)memcpy(connection[pos].hostname, fsa[fsa_pos].host_alias,
                               MAX_HOSTNAME_LENGTH + 1);
                  connection[pos].host_id = fsa[fsa_pos].host_id;
                  connection[pos].fsa_pos = fsa_pos;
                  /*
                   * fd_check_fsa() == YES triggers a refresh of all
                   * cached positions, including the local fsa_pos.
                   */
                  if (fd_check_fsa() == YES)
                  {
                     if (check_fra_fd() == YES)
                     {
                        init_fra_data();
                     }

                     /*
                      * We need to set the connection[pos].pid to a
                      * value higher then 0 so the function get_new_positions()
                      * also locates the new connection[pos].fsa_pos. Otherwise
                      * from here on we point to some completely different
                      * host and this can cause havoc when someone uses
                      * edit_hc and changes the alias order.
                      */
                     connection[pos].pid = 1;
                     get_new_positions();
                     connection[pos].pid = 0;
                     init_msg_buffer();
                     /*
                      * NOTE(review): the refreshed fsa_pos is used below
                      *               without any check. If
                      *               get_new_positions() can leave a
                      *               negative value in
                      *               connection[pos].fsa_pos (e.g. host no
                      *               longer present), every following
                      *               fsa[fsa_pos] access is out of
                      *               bounds - confirm.
                      */
                     fsa_pos = connection[pos].fsa_pos;
                     last_pos_lookup = INCORRECT;
                  }
                  /*
                   * NOTE(review): strcpy() here, while the other copies of
                   *               msg_name in this function use memcpy()
                   *               with MAX_MSG_NAME_LENGTH; this assumes
                   *               msg_name is NUL terminated and fits in
                   *               unique_name - confirm.
                   */
                  (void)strcpy(fsa[fsa_pos].job_status[connection[pos].job_no].unique_name,
                               qb[qb_pos].msg_name);
                  /*
                   * Auto toggle bookkeeping, only done while the host has
                   * no errors, auto_toggle is ON, original_toggle_pos is
                   * set and max_successful_retries > 0:
                   *   - toggle_pos equals original_toggle_pos again and
                   *     successful_retries > 0: clear original_toggle_pos
                   *     and reset the counter,
                   *   - counter reached max_successful_retries: set
                   *     temp_toggle ON for this connection and reset the
                   *     counter,
                   *   - otherwise: increment the counter.
                   */
                  if ((fsa[fsa_pos].error_counter == 0) &&
                      (fsa[fsa_pos].auto_toggle == ON) &&
                      (fsa[fsa_pos].original_toggle_pos != NONE) &&
                      (fsa[fsa_pos].max_successful_retries > 0))
                  {
                     if ((fsa[fsa_pos].original_toggle_pos == fsa[fsa_pos].toggle_pos) &&
                         (fsa[fsa_pos].successful_retries > 0))
                     {
                        fsa[fsa_pos].original_toggle_pos = NONE;
                        fsa[fsa_pos].successful_retries = 0;
                     }
                     else if (fsa[fsa_pos].successful_retries >= fsa[fsa_pos].max_successful_retries)
                          {
                             connection[pos].temp_toggle = ON;
                             fsa[fsa_pos].successful_retries = 0;
                          }
                          else
                          {
                             fsa[fsa_pos].successful_retries++;
                          }
                  }

                  /* Create process to distribute file. */
                  if ((connection[pos].pid = make_process(&connection[pos],
                                                          qb_pos)) > 0)
                  {
                     /* Success: record the pid and update the transfer counters. */
                     pid = fsa[fsa_pos].job_status[connection[pos].job_no].proc_id = connection[pos].pid;
                     fsa[fsa_pos].active_transfers += 1;
                     if ((fsa[fsa_pos].transfer_rate_limit > 0) ||
                         (no_of_trl_groups > 0))
                     {
                        calc_trl_per_process(fsa_pos);
                     }
                     ABS_REDUCE(fsa_pos);
                     qb[qb_pos].connect_pos = pos;
                     p_afd_status->no_of_transfers++;
                  }
                  else
                  {
                     /*
                      * make_process() did not return a pid > 0: reset the
                      * job slot and the connection entry to their unused
                      * state. pid keeps its initial value PENDING.
                      */
                     fsa[fsa_pos].job_status[connection[pos].job_no].connect_status = NOT_WORKING;
                     fsa[fsa_pos].job_status[connection[pos].job_no].no_of_files = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].no_of_files_done = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_size = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_size_done = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_size_in_use = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_size_in_use_done = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_name_in_use[0] = '\0';
                     fsa[fsa_pos].job_status[connection[pos].job_no].file_name_in_use[1] = 0;
                     fsa[fsa_pos].job_status[connection[pos].job_no].unique_name[0] = '\0';
                     connection[pos].hostname[0] = '\0';
                     connection[pos].msg_name[0] = '\0';
                     connection[pos].host_id = 0;
                     connection[pos].job_no = -1;
                     connection[pos].fsa_pos = -1;
                     connection[pos].fra_pos = -1;
                     connection[pos].pid = 0;
                  }
               }
            }
         }
      }
   }
   return(pid);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-c-programming" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Assembler]     [Git]     [Kernel List]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [C Programming]     [Yosemite Campsites]     [Yosemite News]     [GCC Help]

  Powered by Linux