Stack trace of thread 966070:
#0 0x00000000f7ee1129 __kernel_vsyscall (linux-gate.so.1)
#1 0x00000000f6ba23b7 __poll (libc.so.6)
#2 0x00000000f792e5b5 __interceptor_poll (libasan.so.8)
#3 0x00000000f72b30a8 pqSocketCheck (libpq.so.5)
#4 0x00000000f72b3864 pqWaitTimed (libpq.so.5)
#5 0x00000000f72b38d2 pqWait (libpq.so.5)
#6 0x00000000f72aff03 PQgetResult (libpq.so.5)
#7 0x00000000f72b036a PQexecFinish (libpq.so.5)
#8 0x0000000008106dd4 checkLOCK (server)
#9 0x000000000811d871 SQL_get_tpf_rw (server)
#10 0x0000000008180269 get_tpf_rw (server)
#11 0x0000000008189e47 FINDC (server)
#12 0x000000000818cd20 FINWC (server)
#13 0x00000000f2d03778 _segKIP8 (kip8.so)
#14 0x000000000816ad0e call_segment (server)
#15 0x000000000816ba0b ENTRC (server)
#16 0x00000000f2d096ee _segKIP5 (kip5.so)
#17 0x000000000816ad0e call_segment (server)
#18 0x000000000816ba0b ENTRC (server)
#19 0x00000000f2d1722d _segKIP4 (kip4.so)
#20 0x000000000816ad0e call_segment (server)
#21 0x000000000816ba0b ENTRC (server)
#22 0x00000000f2d1dee1 _segKIPB (kipb.so)
#23 0x000000000816ad0e call_segment (server)
#24 0x000000000816c454 ENTNC (server)
#25 0x00000000f2d2be54 _segKIP9 (kip9.so)
#26 0x000000000816ad0e call_segment (server)
#27 0x00000000081c264d call_appl_entry (server)
#28 0x000000000807f227 extest (server)
#29 0x000000000808923f handle_COMMS_TYPE_FUNCTIONAL_MESSAGE (server)
#30 0x000000000808d849 handle_comms_message (server)
#31 0x00000000f77d6e41 worker (libuv.so.1)
#32 0x00000000f78ff67a _ZL17asan_thread_startPv (libasan.so.8)
#33 0x00000000f6b170ad start_thread (libc.so.6)
#34 0x00000000f6bb1dea __clone (libc.so.6)
Hi,
Is this a complete backtrace?
Can you post a complete backtrace? We need to see if there is a reference
to your application code?
What language is it written in?
What operation this thread was performing?
Thank you.
On Thu, Oct 24, 2024 at 11:12 AM Sasmit Utkarsh <utkarshsasmit@xxxxxxxxx> wrote:
>
> Dear PostgreSQL Community Team,
>
> I hope this message finds you well. I am reaching out for assistance with an issue encountered in our application, which communicates with PostgreSQL using the libpq client library.
>
> Issue Details:
> We have observed a problem where one of the application's threads gets stuck during a database operation. Below is a stack trace of the affected thread:
>
> Application Logs:
> Oct 23 10:08:44.806235 cucmtpccu1 shc-server@2.service[966034]: 0966070{ef5f81a7-d35b-4604-953d-a35665e505b7.010000}KIP8-SQL_get_tpf_rw()-SQL read data from File Address before lock fa(-1810606079) fa(94145801) fa2 htonl(22549652)
> Oct 23 10:08:44.806235 cucmtpccu1 shc-server@2.service[966034]: 0966070{ef5f81a7-d35b-4604-953d-a35665e505b7.010000}KIP8-SQL_get_tpf_rw() SelectDataCommand = CALL SQL_select_data_procedure($1, $2, NULL, NULL) hold(0) fa(-1810606079)
> Oct 23 10:08:44.807814 cucmtpccu1 shc-server@2.service[966034]: *** buffer overflow detected ***: terminated
>
> Stack Trace of Thread 966070:
> #0 0x00000000f7ee1129 __kernel_vsyscall (linux-gate.so.1)
> #1 0x00000000f6ba23b7 __poll (libc.so.6)
> #2 0x00000000f792e5b5 __interceptor_poll (libasan.so.8)
> #3 0x00000000f72b30a8 pqSocketCheck (libpq.so.5)
> #4 0x00000000f72b3864 pqWaitTimed (libpq.so.5)
> #5 0x00000000f72b38d2 pqWait (libpq.so.5)
> #6 0x00000000f72aff03 PQgetResult (libpq.so.5)
> #7 0x00000000f72b036a PQexecFinish (libpq.so.5)
> #8 0x0000000008106dd4 checkLOCK (server)
> #9 0x000000000811d871 SQL_get_tpf_rw (server)
> ...
>
> The stack trace shows that the thread is stuck in a poll operation while waiting for socket activity within the PostgreSQL client library (libpq). We suspect this could be related to a network timeout or issue. However, the application logs indicate a buffer overflow before the crash, which raises questions about whether these are related.
>
> Questions:
> -Could the buffer overflow be causing the crash, and if so, how is it related to the socket activity?
> -Are there specific configurations or checks we should perform to diagnose this issue further?
> -Any suggestions for possible solutions to resolve this problem?
>
> For additional context, I've verified that the specified record does exist in the database, and I am also attaching the implementation details for the checkLOCK function corresponding to the stack trace.
>
> Please let me know if you need any more details
>
> Your assistance with troubleshooting this would be highly appreciated.
>
> Regards,
> Sasmit Utkarsh
> +91-7674022625
int get_tpf_rw( int32_t fa, unsigned char **core_blk, size_t *st_size, unsigned char *bid, unsigned char *rcc, int *fd, bool hold, unsigned char **mmap_ptr) { if(sql_db == 1)// { return SQL_get_tpf_rw(fa, core_blk, st_size, bid, rcc, fd, hold, mmap_ptr ); // } else //if(sql_db == 0) return FILE_get_tpf_rw(fa, core_blk, st_size, bid, rcc, fd, hold, mmap_ptr ); } int SQL_get_tpf_rw( int32_t fa, unsigned char **core_blk, size_t *st_size, unsigned char *bid, unsigned char *rcc, int *fd, bool hold, unsigned char **mmap_ptr) { char *p; char *c; int rc; struct stat statbuf; PGresult *res=NULL; uint32_t fa2 = htonl(fa); char *fa_val = (char *)&fa2; bool hold2 = hold; char *hold_val = (char *)&hold2; const char *paramValues[2] = {fa_val, hold_val}; int paramLengths[2] = {sizeof(fa2), sizeof(hold2)}; int paramFormats[2] = {1, 1}; // identify parameter as binary if(SQL_vsn10) { paramValues[0] = fa_val; paramValues[1] = NULL; paramLengths[0] = sizeof(fa2); paramLengths[1] = 0; paramFormats[0] = 1; paramFormats[1] = 0; } /** if(shadowSQL_vsn10) { paramValues[0] = fa_val; paramValues[1] = NULL; paramLengths[0] = sizeof(fa2); paramLengths[1] = 0; paramFormats[0] = 1; paramFormats[1] = 0; } **/ int resultFormat = 1; // binary int nFields = 0; int nTuples = 0; int blk_size = 0; char *blk_size_val; char *data_val; if(bid) { LOG_TRACE("%s() SQL read data from File Address fa(%i) fa(%08X) fa2 htonl(%i) bid (%02X%02X)",__func__,fa,fa,fa2,*bid,*(bid+1)); } else { LOG_TRACE("%s() SQL read data from File Address fa(%i) fa(%08X) fa2 htonl(%i) bid (0)",__func__,fa,fa,fa2); } LOG_DEBUG("%s()-SQL read data from File Address before lock fa(%i) fa(%08X) fa2 htonl(%i)",__func__,fa,fa,fa2); //LOG_DEBUG("%s() CALL SQL_select_data_procedure1(%i, %i, NULL);",__func__,fa2,hold2); //LOG_DEBUG("%s() SELECT data FROM fs_data WHERE file_address = %i","SQL_select_data_procedure1",fa2); bool is_data_found = false; if(is_shadow_db) { LOG_DEBUG("%s() shadowSelectDataCommand = %s hold(%i) fa(%i)",__func__,shadowSelectDataCommand,hold,fa); LOG_DEBUG("%s() shadowSelectDataNParams = %i",__func__,shadowSelectDataNParams); } else { LOG_DEBUG("%s() SelectDataCommand = %s hold(%i) fa(%i)",__func__,SelectDataCommand,hold,fa); LOG_DEBUG("%s() SelectDataNParams = %i",__func__,SelectDataNParams); } checkLOCK(fa); if(is_shadow_db) { res = PQexecPrepared(shadow_db_conn, shadowSelectDataName, shadowSelectDataNParams, paramValues, paramLengths, paramFormats, resultFormat); LOG_DEBUG("%s()- check now in shadow db for fa(%i) fa(%08X) PQresultStatus(res):%s",__func__,fa,fa,PQresStatus(PQresultStatus(res))); if (PQresultStatus(res) == PGRES_TUPLES_OK) { nTuples = PQntuples(res); // number of rows if(nTuples == 0) { LOG_DEBUG("fa(%i) fa(%08X) could not be retrieved from Shadow DB so look into base db",fa,fa); } else { blk_size_val = PQgetvalue(res, 0, 0); data_val = PQgetvalue(res, 0, 1); if( data_val != NULL && *data_val != '\0') { is_data_found = true; LOG_DEBUG("fa(%i) fa(%08X) retrieved successfully from Shadow DB",fa,fa); } } } else LOG_ERROR("SELECT failed on Shadow DB: %s", PQerrorMessage(shadow_db_conn )); if (!is_data_found) { PQclear(res); // Cleared result only if we are moving to main DB query } } if(is_data_found !=true) { res = PQexecPrepared(conn, SelectDataName, SelectDataNParams, paramValues, paramLengths, paramFormats, resultFormat); } LOG_DEBUG("%s() res = %s",__func__,PQresStatus(PQresultStatus(res))); if ( hold ) { if ( SQL_vsn10 ) { SQL_lock_row(fa); } LOG_DEBUG("%s()-SQL read data from File Address- locked fa(%i) fa(%08X) fa2 htonl(%i)",__func__,fa,fa,fa2); } checkLOCK(fa); if (PQresultStatus(res) != PGRES_TUPLES_OK) { LOG_ERROR("SELECT failed: %s", PQerrorMessage(conn)); LOG_DEBUG("ROLLBACK TRANSACTION AND CHAIN"); PQclear(res); res = PQexec(conn,"ROLLBACK TRANSACTION AND CHAIN"); LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa); rc = ERR_INVALID_FILE_ADDRESS; if ( ((uint32_t)fa)>>24 != 0x94) { if (COUNT < 255) { if ( bid != NULL ) { LOG_INFO("missing data: %08x %02x %02x", fa, *bid,*(bid+1)); char str1[8]; int16_t a=*bid; int16_t b=*(bid+1); sprintf(str1,"%02x%02x",a,b); FA[COUNT]=fa; memcpy(REC_ID1[COUNT],str1,5); COUNT++; } else { bid=0x0000; LOG_INFO("missing data: %08x", fa); FA[COUNT++]=fa; REC_ID1[COUNT][0]=0x00; } } } PQclear(res); } else { nFields = PQnfields(res); // number of columns nTuples = PQntuples(res); // number of rows LOG_DEBUG("%s() nFields(cols)=%i nTuples(rows)=%i",__func__,nFields,nTuples); if( nFields == 0 || nTuples == 0) { LOG_ERROR("SELECT failed: NO ROWS RETURNED"); PQclear(res); res = PQexec(conn,"ROLLBACK TRANSACTION AND CHAIN"); LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa); rc = ERR_INVALID_FILE_ADDRESS; if ( ((uint32_t)fa)>>24 != 0x94) { if (COUNT < 255) { if ( bid != NULL ) { LOG_INFO("missing data: %08x %02x %02x", fa, *bid,*(bid+1)); char str1[8]; int16_t a=*bid; int16_t b=*(bid+1); sprintf(str1,"%02x%02x",a,b); FA[COUNT]=fa; memcpy(REC_ID1[COUNT],str1,5); COUNT++; } else { bid=0x0000; LOG_INFO("missing data: %08x", fa); FA[COUNT++]=fa; REC_ID1[COUNT][0]=0x00; } } } PQclear(res); } else { blk_size = ntohl(*(uint32_t *)PQgetvalue (res, 0, 0)); LOG_DEBUG("%s() blk_size returned is %i",__func__,blk_size); if(blk_size < 127 || blk_size > 4096) { LOG_ERROR("SELECT failed: NO ROWS RETURNED"); PQclear(res); res = PQexec(conn,"ROLLBACK TRANSACTION AND CHAIN"); LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa); rc = ERR_INVALID_FILE_ADDRESS; if ( ((uint32_t)fa)>>24 != 0x94) { if (COUNT < 255) { if ( bid != NULL ) { LOG_INFO("missing data: %08x %02x %02x", fa, *bid,*(bid+1)); char str1[8]; int16_t a=*bid; int16_t b=*(bid+1); sprintf(str1,"%02x%02x",a,b); FA[COUNT]=fa; memcpy(REC_ID1[COUNT],str1,5); COUNT++; } else { bid=0x0000; LOG_INFO("missing data: %08x", fa); FA[COUNT++]=fa; REC_ID1[COUNT][0]=0x00; } } } PQclear(res); } else { *fd = fa; // use File Address as File Descriptor c = PQgetvalue(res, 0, 1); // SELECT has data as second field //dump_hex(c,16); //printf(" PQgetlength of data = %i\n",PQgetlength(res,0,1)); p = c; // use same pointer for both mmap and core_blk LOG_DEBUG("Core block address %p for FA %08X (RW)", p, fa); dump_hex(p,32); /* probably need some validation of block size? */ if (bid != NULL && memcmp(bid, p, 2) != 0 && memcmp(bid, "\x00\x00", 2) != 0) { rc = ERR_RID_MISMATCH; int b1 = *(unsigned char *)p; int b2 = *(unsigned char *)(p+1); int a1 = *(unsigned char *)(bid); int a2 = *(unsigned char *)(bid+1); LOG_ERROR("Error record id mismatch %02X%02X != %02X%02X FA %08X", b1, b2, a1, a2, fa); } else if (rcc != NULL && *rcc != 0 && memcmp(rcc, p+2, 1) != 0) { rc = ERR_RCC_MISMATCH; int b1 = *(unsigned char *)(p+2); int a1 = *(unsigned char *)(rcc); LOG_ERROR("Error rcc mismatch %02X != %02X FA %08X", b1, a1, fa); } else { rc = 0; } *core_blk = (unsigned char *)malloc(blk_size); if (*core_blk == NULL) { LOG_ERROR("Memory allocation failed"); PQclear(res); return ERR_IO_HARDWARE_ERROR; } memcpy(*core_blk,c , blk_size); LOG_TRACE("Core block address %p and mmap_ptr %p for FA %08X (RW)", core_blk, mmap_ptr,fa); //*mmap_ptr = c; //*core_blk = p; *st_size = blk_size; PQclear(res); LOG_DEBUG("%s: cleared the result from query in the end",__func__); //dump_hex(*mmap_ptr,1055); } } } //LOG_DEBUG("%s() rc = %i",__func__,rc); return rc; }