Hi group, I could really use your help with this one. I don't have all the details right now (I can provide more description and logs tomorrow if needed), but maybe this will be enough: I have written a PG (8.3.8) module which uses the Flex lexical analyser. It takes text from a database field and finds matches for the defined rules. It returns a set of two text fields (the value found and the value type). When I run a query like this: SELECT * FROM flex_me(SELECT some_text FROM some_table WHERE id = 1); it works perfectly fine. Memory usage never reaches more than 1% (usually it's below 0.5% of system memory). But when I run a query like this: SELECT flex_me(some_text_field) FROM some_table WHERE id = 1; memory usage goes through the roof, and if the result is over about 10k matches (rows) it eats up all memory and I get an "out of memory" error. I try to free all allocated memory, and I even wrote a version with a doubly linked list of results, but the same behaviour persists. I tried to track it down on my own, and from my own trials it seems that the problem lies directly in the set-returning function "flex_me()" in File 2, as even with 40k results in a 2-column array it shouldn't take more than 1MB of memory. Also, when I run it just up to the end of the SRF_IS_FIRSTCALL() block, the memory usage doesn't go up; it is when the subsequent SRF_PERCALL calls are made that the memory usage goes through the roof. Btw, if the following code contains some nasty errors (and I'm pretty sure it does), please know that I'm just learning PG and C programming. Any help or tips would be greatly appreciated.
Simplified (but still relevant) code below: File 1 (Flex parser template which is compiled with flex): %{ #include <stdio.h> extern void *addToken(int type); extern char ***flexme(char *ptr); #define T_NUM 1 #define S_NUM "number" #define T_FLO 2 #define S_FLO "float" #define T_DAT 3 #define S_DAT "date #define T_WRD 7 #define S_WRD "word" char ***vals; int cnt = 0, mem_cnt = 64; %} DGT [0-9] NUMBER (-)?{DGT}+ FLOAT ((-)?{DGT}+[\.,]{DGT}+)|{NUMBER} DATE_S1 "-" DATE_S2 "," DATE_S3 "." DATE_S4 "/" DATE_S5 "" DATE_YY ([0-9]|([0-9][0-9])|([0-1][0-9][0-9][0-9])|(2[0-4][0-9][0-9])) DATE_DD ([1-9]|(([0-2][0-9])|(3[0-1]))) DATE_MM ([1-9]|((0[1-9])|(1[0-2]))) DATE_YMD_S1 ({DATE_YY}{DATE_S1}{DATE_MM}{DATE_S1}{DATE_DD}) DATE_YMD_S2 ({DATE_YY}{DATE_S2}{DATE_MM}{DATE_S2}{DATE_DD}) DATE_YMD_S3 ({DATE_YY}{DATE_S3}{DATE_MM}{DATE_S3}{DATE_DD}) DATE_YMD_S4 ({DATE_YY}{DATE_S4}{DATE_MM}{DATE_S4}{DATE_DD}) DATE_YMD_S5 ({DATE_YY}{DATE_S5}{DATE_MM}{DATE_S5}{DATE_DD}) DATE_YMD ({DATE_YMD_S1}|{DATE_YMD_S2}|{DATE_YMD_S3}|{DATE_YMD_S4}|{DATE_YMD_S5}) WORD ([a-zA-Z0-9]+) %% {FLOAT} addToken(T_FLO); {DATE_YMD} addToken(T_DAT); {WORD} addToken(T_WRD); .|\n /* eat up any unmatched character */ %% void * addToken(int type) { int x = 0; // elog(NOTICE,"W[%d] %s", type, yytext); //check if we need to add more mem if (mem_cnt-1 <= cnt) { mem_cnt *= 2; vals = repalloc(vals, mem_cnt * sizeof(char *)); // elog(NOTICE, "mem increased to: %d", mem_cnt*sizeof(char *)); } vals[cnt] = palloc(2 * sizeof(char *)); //types switch (type) { case T_FLO: //float x = strlen(S_FLO); vals[cnt][1] = palloc((x+1) * sizeof(char)); strncpy(vals[cnt][1], S_FLO, x); vals[cnt][1][x] = '\0'; break; case T_DAT: //date x = strlen(S_DAT); vals[cnt][1] = palloc((x+1) * sizeof(char)); strncpy(vals[cnt][1], S_DAT, x); vals[cnt][1][x] = '\0'; break; case T_WRD: //word x = strlen(S_WRD); vals[cnt][1] = palloc((x+1) * sizeof(char)); strncpy(vals[cnt][1], S_WRD, x); vals[cnt][1][x] = '\0'; break; default: elog(ERROR,"Unknown flexme 
type: %d", type); break; } //value vals[cnt][0] = palloc((yyleng+1) * sizeof(char)); strncpy(vals[cnt][0], yytext, yyleng); vals[cnt][0][yyleng] = '\0'; cnt++; // elog(NOTICE,"i: %d", cnt); return 0; } char ***flexme(char *ptr) { YY_BUFFER_STATE bp; int yyerr = 0; cnt = 0; //initial table size vals = palloc(mem_cnt * sizeof(char *)); bp = yy_scan_string(ptr); yy_switch_to_buffer(bp); yyerr = yylex(); yy_delete_buffer(bp); if (yyerr != 0) { elog(ERROR, "Flex parser error code: %d", yyerr); } return vals; } File 2 (PG function, which includes flex output analyser of compiled File 1 - lex.yy.c): #include "postgres.h" #include "fmgr.h" #include "funcapi.h" #include "lex.yy.c" char *text_to_cstring(const text *t); //this is copied directly from PG sources char * text_to_cstring(const text *t) { /* must cast away the const, unfortunately */ text *tunpacked = pg_detoast_datum_packed((struct varlena *) t); int len = VARSIZE_ANY_EXHDR(tunpacked); char *result; result = (char *) palloc(len + 1); memcpy(result, VARDATA_ANY(tunpacked), len); result[len] = '\0'; if (tunpacked != t) pfree(tunpacked); return result; } PG_FUNCTION_INFO_V1(flex_me); Datum flex_me(PG_FUNCTION_ARGS); Datum flex_me(PG_FUNCTION_ARGS) { text *in = PG_GETARG_TEXT_P(0); FuncCallContext *funcctx; TupleDesc tupdesc; AttInMetadata *attinmeta; int call_cntr, max_calls; char ***values; char *ptr; // stuff done only on the first call of the function if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; // create a function context for cross-call persistence funcctx = SRF_FIRSTCALL_INIT(); // switch to memory context appropriate for multiple function calls oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); ptr = text_to_cstring_imm(in); values = flexme(ptr); //free char pointer pfree(ptr); // Build a tuple descriptor for our result type if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg ("function returning record 
called in context " "that cannot accept type record"))); // generate attribute metadata needed later to produce // tuples from raw C strings attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->attinmeta = attinmeta; //pass first list element funcctx->user_fctx = values; // total number of tuples to be returned funcctx->max_calls = cnt; //go back to normal memory context MemoryContextSwitchTo(oldcontext); } // stuff done on every call of the function. funcctx = SRF_PERCALL_SETUP(); call_cntr = funcctx->call_cntr; max_calls = funcctx->max_calls; attinmeta = funcctx->attinmeta; values = (char ***) funcctx->user_fctx; //set return routine if (call_cntr < max_calls) { char **rvals; HeapTuple tuple; Datum result; int i; // Prepare a values array for building the returned //tuple. This should be an array of C strings which //will be processed later by the type input functions rvals = palloc(2*sizeof(char *)); //value (text) i = strlen(values[call_cntr][0]); rvals[0] = palloc((i+1)*sizeof(char)); strncpy(rvals[0], values[call_cntr][0], i); rvals[0][i] = '\0'; //type (text) i = strlen(values[call_cntr][1]); rvals[1] = palloc((i+1)*sizeof(char)); strncpy(rvals[1], values[call_cntr][1], i); rvals[1][i] = '\0'; // build a tuple and make into datum. tuple = BuildTupleFromCStrings(attinmeta, rvals); result = HeapTupleGetDatum(tuple); //free memory pfree(rvals[0]); pfree(rvals[1]); pfree(rvals); pfree(values[call_cntr][0]); pfree(values[call_cntr][1]); pfree(values[call_cntr]); //return datum SRF_RETURN_NEXT(funcctx, result); } else { SRF_RETURN_DONE(funcctx); } return true; } -- Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-performance