Certain query eating up all free memory (out of memory error)

Łukasz Dejneka <l.dejneka@xxxxxxxxx> · Tue, 25 May 2010 08:16:15 +0200

Hi group,

I could really use your help with this one. I don't have all the
details right now (I can provide more descriptions tomorrow and logs
if needed), but maybe this will be enough:

I have written a PG (8.3.8) module, which uses Flex Lexical Analyser.
It takes text from database field and finds matches for defined rules.
It returns a set of two text fields (value found and value type).

When I run query like this:
SELECT * FROM flex_me(SELECT some_text FROM some_table WHERE id = 1);
It works perfectly fine. Memory never reaches more than 1% (usually
it's below 0.5% of system mem).

But when I run query like this:
SELECT flex_me(some_text_field) FROM some_table WHERE id = 1;
Memory usage goes through the roof, and if the result is over about
10k matches (rows) it eats up all memory and I get "out of memory"
error.

I try to free all allocated memory, and even wrote a version using a doubly
linked list of results, but the same behaviour persists. I tried to
track it down on my own and from my own trials it seems that the
problem lies directly in the set returning function in File 2
"flex_me()" as even with 40k of results in a 2 column array it
shouldn't take more than 1MB of memory. Also when I run it just to the
point of SRF_IS_FIRSTCALL() (whole bit) the memory usage doesn't go
up, but when subsequent SRF_PERCALL calls are made it's where the
memory usage goes through the roof.

Btw, if the following code contains some nasty errors (and I'm pretty
sure it does), please know that I'm just learning PG and C programming.
Any help or tips would be greatly appreciated.

Simplified (but still relevant) code below:

File 1 (Flex parser template which is compiled with flex):

%{
#include <stdio.h>

/* Entry points implemented in the user-code section at the end of this file. */
extern void *addToken(int type);
extern char ***flexme(char *ptr);

/*
 * Token type codes (T_*) passed to addToken(), each paired with the
 * type-name string (S_*) that is stored alongside the matched text.
 */
#define T_NUM  1
#define S_NUM  "number"
#define T_FLO  2
#define S_FLO  "float"
#define T_DAT  3
#define S_DAT  "date"
#define T_WRD  7
#define S_WRD  "word"

/*
 * Result table filled by addToken(): vals[i][0] is the matched text and
 * vals[i][1] is the type name of the i-th token.
 */
char ***vals;

/*
 * cnt     - number of entries currently stored in vals (reset by flexme()).
 * mem_cnt - number of entries vals has room for; doubled by addToken() when
 *           the table fills up and used by flexme() as the initial size.
 */
int cnt = 0, mem_cnt = 64;

%}

/* Numeric building blocks: a single digit, an optionally negative integer,
 * and a number with a '.' or ',' fraction separator. FLOAT also accepts a
 * plain NUMBER. */
DGT          [0-9]
NUMBER       (-)?{DGT}+
FLOAT        ((-)?{DGT}+[\.,]{DGT}+)|{NUMBER}

/* Separators allowed between date parts; DATE_S5 is the empty string, i.e.
 * the parts written back to back. */
DATE_S1      "-"
DATE_S2      ","
DATE_S3      "."
DATE_S4      "/"
DATE_S5      ""
/* Year: one digit, two digits, or four digits in the range 0000-2499.
 * Day: 1-9 or two digits 00-31. Month: 1-9 or two digits 01-12. */
DATE_YY      ([0-9]|([0-9][0-9])|([0-1][0-9][0-9][0-9])|(2[0-4][0-9][0-9]))
DATE_DD      ([1-9]|(([0-2][0-9])|(3[0-1])))
DATE_MM      ([1-9]|((0[1-9])|(1[0-2])))

/* Year-month-day dates, one definition per separator (the same separator is
 * used in both positions), combined into DATE_YMD. */
DATE_YMD_S1  ({DATE_YY}{DATE_S1}{DATE_MM}{DATE_S1}{DATE_DD})
DATE_YMD_S2  ({DATE_YY}{DATE_S2}{DATE_MM}{DATE_S2}{DATE_DD})
DATE_YMD_S3  ({DATE_YY}{DATE_S3}{DATE_MM}{DATE_S3}{DATE_DD})
DATE_YMD_S4  ({DATE_YY}{DATE_S4}{DATE_MM}{DATE_S4}{DATE_DD})
DATE_YMD_S5  ({DATE_YY}{DATE_S5}{DATE_MM}{DATE_S5}{DATE_DD})
DATE_YMD     ({DATE_YMD_S1}|{DATE_YMD_S2}|{DATE_YMD_S3}|{DATE_YMD_S4}|{DATE_YMD_S5})

/* A word is a non-empty run of ASCII letters and digits. */
WORD         ([a-zA-Z0-9]+)

%%

    /* Each recognised token is recorded through addToken() with its type
     * code; any other character (including newline) is discarded. */

{FLOAT}      addToken(T_FLO);

{DATE_YMD}   addToken(T_DAT);

{WORD}       addToken(T_WRD);

.|\n     /* eat up any unmatched character */

%%

void *
addToken(int type)
{
 int   x = 0;

//    elog(NOTICE,"W[%d] %s", type, yytext);

   //check if we need to add more mem
   if (mem_cnt-1 <= cnt) {
       mem_cnt *= 2;
       vals = repalloc(vals, mem_cnt * sizeof(char *));
//        elog(NOTICE, "mem increased to: %d", mem_cnt*sizeof(char *));
   }
   vals[cnt] = palloc(2 * sizeof(char *));

   //types
   switch (type) {
       case T_FLO:    //float
           x = strlen(S_FLO);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_FLO, x);
           vals[cnt][1][x] = '\0';
           break;
       case T_DAT:     //date
           x = strlen(S_DAT);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_DAT, x);
           vals[cnt][1][x] = '\0';
           break;
       case T_WRD:     //word
           x = strlen(S_WRD);
           vals[cnt][1] = palloc((x+1) * sizeof(char));
           strncpy(vals[cnt][1], S_WRD, x);
           vals[cnt][1][x] = '\0';
           break;
       default:
           elog(ERROR,"Unknown flexme type: %d", type);
           break;
   }
   //value
   vals[cnt][0] = palloc((yyleng+1) * sizeof(char));
   strncpy(vals[cnt][0], yytext, yyleng);
   vals[cnt][0][yyleng] = '\0';

   cnt++;
//    elog(NOTICE,"i: %d", cnt);

   return 0;
}

char ***flexme(char *ptr)
{

   YY_BUFFER_STATE bp;
   int   yyerr = 0;
   cnt = 0;

   //initial table size
   vals = palloc(mem_cnt * sizeof(char *));

   bp = yy_scan_string(ptr);
   yy_switch_to_buffer(bp);
   yyerr = yylex();
   yy_delete_buffer(bp);

   if (yyerr != 0) {
       elog(ERROR, "Flex parser error code: %d", yyerr);
   }

   return vals;
}

File 2 (PG function, which includes flex output analyser of compiled
File 1 - lex.yy.c):

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

#include "lex.yy.c"

/*
 * text_to_cstring (copied directly from the PG sources)
 *
 * Return a freshly palloc'd, NUL-terminated C string holding the contents of
 * the text value `t`.  The input is detoasted if necessary; any temporary
 * detoasted copy is freed before returning.  The caller owns the result.
 */
char *text_to_cstring(const text *t);

char *
text_to_cstring(const text *t)
{
       /* must cast away the const, unfortunately */
       text           *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
       int             len = VARSIZE_ANY_EXHDR(tunpacked);
       char           *result;

       result = (char *) palloc(len + 1);
       memcpy(result, VARDATA_ANY(tunpacked), len);
       result[len] = '\0';

       /* free the detoasted copy, if one was made */
       if (tunpacked != t)
               pfree(tunpacked);

       return result;
}

PG_FUNCTION_INFO_V1(flex_me);
Datum    flex_me(PG_FUNCTION_ARGS);

Datum
flex_me(PG_FUNCTION_ARGS) {
   text             *in = PG_GETARG_TEXT_P(0);

   FuncCallContext  *funcctx;
   TupleDesc        tupdesc;
   AttInMetadata    *attinmeta;
   int              call_cntr, max_calls;
   char             ***values;
   char             *ptr;

   // stuff done only on the first call of the function
   if (SRF_IS_FIRSTCALL()) {
       MemoryContext oldcontext;

       // create a function context for cross-call persistence
       funcctx = SRF_FIRSTCALL_INIT();

       // switch to memory context appropriate for multiple  function calls
       oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

       ptr = text_to_cstring_imm(in);
       values = flexme(ptr);

       //free char pointer
       pfree(ptr);

       // Build a tuple descriptor for our result type
       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
           ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg
              ("function returning record called in context "
               "that cannot accept type record")));

       // generate attribute metadata needed later to produce
       //   tuples from raw C strings
       attinmeta = TupleDescGetAttInMetadata(tupdesc);
       funcctx->attinmeta = attinmeta;

       //pass first list element
       funcctx->user_fctx = values;

       // total number of tuples to be returned
       funcctx->max_calls = cnt;

       //go back to normal memory context
       MemoryContextSwitchTo(oldcontext);
   }

   // stuff done on every call of the function.
   funcctx = SRF_PERCALL_SETUP();
   call_cntr = funcctx->call_cntr;
   max_calls = funcctx->max_calls;
   attinmeta = funcctx->attinmeta;
   values = (char ***) funcctx->user_fctx;

   //set return routine
   if (call_cntr < max_calls) {
       char      **rvals;
       HeapTuple tuple;
       Datum     result;
       int       i;

       // Prepare a values array for building the returned
       //tuple. This should be an array of C strings which
       //will be processed later by the type input functions
       rvals = palloc(2*sizeof(char *));

       //value (text)
       i = strlen(values[call_cntr][0]);
       rvals[0] = palloc((i+1)*sizeof(char));
       strncpy(rvals[0], values[call_cntr][0], i);
       rvals[0][i] = '\0';

       //type (text)
       i = strlen(values[call_cntr][1]);
       rvals[1] = palloc((i+1)*sizeof(char));
       strncpy(rvals[1], values[call_cntr][1], i);
       rvals[1][i] = '\0';

       // build a tuple and make into datum.
       tuple = BuildTupleFromCStrings(attinmeta, rvals);

       result = HeapTupleGetDatum(tuple);

       //free memory
       pfree(rvals[0]);
       pfree(rvals[1]);
       pfree(rvals);
       pfree(values[call_cntr][0]);
       pfree(values[call_cntr][1]);
       pfree(values[call_cntr]);

       //return datum
       SRF_RETURN_NEXT(funcctx, result);
   }
   else {
       SRF_RETURN_DONE(funcctx);
   }

   return true;
}

-- 
Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance