Certain query eating up all free memory (out of memory error)

Поиск
Список
Период
Сортировка
От Łukasz Dejneka
Тема Certain query eating up all free memory (out of memory error)
Дата
Msg-id AANLkTingEH23V3VF3_P1eaKluVa3PgsA8RGVzFAfv0_N@mail.gmail.com
обсуждение исходный текст
Ответы Re: Certain query eating up all free memory (out of memory error)
Список pgsql-performance
Hi group,

I could really use your help with this one. I don't have all the
details right now (I can provide more descriptions tomorrow and logs
if needed), but maybe this will be enough:

I have written a PG (8.3.8) module, which uses Flex Lexical Analyser.
It takes text from database field and finds matches for defined rules.
It returns a set of two text fields (value found and value type).

When I run query like this:
SELECT * FROM flex_me((SELECT some_text FROM some_table WHERE id = 1));
It works perfectly fine. Memory never reaches more than 1% (usually
it's below 0.5% of system mem).

But when I run query like this:
SELECT flex_me(some_text_field) FROM some_table WHERE id = 1;
Memory usage goes through the roof, and if the result is over about
10k matches (rows) it eats up all memory and I get "out of memory"
error.

I try to free all allocated memory, and I even wrote a version that uses
a doubly linked list of results, but the same behaviour persists. I tried
to track it down on my own, and from my trials it seems that the
problem lies directly in the set-returning function in File 2,
"flex_me()", as even with 40k results in a 2-column array it
shouldn't take more than 1MB of memory. Also, when I run it just up to
the end of the SRF_IS_FIRSTCALL() block (the whole block), the memory
usage doesn't go up; it is when the subsequent per-call
(SRF_PERCALL_SETUP) calls are made that the memory usage goes through the roof.

Btw, if the following code contains some nasty errors and I'm pretty
sure it does, please know that I'm just learning PG and C programming.
Any help or tips would be greatly appreciated.

Simplified (but still relevant) code below:

File 1 (Flex parser template which is compiled with flex):

%{
#include <stdio.h>

/* Entry points defined in the user-code section at the end of this file. */
extern void *addToken(int type);
extern char ***flexme(char *ptr);

/*
 * Token type codes (T_*) passed to addToken() and the label strings (S_*)
 * stored as the "type" column for each of them.
 */
#define T_NUM  1
#define S_NUM  "number"
#define T_FLO  2
#define S_FLO  "float"
#define T_DAT  3
#define S_DAT  "date"   /* closing quote was missing in the original */
#define T_WRD  7
#define S_WRD  "word"

/* Growable array of {value, type} string pairs, filled in by addToken(). */
char ***vals;

/* cnt: pairs stored so far; mem_cnt: number of slots allocated in vals. */
int cnt = 0, mem_cnt = 64;

%}

/* A single decimal digit. */
DGT          [0-9]
/* Optional leading minus followed by one or more digits. */
NUMBER       (-)?{DGT}+
/*
 * Either digits, a '.' or ',' decimal separator, and more digits, or a plain
 * NUMBER -- so integers are matched by FLOAT as well.
 */
FLOAT        ((-)?{DGT}+[\.,]{DGT}+)|{NUMBER}

/* Separators accepted between the year, month and day parts of a date. */
DATE_S1      "-"
DATE_S2      ","
DATE_S3      "."
DATE_S4      "/"
/* Empty separator; presumably meant for dates written without one (verify flex accepts ""). */
DATE_S5      ""
/* Year: one digit, two digits, or four digits in 0000-1999 or 2000-2499. */
DATE_YY      ([0-9]|([0-9][0-9])|([0-1][0-9][0-9][0-9])|(2[0-4][0-9][0-9]))
/* Day: 1-9, or two digits 00-29, or 30-31 (note that "00" is accepted). */
DATE_DD      ([1-9]|(([0-2][0-9])|(3[0-1])))
/* Month: 1-9, 01-09, or 10-12. */
DATE_MM      ([1-9]|((0[1-9])|(1[0-2])))

/* Year-month-day dates, one pattern per separator; both separators must be the same. */
DATE_YMD_S1  ({DATE_YY}{DATE_S1}{DATE_MM}{DATE_S1}{DATE_DD})
DATE_YMD_S2  ({DATE_YY}{DATE_S2}{DATE_MM}{DATE_S2}{DATE_DD})
DATE_YMD_S3  ({DATE_YY}{DATE_S3}{DATE_MM}{DATE_S3}{DATE_DD})
DATE_YMD_S4  ({DATE_YY}{DATE_S4}{DATE_MM}{DATE_S4}{DATE_DD})
DATE_YMD_S5  ({DATE_YY}{DATE_S5}{DATE_MM}{DATE_S5}{DATE_DD})
/* Any of the year-month-day forms above. */
DATE_YMD     ({DATE_YMD_S1}|{DATE_YMD_S2}|{DATE_YMD_S3}|{DATE_YMD_S4}|{DATE_YMD_S5})

/* One or more ASCII letters or digits. */
WORD         ([a-zA-Z0-9]+)

%%

{FLOAT}      addToken(T_FLO);   /* decimals and plain integers (FLOAT includes NUMBER) are recorded as "float" */

{DATE_YMD}   addToken(T_DAT);   /* year-month-day dates are recorded as "date" */

{WORD}       addToken(T_WRD);   /* remaining alphanumeric runs are recorded as "word" */

.|\n     /* discard any character (including newline) that no rule above matched */

%%

/*
 * Return a freshly palloc'd, NUL-terminated copy of the first `len` bytes
 * of `src`.
 */
static char *
copyTokenText(const char *src, size_t len)
{
    char *dst = palloc(len + 1);

    memcpy(dst, src, len);
    dst[len] = '\0';
    return dst;
}

/*
 * Append the token currently held in yytext/yyleng to the global `vals`
 * array as a two-element string array: [0] = matched text, [1] = type label.
 *
 * type: one of T_FLO, T_DAT or T_WRD; any other value raises elog(ERROR).
 *
 * Grows `vals` (doubling `mem_cnt`) when it is about to fill up and
 * increments `cnt`. Always returns NULL; the return value carries no
 * information and exists only to match the declared prototype.
 */
void *
addToken(int type)
{
    const char *label = NULL;

    // resolve the type label first so an unknown type fails before any allocation
    switch (type) {
        case T_FLO:     //float
            label = S_FLO;
            break;
        case T_DAT:     //date
            label = S_DAT;
            break;
        case T_WRD:     //word
            label = S_WRD;
            break;
        default:
            elog(ERROR,"Unknown flexme type: %d", type);
            break;
    }

    //check if we need to add more mem
    if (mem_cnt-1 <= cnt) {
        mem_cnt *= 2;
        vals = repalloc(vals, mem_cnt * sizeof *vals);
    }

    // one {value, type} pair per token
    vals[cnt] = palloc(2 * sizeof *vals[cnt]);
    vals[cnt][0] = copyTokenText(yytext, (size_t) yyleng);
    vals[cnt][1] = copyTokenText(label, strlen(label));

    cnt++;

    return NULL;
}

/*
 * Run the scanner over the NUL-terminated string `ptr` and return the
 * collected tokens.
 *
 * The result is the global `vals` array: `cnt` entries, each a two-element
 * string array {matched text, type label} as stored by addToken(). The array
 * is palloc'd in whatever memory context is current when this is called.
 *
 * Raises elog(ERROR) if yylex() returns a non-zero code.
 */
char ***flexme(char *ptr)
{
    // starting capacity; same value mem_cnt is initialised with
    enum { FLEXME_INITIAL_SLOTS = 64 };

    YY_BUFFER_STATE bp;
    int   yyerr;

    // start every scan from a clean state: mem_cnt would otherwise keep the
    // size reached by the largest previous scan and over-allocate from then on
    cnt = 0;
    mem_cnt = FLEXME_INITIAL_SLOTS;

    //initial table size
    vals = palloc(mem_cnt * sizeof *vals);

    bp = yy_scan_string(ptr);
    yy_switch_to_buffer(bp);
    yyerr = yylex();
    yy_delete_buffer(bp);

    if (yyerr != 0) {
        elog(ERROR, "Flex parser error code: %d", yyerr);
    }

    return vals;
}



File 2 (the PG function; it includes lex.yy.c, the scanner source that
flex generates from File 1):

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

#include "lex.yy.c"

/*
 * text_to_cstring
 *
 * Convert a text value to a freshly palloc'd, NUL-terminated C string.
 * The input may be in packed/toasted form; it is unpacked first and any
 * temporary unpacked copy is freed before returning.
 *
 * (This is copied directly from the PG sources.)
 */
char *text_to_cstring(const text *t);

char *
text_to_cstring(const text *t)
{
        /* must cast away the const, unfortunately */
        text           *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
        int             len = VARSIZE_ANY_EXHDR(tunpacked);
        char           *result;

        result = (char *) palloc(len + 1);
        memcpy(result, VARDATA_ANY(tunpacked), len);
        result[len] = '\0';

        /* free the unpacked copy only if one was actually made */
        if (tunpacked != t)
                pfree(tunpacked);

        return result;
}


PG_FUNCTION_INFO_V1(flex_me);
Datum    flex_me(PG_FUNCTION_ARGS);

Datum
flex_me(PG_FUNCTION_ARGS) {
    text             *in = PG_GETARG_TEXT_P(0);

    FuncCallContext  *funcctx;
    TupleDesc        tupdesc;
    AttInMetadata    *attinmeta;
    int              call_cntr, max_calls;
    char             ***values;
    char             *ptr;

    // stuff done only on the first call of the function
    if (SRF_IS_FIRSTCALL()) {
        MemoryContext oldcontext;

        // create a function context for cross-call persistence
        funcctx = SRF_FIRSTCALL_INIT();

        // switch to memory context appropriate for multiple  function calls
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        ptr = text_to_cstring_imm(in);
        values = flexme(ptr);

        //free char pointer
        pfree(ptr);

        // Build a tuple descriptor for our result type
        if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg
               ("function returning record called in context "
                "that cannot accept type record")));

        // generate attribute metadata needed later to produce
        //   tuples from raw C strings
        attinmeta = TupleDescGetAttInMetadata(tupdesc);
        funcctx->attinmeta = attinmeta;

        //pass first list element
        funcctx->user_fctx = values;

        // total number of tuples to be returned
        funcctx->max_calls = cnt;

        //go back to normal memory context
        MemoryContextSwitchTo(oldcontext);
    }

    // stuff done on every call of the function.
    funcctx = SRF_PERCALL_SETUP();
    call_cntr = funcctx->call_cntr;
    max_calls = funcctx->max_calls;
    attinmeta = funcctx->attinmeta;
    values = (char ***) funcctx->user_fctx;

    //set return routine
    if (call_cntr < max_calls) {
        char      **rvals;
        HeapTuple tuple;
        Datum     result;
        int       i;

        // Prepare a values array for building the returned
        //tuple. This should be an array of C strings which
        //will be processed later by the type input functions
        rvals = palloc(2*sizeof(char *));

        //value (text)
        i = strlen(values[call_cntr][0]);
        rvals[0] = palloc((i+1)*sizeof(char));
        strncpy(rvals[0], values[call_cntr][0], i);
        rvals[0][i] = '\0';

        //type (text)
        i = strlen(values[call_cntr][1]);
        rvals[1] = palloc((i+1)*sizeof(char));
        strncpy(rvals[1], values[call_cntr][1], i);
        rvals[1][i] = '\0';

        // build a tuple and make into datum.
        tuple = BuildTupleFromCStrings(attinmeta, rvals);

        result = HeapTupleGetDatum(tuple);


        //free memory
        pfree(rvals[0]);
        pfree(rvals[1]);
        pfree(rvals);
        pfree(values[call_cntr][0]);
        pfree(values[call_cntr][1]);
        pfree(values[call_cntr]);

        //return datum
        SRF_RETURN_NEXT(funcctx, result);
    }
    else {
        SRF_RETURN_DONE(funcctx);
    }

    return true;
}

В списке pgsql-performance по дате отправления:

Предыдущее
От: Tyler Hildebrandt
Дата:
Сообщение: Query timing increased from 3s to 55s when used as function instead of select
Следующее
От: Krzysztof Nienartowicz
Дата:
Сообщение: Re: [BUGS] Query causing explosion of temp space with join involving partitioning