On Sun, Apr 21, 2019 at 03:08:22AM -0500, Justin Pryzby wrote:
>On Sun, Apr 21, 2019 at 01:03:50AM -0400, Gunther wrote:
>>On 4/20/2019 21:14, Tomas Vondra wrote:
>>>Maybe. But before wasting any more time on the memory leak investigation,
>>>I suggest you first try the patch moving the BufFile allocations to a
>>>separate context. That'll either confirm or disprove the theory.
>>
>>OK, fair enough. So, first patch 0001-* applied, recompiled and
>>
>>2019-04-21 04:08:04.364 UTC [11304] LOG: server process (PID 11313) was terminated by signal 11: Segmentation fault
>>...
>>
>>turns out the MemoryContext is NULL:
>>
>>(gdb) p context
>>$1 = (MemoryContext) 0x0
>
>I updated Tomas' patch to unconditionally set the context.
>(Note, oldctx vs oldcxt is fairly subtle but I think deliberate?)
I don't follow - is there a typo confusing oldctx vs. oldcxt? I don't
think so, but I might have missed something. (I always struggle with which
spelling is the right one).
I think the bug is actually much simpler - the memory context was created
only in ExecHashIncreaseNumBatches(), i.e. only when starting with nbatch=1.
But when the initial nbatch value was higher (i.e. when starting with 2 or
more batches), it was left NULL. That was OK for testing with the contrived
data set, but it may easily break on other examples.
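
So the fix is simply to create the context unconditionally in
ExecHashTableCreate(), next to the other per-hashtable contexts, so that it
is valid before any batch file can possibly be allocated. A minimal sketch
of that part of the updated patch (same names as in the hunks below):

    /* in ExecHashTableCreate(), alongside hashCxt and batchCxt */
    hashtable->fileCtx = AllocSetContextCreate(CurrentMemoryContext,
                                               "HashBatchFiles",
                                               ALLOCSET_DEFAULT_SIZES);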
So here is an updated patch - hopefully this version works. I don't have
time to do much more testing now, though. But it compiles.
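
To make it clear what the context is for: BufFileCreateTemp() pallocs the
BufFile (including its ~8kB BLCKSZ buffer) in CurrentMemoryContext, so every
call site that may create a batch file switches to fileCtx first. A sketch
of that idiom, wrapped in a hypothetical helper just for illustration (not
part of the patch, which does this inline at each site):

    /* hypothetical helper showing the idiom used at each call site */
    static void
    SaveTupleInFileCtx(HashJoinTable hashtable, MinimalTuple tuple,
                       uint32 hashvalue, int batchno)
    {
        /* charge any BufFile allocations to the dedicated context */
        MemoryContext oldctx = MemoryContextSwitchTo(hashtable->fileCtx);

        ExecHashJoinSaveTuple(tuple, hashvalue,
                              &hashtable->innerBatchFile[batchno]);

        MemoryContextSwitchTo(oldctx);
    }

With that in place, MemoryContextStats(TopMemoryContext) should report the
batch-file memory under "HashBatchFiles" rather than hiding it in whatever
context happened to be current.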
regards
--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 3292e67135ef67f4bcf1ec0595fe79497c49d47c Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@xxxxxxxxxxxxxxx>
Date: Sun, 21 Apr 2019 13:31:14 +0200
Subject: [PATCH 1/2] move BufFile stuff into separate context
---
src/backend/executor/nodeHash.c | 54 +++++++++++++++++++++++++++++
src/backend/executor/nodeHashjoin.c | 7 ++++
src/include/executor/hashjoin.h | 1 +
3 files changed, 62 insertions(+)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 6ffaa751f2..f7c92e78e9 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -498,6 +498,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
hashtable->skewTuples = 0;
hashtable->innerBatchFile = NULL;
hashtable->outerBatchFile = NULL;
+ hashtable->fileCtx = NULL;
hashtable->spaceUsed = 0;
hashtable->spacePeak = 0;
hashtable->spaceAllowed = space_allowed;
@@ -527,6 +528,10 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
"HashBatchContext",
ALLOCSET_DEFAULT_SIZES);
+ hashtable->fileCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "HashBatchFiles",
+ ALLOCSET_DEFAULT_SIZES);
+
/* Allocate data that will live for the life of the hashjoin */
oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
@@ -559,10 +564,14 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
if (nbatch > 1 && hashtable->parallel_state == NULL)
{
+ MemoryContext oldctx;
+
/*
* allocate and initialize the file arrays in hashCxt (not needed for
* parallel case which uses shared tuplestores instead of raw files)
*/
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
hashtable->innerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *));
hashtable->outerBatchFile = (BufFile **)
@@ -570,6 +579,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
/* The files will not be opened until needed... */
/* ... but make sure we have temp tablespaces established for them */
PrepareTempTablespaces();
+
+ MemoryContextSwitchTo(oldctx);
}
MemoryContextSwitchTo(oldcxt);
@@ -900,6 +911,11 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
nbatch = oldnbatch * 2;
Assert(nbatch > 1);
+ elog(WARNING, "ExecHashIncreaseNumBatches: increasing number of batches from %d to %d", oldnbatch, nbatch);
+
+ elog(LOG, "ExecHashIncreaseNumBatches ======= context stats start =======");
+ MemoryContextStats(TopMemoryContext);
+
#ifdef HJDEBUG
printf("Hashjoin %p: increasing nbatch to %d because space = %zu\n",
hashtable, nbatch, hashtable->spaceUsed);
@@ -909,6 +925,10 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
if (hashtable->innerBatchFile == NULL)
{
+ MemoryContext oldctx;
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
/* we had no file arrays before */
hashtable->innerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *));
@@ -916,9 +936,15 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
palloc0(nbatch * sizeof(BufFile *));
/* time to establish the temp tablespaces, too */
PrepareTempTablespaces();
+
+ MemoryContextSwitchTo(oldctx);
}
else
{
+ MemoryContext oldctx;
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
/* enlarge arrays and zero out added entries */
hashtable->innerBatchFile = (BufFile **)
repalloc(hashtable->innerBatchFile, nbatch * sizeof(BufFile *));
@@ -928,6 +954,9 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
(nbatch - oldnbatch) * sizeof(BufFile *));
MemSet(hashtable->outerBatchFile + oldnbatch, 0,
(nbatch - oldnbatch) * sizeof(BufFile *));
+
+ MemoryContextSwitchTo(oldctx);
+
}
MemoryContextSwitchTo(oldcxt);
@@ -999,12 +1028,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
}
else
{
+ MemoryContext oldctx;
+
/* dump it out */
Assert(batchno > curbatch);
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(hashTuple),
hashTuple->hashvalue,
&hashtable->innerBatchFile[batchno]);
+ MemoryContextSwitchTo(oldctx);
+
hashtable->spaceUsed -= hashTupleSize;
nfreed++;
}
@@ -1042,6 +1078,9 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
hashtable);
#endif
}
+
+ elog(LOG, "ExecHashIncreaseNumBatches ======= context stats end =======");
+ MemoryContextStats(TopMemoryContext);
}
/*
@@ -1656,13 +1695,20 @@ ExecHashTableInsert(HashJoinTable hashtable,
}
else
{
+ MemoryContext oldctx;
+
/*
* put the tuple into a temp file for later batches
*/
Assert(batchno > hashtable->curbatch);
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
ExecHashJoinSaveTuple(tuple,
hashvalue,
&hashtable->innerBatchFile[batchno]);
+
+ MemoryContextSwitchTo(oldctx);
}
}
@@ -2488,10 +2534,18 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
}
else
{
+ MemoryContext oldctx;
+
/* Put the tuple into a temp file for later batches */
Assert(batchno > hashtable->curbatch);
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
ExecHashJoinSaveTuple(tuple, hashvalue,
&hashtable->innerBatchFile[batchno]);
+
+ MemoryContextSwitchTo(oldctx);
+
pfree(hashTuple);
hashtable->spaceUsed -= tupleSize;
hashtable->spaceUsedSkew -= tupleSize;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 5922e60eed..6a546344ac 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -389,16 +389,23 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
if (batchno != hashtable->curbatch &&
node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
{
+ MemoryContext oldctx;
+
/*
* Need to postpone this outer tuple to a later batch.
* Save it in the corresponding outer-batch file.
*/
Assert(parallel_state == NULL);
Assert(batchno > hashtable->curbatch);
+
+ oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
+
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue,
&hashtable->outerBatchFile[batchno]);
+ MemoryContextSwitchTo(oldctx);
+
/* Loop around, staying in HJ_NEED_NEW_OUTER state */
continue;
}
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index a9f9872a78..ef8e4475e8 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -328,6 +328,7 @@ typedef struct HashJoinTableData
*/
BufFile **innerBatchFile; /* buffered virtual temp file per batch */
BufFile **outerBatchFile; /* buffered virtual temp file per batch */
+ MemoryContext fileCtx;
/*
* Info about the datatype-specific hash functions for the datatypes being
--
2.20.1
From a651fcfdc689059b466b6a7658422f96dd14fc78 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@xxxxxxxxxxxxxxx>
Date: Sun, 21 Apr 2019 13:31:53 +0200
Subject: [PATCH 2/2] allocate BufFile eagerly
---
src/backend/executor/nodeHash.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f7c92e78e9..4a1f70771d 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -564,6 +564,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
if (nbatch > 1 && hashtable->parallel_state == NULL)
{
+ int i;
MemoryContext oldctx;
/*
@@ -580,6 +581,15 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
/* ... but make sure we have temp tablespaces established for them */
PrepareTempTablespaces();
+ for (i = 1; i < nbatch; i++)
+ {
+ if (!hashtable->innerBatchFile[i])
+ hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
+
+ if (!hashtable->outerBatchFile[i])
+ hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+ }
+
MemoryContextSwitchTo(oldctx);
}
@@ -925,6 +935,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
if (hashtable->innerBatchFile == NULL)
{
+ int i;
MemoryContext oldctx;
oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
@@ -937,10 +948,20 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
/* time to establish the temp tablespaces, too */
PrepareTempTablespaces();
+ for (i = 1; i < nbatch; i++)
+ {
+ if (!hashtable->innerBatchFile[i])
+ hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
+
+ if (!hashtable->outerBatchFile[i])
+ hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+ }
+
MemoryContextSwitchTo(oldctx);
}
else
{
+ int i;
MemoryContext oldctx;
oldctx = MemoryContextSwitchTo(hashtable->fileCtx);
@@ -955,6 +976,15 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
MemSet(hashtable->outerBatchFile + oldnbatch, 0,
(nbatch - oldnbatch) * sizeof(BufFile *));
+ for (i = 1; i < nbatch; i++)
+ {
+ if (!hashtable->innerBatchFile[i])
+ hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
+
+ if (!hashtable->outerBatchFile[i])
+ hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+ }
+
MemoryContextSwitchTo(oldctx);
}
--
2.20.1