*** a/doc/src/sgml/gist.sgml
--- b/doc/src/sgml/gist.sgml
***************
*** 642,647 **** my_distance(PG_FUNCTION_ARGS)
--- 642,704 ----
+
+ GiST buffering build
+
+ Building large GiST indexes (indexes that do not fit in cache)
+ by sequential insertion tends to be slow, because a large fraction of
+ insertions involves actual IO operations. The exception is well-ordered
+ datasets, where nearby index tuples are routed to nearby index pages.
+ Since version 9.2, PostgreSQL supports a buffering index build technique
+ which can dramatically reduce the number of IO operations during index build.
+ This technique is an application of the buffer tree and is based on the
+ following works.
+
+
+
+
+
+ Efficient Bulk Operations on Dynamic R-trees
+
+
+
+
+
+
+ The Buffer Tree: A new technique for optimal I/O-algorithms
+
+
+
+
+
+
+ The implemented technique can be briefly described as follows. Additional
+ buffers are attached to all nodes of certain levels of the tree. Leaf
+ nodes never contain buffers. When an index tuple reaches a node with a
+ buffer, it is placed into that buffer. When a buffer overflows, it is emptied
+ into lower buffers or leaf pages. One emptying process can trigger
+ cascading emptying processes of underlying buffers. If a split occurs in a
+ node with a buffer, then that buffer splits into two buffers using the
+ penalty> function. Processing index tuples in a
+ bulk manner in small sub-trees gives a dramatic saving of IO operations.
+ However, buffering index build requires some extra CPU resources because
+ of extra penalty> calls. Also, it can influence
+ the quality of the produced index in both positive and negative directions.
+ That influence depends on various factors including dataset distribution,
+ operator class implementation etc.
+
+
+
+ Buffering build can be turned off, turned on, or left in automatic mode. In
+ automatic mode it is initially off, but it turns on when the index size
+ reaches . That behaviour is
+ controlled by the BUFFERING parameter. Also, the step of levels
+ at which buffers appear and the size of buffers are available for tuning via
+ the LEVELSTEP and BUFFERSIZE parameters.
+ See for details.
+
+
+
*** a/doc/src/sgml/ref/create_index.sgml
--- b/doc/src/sgml/ref/create_index.sgml
***************
*** 341,346 **** CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ name
--- 341,392 ----
+
+ GiST indexes additionally accept these parameters:
+
+
+
+
+
+ BUFFERING>
+
+
+ Determines the mode of the buffering build technique described in
+ . With OFF> it is
+ disabled. With ON> it is enabled during the whole build process. With
+ AUTO> (default) it is initially disabled, but turns on when the
+ index size reaches .
+
+
+
+
+
+ LEVELSTEP>
+
+
+ In a buffering build, buffers are located at tree levels i * LEVELSTEP>,
+ i > 0 (we use upward level numbering, level = 0 corresponds to leaf pages).
+ By default LEVELSTEP> is calculated so that a sub-tree
+ of LEVELSTEP> height fits
+ and .
+
+
+
+
+
+ BUFFERSIZE>
+
+
+ Maximum size of a node buffer in pages. By default it is calculated so that
+ emptying half of a node buffer fills on average one page per underlying node
+ buffer. This ratio guarantees effective IO usage. In some cases a lower
+ BUFFERSIZE> can give comparable IO savings with less CPU
+ overhead.
+
+
+
+
+
*** a/src/backend/access/common/reloptions.c
--- b/src/backend/access/common/reloptions.c
***************
*** 30,35 ****
--- 30,38 ----
#include "utils/memutils.h"
#include "utils/rel.h"
+
+ static void validateBufferingOption(char *value);
+
/*
* Contents of pg_class.reloptions
*
***************
*** 159,164 **** static relopt_int intRelOpts[] =
--- 162,183 ----
RELOPT_KIND_HEAP | RELOPT_KIND_TOAST
}, -1, 0, 2000000000
},
+ {
+ {
+ "levelstep",
+ "Level step in GiST index buffering build",
+ RELOPT_KIND_GIST
+ },
+ -1, 1, 100
+ },
+ {
+ {
+ "buffersize",
+ "Buffer size in GiST index buffering build",
+ RELOPT_KIND_GIST
+ },
+ -1, 1, 1000000000
+ },
/* list terminator */
{{NULL}}
};
***************
*** 219,224 **** static relopt_real realRelOpts[] =
--- 238,254 ----
static relopt_string stringRelOpts[] =
{
+ {
+ {
+ "buffering",
+ "Enables buffering build for this GiST index",
+ RELOPT_KIND_GIST
+ },
+ 4,
+ false,
+ validateBufferingOption,
+ "auto"
+ },
/* list terminator */
{{NULL}}
};
***************
*** 1267,1269 **** tablespace_reloptions(Datum reloptions, bool validate)
--- 1297,1318 ----
return (bytea *) tsopts;
}
+
+ /*
+ * Validator for "buffering" option of GiST indexes. Allows only "on", "off" and
+ * "auto" values.
+ */
+ static void
+ validateBufferingOption(char *value)
+ {
+ if (!value ||
+ (
+ strcmp(value, "on") &&
+ strcmp(value, "off") &&
+ strcmp(value, "auto")
+ )
+ )
+ {
+ elog(ERROR, "Only \"on\", \"off\" and \"auto\" values are available for \"buffering\" option.");
+ }
+ }
*** a/src/backend/access/gist/Makefile
--- b/src/backend/access/gist/Makefile
***************
*** 13,18 **** top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = gist.o gistutil.o gistxlog.o gistvacuum.o gistget.o gistscan.o \
! gistproc.o gistsplit.o
include $(top_srcdir)/src/backend/common.mk
--- 13,18 ----
include $(top_builddir)/src/Makefile.global
OBJS = gist.o gistutil.o gistxlog.o gistvacuum.o gistget.o gistscan.o \
! gistproc.o gistsplit.o gistbuild.o gistbuildbuffers.o
include $(top_srcdir)/src/backend/common.mk
*** a/src/backend/access/gist/README
--- b/src/backend/access/gist/README
***************
*** 24,29 **** The current implementation of GiST supports:
--- 24,30 ----
* provides NULL-safe interface to GiST core
* Concurrency
* Recovery support via WAL logging
+ * Buffering build algorithm
The support for concurrency implemented in PostgreSQL was developed based on
the paper "Access Methods for Next-Generation Database Systems" by
***************
*** 31,36 **** Marcel Kornaker:
--- 32,43 ----
http://www.sai.msu.su/~megera/postgres/gist/papers/concurrency/access-methods-for-next-generation.pdf.gz
+ Buffering build algorithm for GiST was developed based on the paper "Efficient
+ Bulk Operations on Dynamic R-trees" by Lars Arge, Klaus Hinrichs, Jan Vahrenhold
+ and Jeffrey Scott Vitter.
+
+ http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.135.9894&rep=rep1&type=pdf
+
The original algorithms were modified in several ways:
* They had to be adapted to PostgreSQL conventions. For example, the SEARCH
***************
*** 278,283 **** would complicate the insertion algorithm. So when an insertion sees a page
--- 285,394 ----
with F_FOLLOW_RIGHT set, it immediately tries to bring the split that
crashed in the middle to completion by adding the downlink in the parent.
+ Buffering build algorithm
+ --------------------
+
+ In the buffering build algorithm, levels are numbered upwards and the leaf page
+ level has number zero. An example is given in the picture below. Such numbering
+ is used so that pages keep their level number for their whole lifetime, even if
+ the root splits.
+
+ Level Tree
+
+ 3 *
+ / \
+ 2 * *
+ / | \ / | \
+ 1 * * * * * *
+ / \ / \ / \ / \ / \ / \
+ 0 o o o o o o o o o o o o
+
+ * - internal page
+ o - leaf page
+
+ In the buffering build algorithm, additional buffers are associated with pages.
+ Leaf pages never have buffers. Internal pages have buffers with some level step.
+ I.e. pages on levels level_step*i (i >= 1) have buffers. If level_step = 1 then
+ each internal page has a buffer.
+
+ Level Tree (level_step = 1) Tree (level_step = 2)
+
+ 3 *(b) *
+ / \ / \
+ 2 *(b) *(b) *(b) *(b)
+ / | \ / | \ / | \ / | \
+ 1 *(b) *(b) *(b) *(b) *(b) *(b) * * * * * *
+ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \
+ 0 o o o o o o o o o o o o o o o o o o o o o o o o
+
+ (b) - buffer
+
+ Each buffer can be in one of the two following states:
+ 1) The last page of the buffer is kept in main memory. A node buffer
+ automatically switches to this state when a new index tuple is added to the
+ buffer, or an index tuple is removed from it.
+ 2) All pages of the buffer are on disk. A node buffer automatically
+ switches to this state when a new emptying process starts.
+
+ When an index tuple is inserted, its first path can end at one of the following:
+ 1) A leaf page, if no levels have buffers, i.e. root level <= level_step.
+ 2) The buffer of the root page, if the root page has a buffer.
+ 3) The buffer of a topmost level page, if the root page doesn't have a buffer.
+
+ New index tuples are processed until the root level buffer (or the buffer of
+ the topmost level page) is half filled. When some buffer is half filled, the
+ process of emptying it starts.
+
+ Buffer emptying process means that index tuples from buffer are moving into
+ underlying buffers(if any) or leaf pages. Buffer emptying process involves
+ following:
+ 1) Pages of emptying buffer are one by one loading into main memory until
+ emptying stops.
+ 2) Last pages of underlying buffers are loaded into main memory.
+ 3) Page associated with buffer are cached.
+ 4) Pages between buffer and underlying buffers if level_step != 1 are cached.
+ (note that pages associated with underlying buffers aren't loading)
+ For emptying to leaf pages list of those items is following:
+ 1) Pages of emptying buffer are one by one loading into main memory until
+ emptying stops.
+ 2) Page associated with buffer are cached.
+ 3) Pages between buffer and leaf pages if level_step != 1 are cached.
+ 4) Leaf pages are cached.
+ An illustration of these requirements is given below.
+
+ Buffer emptying to another buffers Buffer emptying to leaf pages
+
+ +(cb) +(cb)
+ / \ / \
+ + + + +
+ / \ / \ / \ / \
+ *(ab) *(ab) *(ab) *(ab) x x x x
+
+ + - cached internal page
+ x - cache leaf page
+ * - non-cached internal page
+ (b) - buffer with loaded last page
+
+ One buffer emptying process can trigger another buffer emptying processes.
+ Buffer emptying stack data structure is the data structure responsible for
+ sequence of buffer emptying. Each node buffer which is half filled should be
+ inserted into buffer emptying stack.
+
+ When we're moving from buffer emptying on higher level to the buffer emptying
+ on lower level, loaded part of tree (only pages of tree not the buffers) are
+ remained in the main memory. Tree parts stack is the data structure which
+ represents hierarchy of loaded tree parts.
+
+ If a split occurs on a page which has a buffer, then its index tuples are
+ relocated. Index tuples are relocated between the buffers of the pages produced
+ by the split, using the penalty method.
+
+ When all index tuples have been inserted, there are still index tuples in the
+ buffers. At this moment the final buffer emptying starts. Each level has a list
+ of non-empty buffers. Final emptying loops over all tree levels starting from
+ the topmost. On each level all its buffers are sequentially emptied until all
+ buffers of the level are empty. Since no index tuples move upwards during
+ buffer emptying, all the buffers are empty when the final emptying finishes.
Authors:
Teodor Sigaev
*** a/src/backend/access/gist/gist.c
--- b/src/backend/access/gist/gist.c
***************
*** 24,38 ****
#include "utils/memutils.h"
#include "utils/rel.h"
- /* Working state for gistbuild and its callback */
- typedef struct
- {
- GISTSTATE giststate;
- int numindexattrs;
- double indtuples;
- MemoryContext tmpCtx;
- } GISTBuildState;
-
/* A List of these is used represent a split-in-progress. */
typedef struct
{
--- 24,29 ----
***************
*** 41,56 **** typedef struct
} GISTPageSplitInfo;
/* non-export function prototypes */
- static void gistbuildCallback(Relation index,
- HeapTuple htup,
- Datum *values,
- bool *isnull,
- bool tupleIsAlive,
- void *state);
- static void gistdoinsert(Relation r,
- IndexTuple itup,
- Size freespace,
- GISTSTATE *GISTstate);
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
GISTSTATE *giststate,
--- 32,37 ----
***************
*** 89,226 **** createTempGistContext(void)
}
/*
- * Routine to build an index. Basically calls insert over and over.
- *
- * XXX: it would be nice to implement some sort of bulk-loading
- * algorithm, but it is not clear how to do that.
- */
- Datum
- gistbuild(PG_FUNCTION_ARGS)
- {
- Relation heap = (Relation) PG_GETARG_POINTER(0);
- Relation index = (Relation) PG_GETARG_POINTER(1);
- IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
- IndexBuildResult *result;
- double reltuples;
- GISTBuildState buildstate;
- Buffer buffer;
- Page page;
-
- /*
- * We expect to be called exactly once for any index relation. If that's
- * not the case, big trouble's what we have.
- */
- if (RelationGetNumberOfBlocks(index) != 0)
- elog(ERROR, "index \"%s\" already contains data",
- RelationGetRelationName(index));
-
- /* no locking is needed */
- initGISTstate(&buildstate.giststate, index);
-
- /* initialize the root page */
- buffer = gistNewBuffer(index);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = BufferGetPage(buffer);
-
- START_CRIT_SECTION();
-
- GISTInitBuffer(buffer, F_LEAF);
-
- MarkBufferDirty(buffer);
-
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- XLogRecData rdata;
-
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
- PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- }
- else
- PageSetLSN(page, GetXLogRecPtrForTemp());
-
- UnlockReleaseBuffer(buffer);
-
- END_CRIT_SECTION();
-
- /* build the index */
- buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
- buildstate.indtuples = 0;
-
- /*
- * create a temporary memory context that is reset once for each tuple
- * inserted into the index
- */
- buildstate.tmpCtx = createTempGistContext();
-
- /* do the heap scan */
- reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
- gistbuildCallback, (void *) &buildstate);
-
- /* okay, all heap tuples are indexed */
- MemoryContextDelete(buildstate.tmpCtx);
-
- freeGISTstate(&buildstate.giststate);
-
- /*
- * Return statistics
- */
- result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
-
- result->heap_tuples = reltuples;
- result->index_tuples = buildstate.indtuples;
-
- PG_RETURN_POINTER(result);
- }
-
- /*
- * Per-tuple callback from IndexBuildHeapScan
- */
- static void
- gistbuildCallback(Relation index,
- HeapTuple htup,
- Datum *values,
- bool *isnull,
- bool tupleIsAlive,
- void *state)
- {
- GISTBuildState *buildstate = (GISTBuildState *) state;
- IndexTuple itup;
- MemoryContext oldCtx;
-
- oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
-
- /* form an index tuple and point it at the heap tuple */
- itup = gistFormTuple(&buildstate->giststate, index,
- values, isnull, true /* size is currently bogus */ );
- itup->t_tid = htup->t_self;
-
- /*
- * Since we already have the index relation locked, we call gistdoinsert
- * directly. Normal access method calls dispatch through gistinsert,
- * which locks the relation for write. This is the right thing to do if
- * you're inserting single tups, but not when you're initializing the
- * whole index at once.
- *
- * In this path we respect the fillfactor setting, whereas insertions
- * after initial build do not.
- */
- gistdoinsert(index, itup,
- RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
- &buildstate->giststate);
-
- buildstate->indtuples += 1;
- MemoryContextSwitchTo(oldCtx);
- MemoryContextReset(buildstate->tmpCtx);
- }
-
- /*
* gistbuildempty() -- build an empty gist index in the initialization fork
*/
Datum
--- 70,75 ----
***************
*** 275,281 **** gistinsert(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(false);
}
-
/*
* Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
* at that offset is atomically removed along with inserting the new tuples.
--- 124,129 ----
***************
*** 508,514 **** gistplacetopage(GISTInsertState *state, GISTSTATE *giststate,
/* Write the WAL record */
if (RelationNeedsWAL(state->r))
recptr = gistXLogSplit(state->r->rd_node, blkno, is_leaf,
! dist, oldrlink, oldnsn, leftchildbuf);
else
recptr = GetXLogRecPtrForTemp();
--- 356,362 ----
/* Write the WAL record */
if (RelationNeedsWAL(state->r))
recptr = gistXLogSplit(state->r->rd_node, blkno, is_leaf,
! dist, oldrlink, oldnsn, leftchildbuf, false);
else
recptr = GetXLogRecPtrForTemp();
***************
*** 608,614 **** gistplacetopage(GISTInsertState *state, GISTSTATE *giststate,
* this routine assumes it is invoked in a short-lived memory context,
* so it does not bother releasing palloc'd allocations.
*/
! static void
gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
{
ItemId iid;
--- 456,462 ----
* this routine assumes it is invoked in a short-lived memory context,
* so it does not bother releasing palloc'd allocations.
*/
! void
gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
{
ItemId iid;
***************
*** 917,924 **** gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
{
/*
* Page was split while we looked elsewhere. We didn't see the
! * downlink to the right page when we scanned the parent, so
! * add it to the queue now.
*
* Put the right page ahead of the queue, so that we visit it
* next. That's important, because if this is the lowest internal
--- 765,772 ----
{
/*
* Page was split while we looked elsewhere. We didn't see the
! * downlink to the right page when we scanned the parent, so add
! * it to the queue now.
*
* Put the right page ahead of the queue, so that we visit it
* next. That's important, because if this is the lowest internal
***************
*** 965,971 **** gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
RelationGetRelationName(r), child);
! return NULL; /* keep compiler quiet */
}
/*
--- 813,819 ----
elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
RelationGetRelationName(r), child);
! return NULL; /* keep compiler quiet */
}
/*
***************
*** 1414,1419 **** initGISTstate(GISTSTATE *giststate, Relation index)
--- 1262,1268 ----
else
giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
}
+ giststate->gfbb = NULL;
}
void
*** /dev/null
--- b/src/backend/access/gist/gistbuild.c
***************
*** 0 ****
--- 1,1091 ----
+ /*-------------------------------------------------------------------------
+ *
+ * gistbuild.c
+ * build algorithm for GiST indexes implementation.
+ *
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/gist/gistbuild.c
+ *
+ *-------------------------------------------------------------------------
+ */
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/gist_private.h"
+ #include "catalog/index.h"
+ #include "catalog/pg_collation.h"
+ #include "miscadmin.h"
+ #include "optimizer/cost.h"
+ #include "storage/bufmgr.h"
+ #include "storage/indexfsm.h"
+ #include "storage/smgr.h"
+ #include "utils/memutils.h"
+ #include "utils/rel.h"
+
+ #define LEAF_PAGES_STATS_STEP 128
+ #define LEAF_PAGES_STATS_COUNT 16
+
+
+ /* Working state for gistbuild and its callback */
+ typedef struct
+ {
+ GISTSTATE giststate;
+ int numindexattrs;
+ int64 indtuples;
+ int64 indtuplesSize;
+
+ /*
+ * Buffering build mode. Possible values: 'y' - we are in buffering build
+ * mode. 'a' - we are now in regular build mode, but can switch to
+ * buffering build mode when we decide to. 'n' - we are in regular build
+ * mode and aren't going to switch.
+ */
+ char bufferingMode;
+ MemoryContext tmpCtx;
+ } GISTBuildState;
+
+ static void freeUnreferencedPath(GISTBufferingInsertStack * path);
+ static bool gistBufferingBuildPlaceToPage(GISTInsertState *state,
+ GISTBufferingInsertStack * path,
+ GISTSTATE *giststate,
+ IndexTuple *itup, int ntup,
+ OffsetNumber oldoffnum);
+ static void processItup(GISTSTATE *giststate, GISTInsertState *state,
+ GISTBuildBuffers * gfbb, IndexTuple itup,
+ GISTBufferingInsertStack * startparent);
+ static void gistFindCorrectParent(GISTSTATE *giststate, Relation r,
+ GISTBufferingInsertStack * child);
+ static void processEmptyingStack(GISTSTATE *giststate, GISTInsertState *state);
+ static void gistBufferingBuildInsert(Relation index, IndexTuple itup,
+ GISTBuildState *buildstate);
+ static void gistBuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *values,
+ bool *isnull,
+ bool tupleIsAlive,
+ void *state);
+ static int getMaxLevel(Relation index);
+ static bool initBuffering(GISTBuildState *buildstate, Relation index);
+
+ /*
+ * Free unreferenced parts of path;
+ */
+ static void
+ freeUnreferencedPath(GISTBufferingInsertStack * path)
+ {
+ while (path->refCount == 0)
+ {
+ /*
+ * Path part is unreferenced. We can free it and decrease reference
+ * count of parent. If parent becomes unreferenced too procedure
+ * should be repeated for it.
+ */
+ GISTBufferingInsertStack *tmp = path->parent;
+
+ pfree(path);
+ path = tmp;
+ if (path)
+ path->refCount--;
+ else
+ break;
+ }
+ }
+
+ /*
+ * Decrease reference count of path part and remove unreferenced path parts if
+ * any.
+ */
+ void
+ decreasePathRefcount(GISTBufferingInsertStack * path)
+ {
+ path->refCount--;
+ freeUnreferencedPath(path);
+ }
+
+ /*
+ * Index tuple insert function of the buffering build algorithm. It is simpler
+ * than the regular insert function in that it does not take care of
+ * concurrency. It invokes the buffer relocation function when it splits a
+ * page. Also it takes several oldoffnums as a parameter because buffer
+ * relocation can alter a number of parent index tuples.
+ */
+ static bool
+ gistBufferingBuildPlaceToPage(GISTInsertState *state,
+ GISTBufferingInsertStack * path,
+ GISTSTATE *giststate,
+ IndexTuple *itup, int ntup,
+ OffsetNumber oldoffnum)
+ {
+ GISTBuildBuffers *gfbb = giststate->gfbb;
+ Buffer buffer = ReadBuffer(state->r, path->blkno);
+ Page page = (Page) BufferGetPage(buffer);
+ bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
+ int i;
+ bool is_split;
+ XLogRecPtr recptr;
+
+ LockBuffer(buffer, GIST_EXCLUSIVE);
+
+ /* Remove old index tuple if needed */
+ if (OffsetNumberIsValid(oldoffnum))
+ PageIndexTupleDelete(page, oldoffnum);
+
+ /* Check if there is enough space for insertion */
+ is_split = gistnospace(page, itup, ntup,
+ InvalidOffsetNumber, state->freespace);
+
+ if (is_split)
+ {
+ /* no space for insertion */
+ IndexTuple *itvec;
+ int tlen;
+ SplitedPageLayout *dist = NULL,
+ *ptr;
+ SplitedPageLayout rootpg;
+ BlockNumber blkno = BufferGetBlockNumber(buffer);
+ BlockNumber oldrlink = InvalidBlockNumber;
+ bool is_rootsplit;
+ XLogRecPtr recptr;
+ GistNSN oldnsn = {0, 0};
+
+ is_rootsplit = (blkno == GIST_ROOT_BLKNO);
+
+ /*
+ * Form index tuples vector to split. Old tuple was already removed
+ * from the vector.
+ */
+ itvec = gistextractpage(page, &tlen);
+ itvec = gistjoinvector(itvec, &tlen, itup, ntup);
+ dist = gistSplit(state->r, page, itvec, tlen, giststate);
+
+ /*
+ * Set up pages to work with. Allocate new buffers for all but the
+ * leftmost page. The original page becomes the new leftmost page, and
+ * is just replaced with the new contents.
+ *
+ * For a root-split, allocate new buffers for all child pages, the
+ * original page is overwritten with new root page containing
+ * downlinks to the new child pages.
+ */
+ ptr = dist;
+ if (!is_rootsplit)
+ {
+ oldnsn = GistPageGetOpaque(page)->nsn;
+ oldrlink = GistPageGetOpaque(page)->rightlink;
+ dist->buffer = buffer;
+ dist->block.blkno = BufferGetBlockNumber(buffer);
+ dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
+
+ /* clean all flags except F_LEAF */
+ GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
+
+ ptr = ptr->next;
+ }
+ for (; ptr; ptr = ptr->next)
+ {
+ /* Allocate new page */
+ ptr->buffer = gistNewBuffer(state->r);
+ GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
+ ptr->page = BufferGetPage(ptr->buffer);
+ ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
+ }
+
+ /*
+ * Now that we know which blocks the new pages go to, set up downlink
+ * tuples to point to them.
+ */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
+ GistTupleSetValid(ptr->itup);
+ }
+
+ {
+ StringInfoData sb;
+
+ initStringInfo(&sb);
+ for (ptr = dist; ptr; ptr = ptr->next)
+ appendStringInfo(&sb, "%u ", ptr->block.blkno);
+ }
+
+ if (is_rootsplit)
+ {
+ /*
+ * Adjust the top element in the insert stacks for the new root
+ * page.
+ */
+ GISTBufferingInsertStack *oldroot = gfbb->rootitem;
+
+ gfbb->rootitem = (GISTBufferingInsertStack *) MemoryContextAlloc(
+ gfbb->context, sizeof(GISTBufferingInsertStack));
+ gfbb->rootitem->parent = NULL;
+ gfbb->rootitem->blkno = GIST_ROOT_BLKNO;
+ gfbb->rootitem->downlinkoffnum = InvalidOffsetNumber;
+ gfbb->rootitem->level = oldroot->level + 1;
+ gfbb->rootitem->refCount = 1;
+
+ oldroot->parent = gfbb->rootitem;
+ oldroot->blkno = dist->block.blkno;
+ oldroot->downlinkoffnum = InvalidOffsetNumber;
+ }
+
+ /*
+ * Parent may be changed from the moment we set it. So, let us adjust
+ * the parent.
+ */
+ if (!is_rootsplit)
+ gistFindCorrectParent(giststate, state->r, path);
+
+ /*
+ * Relocate index tuples from buffer of splitted page between buffers
+ * of the pages produced by split.
+ */
+ relocateBuildBuffersOnSplit(giststate->gfbb, giststate, state->r,
+ path, buffer, dist);
+
+ /*
+ * If this is a root split, we construct the new root page with the
+ * downlinks here directly, instead of do recursive call for their
+ * insertion. Add the new root page to the list along with the child
+ * pages.
+ */
+ if (is_rootsplit)
+ {
+ IndexTuple *downlinks;
+ int ndownlinks = 0;
+ int i;
+
+ rootpg.buffer = buffer;
+ rootpg.page = PageGetTempPageCopySpecial(
+ BufferGetPage(rootpg.buffer));
+ GistPageGetOpaque(rootpg.page)->flags = 0;
+
+ /* Prepare a vector of all the downlinks */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ ndownlinks++;
+ downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
+ for (i = 0, ptr = dist; ptr; ptr = ptr->next)
+ downlinks[i++] = ptr->itup;
+
+ rootpg.block.blkno = GIST_ROOT_BLKNO;
+ rootpg.block.num = ndownlinks;
+ rootpg.list = gistfillitupvec(downlinks, ndownlinks,
+ &(rootpg.lenlist));
+ rootpg.itup = NULL;
+
+ rootpg.next = dist;
+ dist = &rootpg;
+ }
+
+ /*
+ * Fill all pages. All the pages are new, ie. freshly allocated empty
+ * pages, or a temporary copy of the old page.
+ */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ char *data = (char *) (ptr->list);
+
+ for (i = 0; i < ptr->block.num; i++)
+ {
+ if (PageAddItem(ptr->page,
+ (Item) data, IndexTupleSize((IndexTuple) data),
+ i + FirstOffsetNumber, false, false) ==
+ InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page in \"%s\"",
+ RelationGetRelationName(state->r));
+ data += IndexTupleSize((IndexTuple) data);
+ }
+
+ /* Set up rightlinks */
+ if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
+ GistPageGetOpaque(ptr->page)->rightlink =
+ ptr->next->block.blkno;
+ else
+ GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
+ }
+
+ /* Mark buffers dirty */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ MarkBufferDirty(ptr->buffer);
+
+ PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
+ dist->page = BufferGetPage(dist->buffer);
+
+ /* TODO: Write the WAL record */
+ if (RelationNeedsWAL(state->r))
+ recptr = gistXLogSplit(state->r->rd_node, blkno, is_leaf,
+ dist, oldrlink, oldnsn, InvalidBuffer, true);
+ else
+ recptr = GetXLogRecPtrForTemp();
+
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ PageSetLSN(ptr->page, recptr);
+ PageSetTLI(ptr->page, ThisTimeLineID);
+ }
+
+ /* Release buffers, except the one holding the inserted/updated tuple */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ if (BufferIsValid(ptr->buffer))
+ UnlockReleaseBuffer(ptr->buffer);
+ }
+
+ /*
+ * If it wasn't root split, we have to insert downlinks to parent
+ * page.
+ */
+ if (!is_rootsplit)
+ {
+ IndexTuple *itups;
+ int cnt = 0,
+ i;
+
+ /*
+ * Count number of downlinks for insert.
+ */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ cnt++;
+ }
+
+ /* Allocate array of new index tuples and array of offset numbers */
+ itups = (IndexTuple *) palloc(sizeof(IndexTuple) * cnt);
+
+ /* Fill those arrays */
+ i = 0;
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ itups[i] = ptr->itup;
+ i++;
+ }
+
+ gistBufferingBuildPlaceToPage(state, path->parent, giststate, itups,
+ cnt, path->downlinkoffnum);
+ }
+ }
+ else
+ {
+ /*
+ * Enough of space. Just insert index tuples to the page.
+ */
+ gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
+ MarkBufferDirty(buffer);
+
+ /* Write the WAL record */
+ if (RelationNeedsWAL(state->r))
+ {
+ recptr = gistXLogUpdate(state->r->rd_node, buffer, &oldoffnum,
+ OffsetNumberIsValid(oldoffnum) ? 1 : 0, itup, ntup, InvalidBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ else
+ {
+ recptr = GetXLogRecPtrForTemp();
+ PageSetLSN(page, recptr);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+
+ return is_split;
+ }
+
+ /*
+ * Process index tuple. Run index tuple down until it meet leaf page or
+ * node buffer.
+ */
+ static void
+ processItup(GISTSTATE *giststate, GISTInsertState *state,
+ GISTBuildBuffers * gfbb, IndexTuple itup,
+ GISTBufferingInsertStack * startparent)
+ {
+ GISTBufferingInsertStack *path;
+ BlockNumber childblkno;
+ Buffer buffer;
+
+ /*
+ * NULL in startparent means that we start index tuple processing from the
+ * root.
+ */
+ if (!startparent)
+ path = gfbb->rootitem;
+ else
+ path = startparent;
+
+ /*
+ * Loop until we are on leaf page (level == 0) or we reach level with
+ * buffers (if it wasn't level that we've start at).
+ */
+ for (;;)
+ {
+ ItemId iid;
+ IndexTuple idxtuple,
+ newtup;
+ Page page;
+ OffsetNumber childoffnum;
+ GISTBufferingInsertStack *parent;
+
+ /*
+ * Do we meet a level with buffers? Surely buffer of page we start
+ * from doesn't matter.
+ */
+ if (path != startparent && LEVEL_HAS_BUFFERS(path->level, gfbb))
+ break;
+
+ /* Do we meet leaf page? */
+ if (path->level == 0)
+ break;
+
+ /* Choose child for insertion */
+ buffer = ReadBuffer(state->r, path->blkno);
+ LockBuffer(buffer, GIST_EXCLUSIVE);
+
+ page = (Page) BufferGetPage(buffer);
+ childoffnum = gistchoose(state->r, page, itup, giststate);
+ iid = PageGetItemId(page, childoffnum);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+ childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+
+ /* Adjust key representing child if needed */
+ newtup = gistgetadjusted(state->r, idxtuple, itup, giststate);
+
+ UnlockReleaseBuffer(buffer);
+
+ if (newtup)
+ gistBufferingBuildPlaceToPage(state, path, giststate,
+ &newtup, 1, childoffnum);
+
+ /* Create new path item representing current page */
+ parent = path;
+ path = (GISTBufferingInsertStack *) MemoryContextAlloc(gfbb->context,
+ sizeof(GISTBufferingInsertStack));
+ path->parent = parent;
+ path->level = parent->level - 1;
+ path->blkno = childblkno;
+ path->downlinkoffnum = childoffnum;
+
+ /* It's unreferenced just now */
+ path->refCount = 0;
+
+ /* Adjust reference count of parent */
+ if (parent)
+ parent->refCount++;
+ }
+
+ if (LEVEL_HAS_BUFFERS(path->level, gfbb))
+ {
+ /*
+ * We've reached level with buffers. Now place index tuple to the
+ * buffer and add buffer emptying stack element if buffer overflows.
+ */
+ bool wasOverflowed;
+ NodeBuffer *childNodeBuffer;
+
+ childNodeBuffer = getNodeBuffer(gfbb, giststate, path->blkno,
+ path->downlinkoffnum, path->parent,
+ true);
+ wasOverflowed = BUFFER_IS_OVERLOW(childNodeBuffer, gfbb);
+ pushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
+ if (!wasOverflowed && BUFFER_IS_OVERLOW(childNodeBuffer, gfbb))
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+ gfbb->bufferEmptyingStack = lcons(childNodeBuffer, gfbb->bufferEmptyingStack);
+ MemoryContextSwitchTo(oldcxt);
+ }
+ }
+ else
+ {
+ /*
+ * We've reached leaf level. So, place index tuple here.
+ */
+ gistBufferingBuildPlaceToPage(state, path, giststate, &itup, 1, InvalidOffsetNumber);
+ }
+
+ /*
+ * Free unreferenced path items if any. Path item may be referenced by
+ * node buffer.
+ */
+ freeUnreferencedPath(path);
+ }
+
+
+ /*
+ * Re-find the parent of 'child' by following rightlinks (buffering build).
+ */
+ static void
+ gistFindCorrectParent(GISTSTATE *giststate, Relation r, GISTBufferingInsertStack * child)
+ {
+ GISTBuildBuffers *gfbb = giststate->gfbb;
+ GISTBufferingInsertStack *parent = child->parent;
+ OffsetNumber i,
+ maxoff;
+ ItemId iid;
+ IndexTuple idxtuple;
+ Buffer buffer;
+ Page page;
+ bool copied = false;
+
+ buffer = ReadBuffer(r, parent->blkno);
+ page = BufferGetPage(buffer);
+ LockBuffer(buffer, GIST_EXCLUSIVE);
+ gistcheckpage(r, buffer);
+
+ /* Fast path: check whether the remembered downlink is still in place */
+ if (child->downlinkoffnum != InvalidOffsetNumber)
+ {
+ iid = PageGetItemId(page, child->downlinkoffnum);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+ if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
+ {
+ /* Still there */
+ UnlockReleaseBuffer(buffer);
+ return;
+ }
+ }
+
+ /* Parent has changed; follow rightlinks until the downlink is found */
+ while (true)
+ {
+ maxoff = PageGetMaxOffsetNumber(page);
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ iid = PageGetItemId(page, i);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+ if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
+ {
+ /* Found the downlink; remember its new offset */
+ child->downlinkoffnum = i;
+ UnlockReleaseBuffer(buffer);
+ return;
+ }
+ }
+
+ /*
+ * Make a private copy of the parent path item before modifying it,
+ * because other path items may still reference the original.
+ */
+ if (!copied)
+ {
+ parent = (GISTBufferingInsertStack *) MemoryContextAlloc(gfbb->context,
+ sizeof(GISTBufferingInsertStack));
+ memcpy(parent, child->parent, sizeof(GISTBufferingInsertStack));
+ if (parent->parent)
+ parent->parent->refCount++;
+ decreasePathRefcount(child->parent);
+ child->parent = parent;
+ parent->refCount = 1;
+ copied = true;
+ }
+
+ parent->blkno = GistPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer);
+
+ if (parent->blkno == InvalidBlockNumber)
+ {
+ /*
+ * Reached the end of the rightlink chain without finding the
+ * downlink. This should never happen during index build.
+ */
+ break;
+ }
+
+ /* Next page */
+ buffer = ReadBuffer(r, parent->blkno);
+ page = BufferGetPage(buffer);
+ LockBuffer(buffer, GIST_EXCLUSIVE);
+ gistcheckpage(r, buffer);
+ }
+
+ elog(ERROR, "failed to re-find parent for block %u", child->blkno);
+ }
+
+ /*
+ * Process buffers emptying stack. Emptying of one buffer can cause emptying
+ * of other buffers. This function iterates until this cascading emptying
+ * process finishes, i.e. until the buffer emptying stack is empty.
+ */
+ static void
+ processEmptyingStack(GISTSTATE *giststate, GISTInsertState *state)
+ {
+ GISTBuildBuffers *gfbb = giststate->gfbb;
+ int requiredSizeDecrease = gfbb->pagesPerBuffer * BLCKSZ;
+
+ /* Iterate while we have elements in buffers emptying stack. */
+ while (gfbb->bufferEmptyingStack != NIL)
+ {
+ int initialBusySize,
+ busySize;
+ NodeBuffer *emptyingNodeBuffer;
+
+ /* Get node buffer from emptying stack. */
+ emptyingNodeBuffer = (NodeBuffer *) linitial(gfbb->bufferEmptyingStack);
+ gfbb->bufferEmptyingStack = list_delete_first(gfbb->bufferEmptyingStack);
+
+ /*
+ * We are going to load the last pages of the buffers that tuples will
+ * be emptied into, so unload any previously loaded buffers first.
+ */
+ unloadNodeBuffers(gfbb);
+
+ /*
+ * We empty the buffer down by half of its maximal size, so remember
+ * the initial busy size.
+ */
+ initialBusySize = getNodeBufferBusySize(gfbb, emptyingNodeBuffer);
+
+ /* Reset state used to detect a split of the buffer being emptied. */
+ gfbb->currentEmptyingBufferSplit = false;
+ gfbb->currentEmptyingBufferBlockNumber = emptyingNodeBuffer->nodeBlocknum;
+
+ while (true)
+ {
+ IndexTuple itup;
+
+ /* Get one index tuple from node buffer and process it */
+ if (!popItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
+ break;
+ processItup(giststate, state, gfbb, itup,
+ emptyingNodeBuffer->path);
+
+ /* Free all the memory allocated during index tuple processing */
+ MemoryContextReset(CurrentMemoryContext);
+
+ /*
+ * If the node buffer being emptied was split, stop emptying:
+ * that node buffer no longer exists.
+ */
+ if (gfbb->currentEmptyingBufferSplit)
+ break;
+
+ busySize = getNodeBufferBusySize(gfbb, emptyingNodeBuffer);
+
+ /*
+ * If we've processed half of the buffer size limit and the buffer
+ * is no longer overflowed, stop in order to avoid exceeding the
+ * limit in the lower buffers.
+ */
+ if (initialBusySize - busySize >= requiredSizeDecrease &&
+ !BUFFER_IS_OVERLOW(emptyingNodeBuffer, gfbb))
+ break;
+ }
+ }
+ }
+
+ /*
+ * Insert one index tuple during a buffering index build: route the tuple
+ * into the tree/buffers, then drain any buffers that overflowed as a result.
+ */
+ static void
+ gistBufferingBuildInsert(Relation index, IndexTuple itup,
+ GISTBuildState *buildstate)
+ {
+ GISTBuildBuffers *gfbb = buildstate->giststate.gfbb;
+ GISTInsertState insertstate;
+
+ memset(&insertstate, 0, sizeof(GISTInsertState));
+ insertstate.freespace = RelationGetTargetPageFreeSpace(index,
+ GIST_DEFAULT_FILLFACTOR);
+ insertstate.r = index;
+
+ /* We are ready for index tuple processing */
+ processItup(&buildstate->giststate, &insertstate, gfbb, itup, NULL);
+
+ /* Process buffer emptying stack if any */
+ processEmptyingStack(&buildstate->giststate, &insertstate);
+ }
+
+ /*
+ * Per-tuple callback from IndexBuildHeapScan
+ */
+ static void
+ gistBuildCallback(Relation index,
+ HeapTuple htup,
+ Datum *values,
+ bool *isnull,
+ bool tupleIsAlive,
+ void *state)
+ {
+ GISTBuildState *buildstate = (GISTBuildState *) state;
+ IndexTuple itup;
+ MemoryContext oldCtx;
+
+ oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = gistFormTuple(&buildstate->giststate, index,
+ values, isnull, true /* size is currently bogus */ );
+ itup->t_tid = htup->t_self;
+
+ if (buildstate->bufferingMode == 'y')
+ {
+ /* We've decided to use buffering. So let's use buffering insert. */
+ gistBufferingBuildInsert(index, itup, buildstate);
+ }
+ else
+ {
+ /*
+ * We didn't decide to use buffering yet or aren't going to use it at
+ * all. Since we already have the index relation locked, we call
+ * gistdoinsert directly. Normal access method calls dispatch through
+ * gistinsert, which locks the relation for write. This is the right
+ * thing to do if you're inserting single tups, but not when you're
+ * initializing the whole index at once.
+ *
+ * In this path we respect the fillfactor setting, whereas insertions
+ * after initial build do not.
+ */
+ gistdoinsert(index, itup,
+ RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
+ &buildstate->giststate);
+ }
+
+ buildstate->indtuples += 1;
+ buildstate->indtuplesSize += IndexTupleSize(itup);
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(buildstate->tmpCtx);
+
+ /*
+ * For automatic switching to buffering mode, check whether the index
+ * still fits in the effective cache.
+ */
+ if (buildstate->bufferingMode == 'a' &&
+ effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM))
+ {
+ /*
+ * Index doesn't fit in effective cache anymore. Try to switch to
+ * buffering build mode.
+ */
+ if (initBuffering(buildstate, index))
+ {
+ /*
+ * Buffering build is successfully initialized. Now we can set
+ * appropriate flag.
+ */
+ buildstate->bufferingMode = 'y';
+ elog(INFO, "switching to buffered mode");
+ }
+ else
+ {
+ /*
+ * Failed to switch to buffering build because the memory
+ * settings are too low. Mark that we aren't going to switch.
+ */
+ buildstate->bufferingMode = 'n';
+ }
+ }
+ }
+
+ /*
+ * Return the level number of the root of a GiST index (leaves are level 0),
+ * determined by descending the leftmost path from the root.
+ */
+ static int
+ getMaxLevel(Relation index)
+ {
+ int maxLevel = 0;
+ BlockNumber blkno = GIST_ROOT_BLKNO;
+
+ while (true)
+ {
+ Buffer buffer;
+ Page page;
+ IndexTuple itup;
+
+ buffer = ReadBuffer(index, blkno);
+ page = (Page) BufferGetPage(buffer);
+ if (GistPageIsLeaf(page))
+ {
+ /* Leaf page reached; we have descended the whole height. */
+ ReleaseBuffer(buffer);
+ break;
+ }
+ /* Internal page: descend via the first downlink. */
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
+ blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+ ReleaseBuffer(buffer);
+ maxLevel++;
+ }
+ return maxLevel;
+ }
+
+ /*
+ * Initial calculations for GiST buffering build. Returns true if buffering
+ * was set up, false when the memory settings are insufficient (levelStep 0).
+ */
+ static bool
+ initBuffering(GISTBuildState *buildstate, Relation index)
+ {
+ int pagesPerBuffer = -1;
+ Size pageFreeSpace;
+ Size itupAvgSize;
+ int i,
+ maxIndexTuplesCount;
+ int effectiveMemory;
+ int levelStep = 0;
+ GISTBuildBuffers *gfbb;
+
+ /* try to get user-defined options */
+ if (index->rd_options)
+ {
+ GiSTOptions *options = (GiSTOptions *) index->rd_options;
+
+ levelStep = options->levelStep;
+ pagesPerBuffer = options->bufferSize;
+ }
+
+ /* calculate the number of index tuples that fit on a page */
+ pageFreeSpace = BLCKSZ - SizeOfPageHeaderData -
+ sizeof(GISTPageOpaqueData) - sizeof(ItemIdData);
+
+ if (buildstate->indtuples > 0)
+ {
+ itupAvgSize = round((double) buildstate->indtuplesSize /
+ (double) buildstate->indtuples);
+ }
+ else
+ {
+ itupAvgSize = (Size) MAXALIGN(sizeof(IndexTupleData));
+ for (i = 0; i < index->rd_att->natts; i++)
+ {
+ if (index->rd_att->attrs[i]->attlen < 0)
+ itupAvgSize += VARHDRSZ;
+ else
+ itupAvgSize += index->rd_att->attrs[i]->attlen;
+ }
+ }
+ maxIndexTuplesCount = pageFreeSpace / itupAvgSize;
+
+ /* calculate level step if it isn't specified by user */
+ effectiveMemory = Min(maintenance_work_mem * 1024 / BLCKSZ,
+ effective_cache_size);
+ if (levelStep <= 0)
+ {
+ /* floor(log_maxIndexTuplesCount(effectiveMemory / 4)) */
+ levelStep = (int) (log((double) effectiveMemory / 4.0) /
+ log((double) maxIndexTuplesCount));
+ if (levelStep < 0)
+ levelStep = 0;
+ }
+
+ /* calculate buffer size if it isn't specified by user */
+ if (pagesPerBuffer <= 0)
+ {
+ pagesPerBuffer = 2;
+ for (i = 0; i < levelStep; i++)
+ {
+ pagesPerBuffer *= maxIndexTuplesCount;
+ }
+ }
+
+ if (levelStep > 0)
+ {
+ /* Enough memory for at least levelStep == 1: set up buffering. */
+ gfbb = palloc(sizeof(GISTBuildBuffers));
+ gfbb->pagesPerBuffer = pagesPerBuffer;
+ gfbb->levelStep = levelStep;
+ initGiSTBuildBuffers(gfbb, buildstate->indtuples > 0 ? getMaxLevel(index) : 0);
+ buildstate->giststate.gfbb = gfbb;
+ elog(INFO, "Level step = %d, pagesPerBuffer = %d",
+ levelStep, pagesPerBuffer);
+ return true;
+ }
+ else
+ {
+ /* Not enough memory for buffering build. */
+ return false;
+ }
+ }
+
+ /*
+ * Routine to build an index. Basically calls insert over and over.
+ *
+ * XXX: it would be nice to implement some sort of bulk-loading
+ * algorithm, but it is not clear how to do that.
+ */
+ Datum
+ gistbuild(PG_FUNCTION_ARGS)
+ {
+ Relation heap = (Relation) PG_GETARG_POINTER(0);
+ Relation index = (Relation) PG_GETARG_POINTER(1);
+ IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
+ IndexBuildResult *result;
+ double reltuples;
+ GISTBuildState buildstate;
+ Buffer buffer;
+ Page page;
+ MemoryContext oldcxt = CurrentMemoryContext;
+
+ if (index->rd_options)
+ {
+ /* Get buffering mode ("on"/"off"/"auto") from the options string */
+ GiSTOptions *options = (GiSTOptions *) index->rd_options;
+ char *bufferingMode = (char *) options + options->bufferingModeOffset;
+
+ if (!strcmp(bufferingMode, "on"))
+ buildstate.bufferingMode = 'y';
+ else if (!strcmp(bufferingMode, "off"))
+ buildstate.bufferingMode = 'n';
+ else
+ buildstate.bufferingMode = 'a';
+ }
+ else
+ {
+ /* Automatic buffering mode switching by default */
+ buildstate.bufferingMode = 'a';
+ }
+
+ /*
+ * We expect to be called exactly once for any index relation. If that's
+ * not the case, big trouble's what we have.
+ */
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "index \"%s\" already contains data",
+ RelationGetRelationName(index));
+
+ /* no locking is needed */
+ initGISTstate(&buildstate.giststate, index);
+
+ /* initialize the root page */
+ buffer = gistNewBuffer(index);
+ Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
+ page = BufferGetPage(buffer);
+
+ START_CRIT_SECTION();
+
+ GISTInitBuffer(buffer, F_LEAF);
+
+ MarkBufferDirty(buffer);
+
+ if (RelationNeedsWAL(index))
+ {
+ XLogRecPtr recptr;
+ XLogRecData rdata;
+
+ rdata.data = (char *) &(index->rd_node);
+ rdata.len = sizeof(RelFileNode);
+ rdata.buffer = InvalidBuffer;
+ rdata.next = NULL;
+
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ else
+ PageSetLSN(page, GetXLogRecPtrForTemp());
+
+ UnlockReleaseBuffer(buffer);
+
+ END_CRIT_SECTION();
+
+ /* build the index */
+ buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
+ buildstate.indtuples = 0;
+ buildstate.indtuplesSize = 0;
+
+ /*
+ * create a temporary memory context that is reset once for each tuple
+ * inserted into the index
+ */
+ buildstate.tmpCtx = createTempGistContext();
+
+ if (buildstate.bufferingMode == 'y')
+ {
+ if (!initBuffering(&buildstate, index))
+ buildstate.bufferingMode = 'n';
+ }
+
+ /*
+ * Do the heap scan.
+ */
+ reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+ gistBuildCallback, (void *) &buildstate);
+
+ /*
+ * If this was a buffering build, perform the final node buffer emptying.
+ */
+ if (buildstate.bufferingMode == 'y')
+ {
+ int i;
+ GISTInsertState insertstate;
+ NodeBuffer *nodeBuffer;
+ MemoryContext oldCtx;
+ GISTBuildBuffers *gfbb = buildstate.giststate.gfbb;
+
+ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
+
+ memset(&insertstate, 0, sizeof(GISTInsertState));
+ insertstate.freespace = RelationGetTargetPageFreeSpace(index,
+ GIST_DEFAULT_FILLFACTOR);
+ insertstate.r = index;
+
+ for (;;)
+ {
+ nodeBuffer = getNodeBuffer(gfbb, &buildstate.giststate, GIST_ROOT_BLKNO,
+ InvalidOffsetNumber, NULL, false);
+ if (!nodeBuffer || nodeBuffer->blocksCount <= 0)
+ break;
+ MemoryContextSwitchTo(gfbb->context);
+ gfbb->bufferEmptyingStack = lcons(nodeBuffer, gfbb->bufferEmptyingStack);
+ MemoryContextSwitchTo(buildstate.tmpCtx);
+ processEmptyingStack(&buildstate.giststate, &insertstate);
+ }
+
+ /*
+ * Iterate over the buffered levels from the highest one downwards.
+ */
+ for (i = gfbb->buffersOnLevelsCount - 1; i >= 0; i--)
+ {
+ bool nonEmpty = true;
+
+ /*
+ * While this level still has a non-empty node buffer, walk the
+ * list and trigger emptying of every non-empty buffer on it.
+ */
+ while (nonEmpty)
+ {
+ ListCell *p;
+
+ nonEmpty = false;
+
+ for (p = list_head(gfbb->buffersOnLevels[i]); p; p = p->next)
+ {
+ /* Get next node buffer */
+ nodeBuffer = (NodeBuffer *) p->data.ptr_value;
+
+ /* Skip empty node buffer */
+ if (nodeBuffer->blocksCount == 0)
+ continue;
+
+ /* Memorize that we met non-empty buffer. */
+ nonEmpty = true;
+
+ /* Process emptying of node buffer */
+ MemoryContextSwitchTo(gfbb->context);
+ gfbb->bufferEmptyingStack = lcons(nodeBuffer, gfbb->bufferEmptyingStack);
+ MemoryContextSwitchTo(buildstate.tmpCtx);
+ processEmptyingStack(&buildstate.giststate, &insertstate);
+ }
+ }
+ }
+ MemoryContextSwitchTo(oldCtx);
+ }
+
+ /* okay, all heap tuples are indexed */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(buildstate.tmpCtx);
+
+ freeGISTstate(&buildstate.giststate);
+
+ /*
+ * Return statistics
+ */
+ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+
+ result->heap_tuples = reltuples;
+ result->index_tuples = (double) buildstate.indtuples;
+
+ PG_RETURN_POINTER(result);
+ }
*** /dev/null
--- b/src/backend/access/gist/gistbuildbuffers.c
***************
*** 0 ****
--- 1,884 ----
+
+ /*-------------------------------------------------------------------------
+ *
+ * gistbuildbuffers.c
+ * buffers management functions for GiST buffering build algorithm.
+ *
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/gist/gistbuildbuffers.c
+ *
+ *-------------------------------------------------------------------------
+ */
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/gist_private.h"
+ #include "catalog/index.h"
+ #include "catalog/pg_collation.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/indexfsm.h"
+ #include "storage/buffile.h"
+ #include "utils/memutils.h"
+ #include "utils/rel.h"
+
+ static NodeBufferPage *allocateNewPageBuffer(GISTBuildBuffers * gfbb);
+ static void addLoadedBuffer(GISTBuildBuffers * gfbb, BlockNumber blocknum);
+ static void loadNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer);
+ static void unloadNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer);
+ static void placeItupToPage(NodeBufferPage * pageBuffer, IndexTuple item);
+ static void getItupFromPage(NodeBufferPage * pageBuffer, IndexTuple *item);
+ static int freeBlocks_cmp(const void *a, const void *b);
+ static long getFreeBlock(GISTBuildBuffers * gfbb);
+ static void releaseBlock(GISTBuildBuffers * gfbb, long blocknum);
+
+ /*
+ * Initialize GiST buffering build structure.
+ */
+ void
+ initGiSTBuildBuffers(GISTBuildBuffers * gfbb, int maxLevel)
+ {
+ HASHCTL hashCtl;
+
+ /*
+ * Create the temporary file and initialize the data structures for
+ * free-page management.
+ */
+ gfbb->pfile = BufFileCreateTemp(true);
+ gfbb->nFileBlocks = 0;
+ gfbb->nFreeBlocks = 0;
+ gfbb->blocksSorted = false;
+ gfbb->freeBlocksLen = 32;
+ gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
+
+ /*
+ * Current memory context will be used for all in-memory data structures
+ * of buffers.
+ */
+ gfbb->context = CurrentMemoryContext;
+
+ /*
+ * The nodeBuffersTab hash maps an index block number to its node
+ * buffer.
+ */
+ hashCtl.keysize = sizeof(BlockNumber);
+ hashCtl.entrysize = sizeof(NodeBuffer);
+ hashCtl.hcxt = CurrentMemoryContext;
+ hashCtl.hash = tag_hash;
+ hashCtl.match = memcmp;
+ gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
+ 1024,
+ &hashCtl,
+ HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION
+ | HASH_COMPARE);
+
+ /*
+ * Stack of node buffers that are scheduled for emptying.
+ */
+ gfbb->bufferEmptyingStack = NIL;
+
+ gfbb->currentEmptyingBufferBlockNumber = InvalidBlockNumber;
+ gfbb->currentEmptyingBufferSplit = false;
+
+ /*
+ * Per-level lists of node buffers, used by the final emptying pass.
+ */
+ gfbb->buffersOnLevelsLen = 16;
+ gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
+ gfbb->buffersOnLevelsLen);
+ gfbb->buffersOnLevelsCount = 0;
+
+ /*
+ * "Loaded" buffers are the index blocks whose buffer pages are
+ * currently held in memory.
+ */
+ gfbb->loadedBuffersLen = 32;
+ gfbb->loadedBuffers = (BlockNumber *) palloc(gfbb->loadedBuffersLen *
+ sizeof(BlockNumber));
+ gfbb->loadedBuffersCount = 0;
+
+ gfbb->rootitem = (GISTBufferingInsertStack *) MemoryContextAlloc(gfbb->context,
+ sizeof
+ (GISTBufferingInsertStack));
+ gfbb->rootitem->parent = NULL;
+ gfbb->rootitem->blkno = GIST_ROOT_BLKNO;
+ gfbb->rootitem->downlinkoffnum = InvalidOffsetNumber;
+ gfbb->rootitem->level = maxLevel;
+ }
+
+ /*
+ * Return the NodeBuffer for the given block number. If the createNew flag
+ * is set, a new NodeBuffer is created when none exists yet.
+ */
+ NodeBuffer *
+ getNodeBuffer(GISTBuildBuffers * gfbb, GISTSTATE *giststate,
+ BlockNumber nodeBlocknum,
+ OffsetNumber downlinkoffnum,
+ GISTBufferingInsertStack * parent, bool createNew)
+ {
+ NodeBuffer *nodeBuffer;
+ bool found;
+
+ /*
+ * Find nodeBuffer in hash table
+ */
+ nodeBuffer = (NodeBuffer *) hash_search(gfbb->nodeBuffersTab,
+ (const void *) &nodeBlocknum,
+ createNew ? HASH_ENTER : HASH_FIND,
+ &found);
+ if (!found)
+ {
+ GISTBufferingInsertStack *path;
+ int levelIndex;
+ int i;
+ MemoryContext oldcxt;
+
+ /* Node buffer wasn't found; if we may not create one, we're done. */
+ if (!createNew)
+ return NULL;
+
+ oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+ if (nodeBlocknum != GIST_ROOT_BLKNO)
+ {
+ path = (GISTBufferingInsertStack *) palloc(sizeof(GISTBufferingInsertStack));
+ path->parent = parent;
+ path->blkno = nodeBlocknum;
+ path->downlinkoffnum = downlinkoffnum;
+ path->level = parent->level - 1;
+ path->refCount = 0;
+ parent->refCount++;
+ Assert(path->level > 0);
+ }
+ else
+ {
+ path = gfbb->rootitem;
+ }
+
+ path->refCount++;
+
+ /*
+ * New node buffer. Fill data structure with default values.
+ */
+ nodeBuffer->pageBuffer = NULL;
+ nodeBuffer->blocksCount = 0;
+ nodeBuffer->level = path->level;
+ nodeBuffer->path = path;
+
+ /*
+ * Maintain the per-level lists of buffers. The root node buffer is
+ * not added to these lists because it can disappear on a root split,
+ * and we never remove entries from the lists since the final
+ * emptying pass may be scanning them. For the same reason, a
+ * non-root node buffer split reuses the old NodeBuffer structure
+ * instead of removing it.
+ */
+ if (nodeBlocknum != GIST_ROOT_BLKNO)
+ {
+ /* Calc index of node buffer level. */
+ levelIndex = (nodeBuffer->level - gfbb->levelStep) / gfbb->levelStep;
+
+ /*
+ * Enlarge the array of per-level buffer lists if needed.
+ */
+ while (levelIndex >= gfbb->buffersOnLevelsLen)
+ {
+ gfbb->buffersOnLevelsLen *= 2;
+ gfbb->buffersOnLevels =
+ (List **) repalloc(gfbb->buffersOnLevels,
+ gfbb->buffersOnLevelsLen *
+ sizeof(List *));
+ }
+
+ /* Initialize new buffers lists as empty. */
+ if (levelIndex >= gfbb->buffersOnLevelsCount)
+ {
+ for (i = gfbb->buffersOnLevelsCount; i <= levelIndex; i++)
+ {
+ gfbb->buffersOnLevels[i] = NIL;
+ }
+ gfbb->buffersOnLevelsCount = levelIndex + 1;
+ }
+
+ /* Add node buffer to the corresponding list */
+ gfbb->buffersOnLevels[levelIndex] = lcons(
+ nodeBuffer, gfbb->buffersOnLevels[levelIndex]);
+ }
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ {
+ if (parent != nodeBuffer->path->parent)
+ {
+ /*
+ * The caller provided a different parent path item than the one
+ * we remembered. Trust the caller to have the more up-to-date
+ * parent.
+ */
+ decreasePathRefcount(nodeBuffer->path->parent);
+ nodeBuffer->path->parent = parent;
+ parent->refCount++;
+ }
+ }
+
+ return nodeBuffer;
+ }
+
+ /*
+ * Allocate memory for one BLCKSZ-sized in-memory buffer page.
+ */
+ static NodeBufferPage *
+ allocateNewPageBuffer(GISTBuildBuffers * gfbb)
+ {
+ NodeBufferPage *pageBuffer;
+
+ /*
+ * Allocate the page in the buffering build's persistent context.
+ */
+ pageBuffer = (NodeBufferPage *) MemoryContextAlloc(gfbb->context, BLCKSZ);
+
+ /*
+ * Mark the whole data area of the new page as free.
+ */
+ PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
+ return pageBuffer;
+ }
+
+ /*
+ * Append the given index block number to the loadedBuffers array.
+ */
+ static void
+ addLoadedBuffer(GISTBuildBuffers * gfbb, BlockNumber blocknum)
+ {
+ if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
+ {
+ /*
+ * array is full; double its capacity
+ */
+ gfbb->loadedBuffersLen *= 2;
+ gfbb->loadedBuffers = (BlockNumber *) repalloc(gfbb->loadedBuffers,
+ gfbb->loadedBuffersLen *
+ sizeof(BlockNumber));
+ }
+ gfbb->loadedBuffers[gfbb->loadedBuffersCount] = blocknum;
+ gfbb->loadedBuffersCount++;
+ }
+
+
+ /*
+ * Load last page of node buffer into main memory.
+ */
+ static void
+ loadNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer)
+ {
+ /* Check if we really should load something */
+ if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
+ {
+ /* Read the page back from the temporary file and recycle its block */
+ nodeBuffer->pageBuffer = allocateNewPageBuffer(gfbb);
+ BufFileSeekBlock(gfbb->pfile, nodeBuffer->pageBlocknum);
+ BufFileRead(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+ releaseBlock(gfbb, nodeBuffer->pageBlocknum);
+ nodeBuffer->pageBlocknum = InvalidBlockNumber;
+
+ /* Mark node buffer as loaded */
+ addLoadedBuffer(gfbb, nodeBuffer->nodeBlocknum);
+ }
+ }
+
+ /*
+ * Write the last page of a node buffer out to disk and free its memory.
+ */
+ static void
+ unloadNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer)
+ {
+ /* Check if we have something to write */
+ if (nodeBuffer->pageBuffer)
+ {
+ BlockNumber blkno;
+
+ /* Write block to the temporary file */
+ blkno = getFreeBlock(gfbb);
+ BufFileSeekBlock(gfbb->pfile, blkno);
+ BufFileWrite(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+ nodeBuffer->pageBlocknum = blkno;
+ pfree(nodeBuffer->pageBuffer);
+ nodeBuffer->pageBuffer = NULL;
+ }
+ }
+
+ /*
+ * Write the last pages of all loaded node buffers out to disk.
+ */
+ void
+ unloadNodeBuffers(GISTBuildBuffers * gfbb)
+ {
+ int i;
+
+ /* Iterate over loaded node buffers */
+ for (i = 0; i < gfbb->loadedBuffersCount; i++)
+ {
+ NodeBuffer *nodeBuffer;
+ bool found;
+
+ nodeBuffer = hash_search(gfbb->nodeBuffersTab, &gfbb->loadedBuffers[i],
+ HASH_FIND, &found);
+
+ /*
+ * The node buffer may be gone: it can disappear during a page
+ * split. That is fine, just skip it.
+ */
+ if (!found)
+ continue;
+
+ unloadNodeBuffer(gfbb, nodeBuffer);
+ }
+ gfbb->loadedBuffersCount = 0;
+ }
+
+ /*
+ * Add item to buffer page. Items grow downwards from the top of the data
+ * area; every item occupies a MAXALIGN'd slot so getItupFromPage can read
+ * the tuple header at an aligned address.
+ */
+ static void
+ placeItupToPage(NodeBufferPage * pageBuffer, IndexTuple itup)
+ {
+ /*
+ * Get pointer to page free space start
+ */
+ char *ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
+ + PAGE_FREE_SPACE(pageBuffer) - MAXALIGN(IndexTupleSize(itup));
+ size_t itupSize,
+ alignedSize;
+
+ itupSize = IndexTupleSize(itup);
+ alignedSize = MAXALIGN(itupSize);
+
+ /*
+ * There should be enough of space
+ */
+ Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(IndexTupleSize(itup)));
+
+ /*
+ * Reduce free space by the aligned size, matching the MAXALIGN'd slot
+ * that ptr was placed in (and what getItupFromPage adds back).
+ */
+ PAGE_FREE_SPACE(pageBuffer) -= alignedSize;
+
+ /*
+ * Copy index tuple to free space
+ */
+ memcpy(ptr, itup, itupSize);
+ }
+
+ /*
+ * Get last item from buffer page and remove it from page.
+ */
+ static void
+ getItupFromPage(NodeBufferPage * pageBuffer, IndexTuple *itup)
+ {
+ /*
+ * The most recently placed tuple starts exactly at the free-space
+ * watermark.
+ */
+ IndexTuple ptr = (IndexTuple) ((char *) pageBuffer
+ + BUFFER_PAGE_DATA_OFFSET
+ + PAGE_FREE_SPACE(pageBuffer));
+
+ /*
+ * Page shouldn't be empty
+ */
+ Assert(!PAGE_IS_EMPTY(pageBuffer));
+
+ /*
+ * Allocate memory for returned index tuple copy
+ */
+ *itup = (IndexTuple) palloc(IndexTupleSize(ptr));
+
+ /*
+ * Copy data
+ */
+ memcpy(*itup, ptr, IndexTupleSize(ptr));
+
+ /*
+ * Give the tuple's whole MAXALIGN'd slot back to the page free space.
+ */
+ PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(IndexTupleSize(*itup));
+ }
+
+ /*
+ * Push new index tuple to node buffer.
+ */
+ void
+ pushItupToNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer,
+ IndexTuple itup)
+ {
+ /*
+ * Most memory operations here must happen in the buffering build's
+ * persistent context, so switch to it.
+ */
+ MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+ if (nodeBuffer->blocksCount == 0)
+ {
+ nodeBuffer->pageBuffer = allocateNewPageBuffer(gfbb);
+ nodeBuffer->pageBuffer->prev = InvalidBlockNumber;
+ nodeBuffer->blocksCount = 1;
+ addLoadedBuffer(gfbb, nodeBuffer->nodeBlocknum);
+ }
+
+ /* Load last page of node buffer if needed */
+ if (!nodeBuffer->pageBuffer)
+ {
+ loadNodeBuffer(gfbb, nodeBuffer);
+ }
+
+ /*
+ * Check if there is enough space on the last page for the tuple
+ */
+ if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
+ {
+ /*
+ * Current page is full: flush it to disk and reuse it as a new page
+ */
+ BlockNumber blkno;
+
+ nodeBuffer->blocksCount++;
+
+ blkno = getFreeBlock(gfbb);
+ BufFileSeekBlock(gfbb->pfile, blkno);
+ BufFileWrite(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+
+ PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
+ BLCKSZ - MAXALIGN(offsetof(NodeBufferPage, tupledata));
+ nodeBuffer->pageBuffer->prev = blkno;
+ }
+
+ placeItupToPage(nodeBuffer->pageBuffer, itup);
+
+ /*
+ * Restore memory context
+ */
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ /*
+ * Removes one index tuple from node buffer. Returns true if success and false
+ * if node buffer is empty.
+ */
+ bool
+ popItupFromNodeBuffer(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer,
+ IndexTuple *itup)
+ {
+ /*
+ * If node buffer is empty then return false.
+ */
+ if (nodeBuffer->blocksCount <= 0)
+ return false;
+
+ /* Load last page of node buffer if needed */
+ if (!nodeBuffer->pageBuffer)
+ loadNodeBuffer(gfbb, nodeBuffer);
+
+ /*
+ * Take the last index tuple off the current (last) page.
+ */
+ getItupFromPage(nodeBuffer->pageBuffer, itup);
+
+ /*
+ * Check if the page which the index tuple was got from is now empty
+ */
+ if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
+ {
+ BlockNumber prevblkno;
+
+ /*
+ * If it's empty then we need to release buffer file block and free
+ * page buffer.
+ */
+ nodeBuffer->blocksCount--;
+
+ /*
+ * If there's more pages, fetch previous one
+ */
+ prevblkno = nodeBuffer->pageBuffer->prev;
+ if (prevblkno != InvalidBlockNumber)
+ {
+ Assert(nodeBuffer->blocksCount > 0);
+ BufFileSeekBlock(gfbb->pfile, prevblkno);
+ BufFileRead(gfbb->pfile, nodeBuffer->pageBuffer, BLCKSZ);
+
+ releaseBlock(gfbb, prevblkno);
+ }
+ else
+ {
+ Assert(nodeBuffer->blocksCount == 0);
+ pfree(nodeBuffer->pageBuffer);
+ nodeBuffer->pageBuffer = NULL;
+ }
+ }
+ return true;
+ }
+
+ /*
+ * qsort comparator for sorting freeBlocks[] into decreasing order.
+ */
+ static int
+ freeBlocks_cmp(const void *a, const void *b)
+ {
+ long ablk = *((const long *) a);
+ long bblk = *((const long *) b);
+
+ /*
+ * can't just subtract because long might be wider than int
+ */
+ if (ablk < bblk)
+ return 1;
+ if (ablk > bblk)
+ return -1;
+ return 0;
+ }
+
+ /*
+ * Select a currently unused block for writing to.
+ *
+ * NB: should only be called when writer is ready to write immediately,
+ * to ensure that first write pass is sequential.
+ */
+ static long
+ getFreeBlock(GISTBuildBuffers * gfbb)
+ {
+ /*
+ * If there are multiple free blocks, we select the one appearing last in
+ * freeBlocks[] (after sorting the array if needed). If there are none,
+ * assign the next block at the end of the file.
+ */
+ if (gfbb->nFreeBlocks > 0)
+ {
+ if (!gfbb->blocksSorted)
+ {
+ qsort((void *) gfbb->freeBlocks, gfbb->nFreeBlocks,
+ sizeof(long), freeBlocks_cmp);
+ gfbb->blocksSorted = true;
+ }
+ return gfbb->freeBlocks[--gfbb->nFreeBlocks];
+ }
+ else
+ return gfbb->nFileBlocks++;
+ }
+
+ /*
+ * Return a block# to the freelist.
+ */
+ static void
+ releaseBlock(GISTBuildBuffers * gfbb, long blocknum)
+ {
+ int ndx;
+
+ /*
+ * Enlarge freeBlocks array if full.
+ */
+ if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
+ {
+ gfbb->freeBlocksLen *= 2;
+ gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
+ gfbb->freeBlocksLen *
+ sizeof(long));
+ }
+
+ /*
+ * Add blocknum to array, and mark the array unsorted if it's no longer in
+ * decreasing order.
+ */
+ ndx = gfbb->nFreeBlocks++;
+ gfbb->freeBlocks[ndx] = blocknum;
+ if (ndx > 0 && gfbb->freeBlocks[ndx - 1] < blocknum)
+ gfbb->blocksSorted = false;
+ }
+
+ /*
+ * Free buffering build data structure.
+ */
+ void
+ freeGiSTBuildBuffers(GISTBuildBuffers * gfbb)
+ {
+ /* Close buffers file. */
+ BufFileClose(gfbb->pfile);
+
+ /* Everything else is freed when the memory context is released */
+ }
+
+ /*
+ * Per-destination-buffer state used when relocating index tuples out of
+ * the node buffer of a page that has been split.
+ */
+ typedef struct
+ {
+ GISTENTRY entry[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ SplitedPageLayout *dist;
+ NodeBuffer *nodeBuffer;
+ } RelocationBufferInfo;
+
+ /*
+ * Maintain node buffers on page split: relocate the split page's buffered
+ * index tuples into the buffers of the pages produced by the split.
+ */
+ void
+ relocateBuildBuffersOnSplit(GISTBuildBuffers * gfbb, GISTSTATE *giststate,
+ Relation r, GISTBufferingInsertStack * path,
+ Buffer buffer, SplitedPageLayout *dist)
+ {
+ RelocationBufferInfo *relocationBuffersInfos;
+ bool found;
+ NodeBuffer *nodeBuffer;
+ BlockNumber blocknum;
+ IndexTuple itup;
+ int splitPagesCount = 0,
+ i;
+ GISTENTRY entry[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ SplitedPageLayout *ptr;
+ int level = path->level;
+ NodeBuffer nodebuf;
+
+ blocknum = BufferGetBlockNumber(buffer);
+
+ /*
+ * If the split page's level doesn't have buffers, we're done. Otherwise
+ * we also need to relocate the index tuples stored in the split page's
+ * buffer.
+ */
+ if (!LEVEL_HAS_BUFFERS(level, gfbb))
+ return;
+
+ /*
+ * Look up the node buffer of the split page. (It is removed from the
+ * hash below, but only in the root-split case.)
+ */
+ nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
+ HASH_FIND, &found);
+ if (!found)
+ {
+ /*
+ * The node buffer must exist at this point: it was created either
+ * by index tuple insertion or by an earlier page split.
+ */
+ elog(ERROR,
+ "node buffer of splitting page (%u) doesn't exists while it should.",
+ blocknum);
+ }
+
+ /*
+ * We are going to operate on the node buffers hash below, so it is
+ * unsafe to keep using a hash entry that may be removed or reused.
+ * Save a local copy of it instead.
+ */
+ memcpy(&nodebuf, nodeBuffer, sizeof(NodeBuffer));
+ nodeBuffer = &nodebuf;
+
+ if (blocknum == GIST_ROOT_BLKNO)
+ {
+ /*
+ * On a root split the NodeBuffer data structure is not reused, so
+ * remove it from the hash now.
+ */
+ hash_search(gfbb->nodeBuffersTab, &blocknum, HASH_REMOVE, &found);
+ Assert(found);
+ }
+
+ /*
+ * Count the pages produced by the split, so we know how many
+ * relocation destinations to set up.
+ */
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ splitPagesCount++;
+ }
+
+ /*
+ * Allocate memory for information about relocation buffers.
+ */
+ relocationBuffersInfos =
+ (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
+ splitPagesCount);
+
+ /*
+ * Fill in relocation information for the node buffer of each page
+ * produced by the split.
+ */
+ i = 0;
+ for (ptr = dist; ptr; ptr = ptr->next)
+ {
+ NodeBuffer *newNodeBuffer;
+
+ /*
+ * Decompress this page's downlink index tuple into GISTENTRY form.
+ */
+ gistDeCompressAtt(giststate, r,
+ ptr->itup, NULL, (OffsetNumber) 0,
+ relocationBuffersInfos[i].entry,
+ relocationBuffersInfos[i].isnull);
+
+ newNodeBuffer = getNodeBuffer(gfbb, giststate, ptr->block.blkno,
+ path->downlinkoffnum, path->parent, true);
+
+ /*
+ * Fill relocation information
+ */
+ relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
+ if (newNodeBuffer->nodeBlocknum == blocknum)
+ {
+ /*
+ * This reuses the NodeBuffer of the split node; its old
+ * contents were copied to a local above, so reset it here.
+ */
+ newNodeBuffer->blocksCount = 0;
+ newNodeBuffer->pageBuffer = NULL;
+ newNodeBuffer->pageBlocknum = InvalidBlockNumber;
+ }
+
+ /*
+ * Remember which split page this destination corresponds to.
+ */
+ relocationBuffersInfos[i].dist = ptr;
+
+ i++;
+ }
+
+ /*
+ * Relocate the buffered index tuples, one at a time.
+ */
+ while (popItupFromNodeBuffer(gfbb, nodeBuffer, &itup))
+ {
+ float sum_grow,
+ which_grow[INDEX_MAX_KEYS];
+ int i,
+ which;
+ IndexTuple newtup;
+ bool wasOverflow;
+
+ /*
+ * Choose the destination node buffer for this index tuple by
+ * minimum penalty (same idea as gistchoose() — TODO confirm).
+ */
+
+ gistDeCompressAtt(giststate, r,
+ itup, NULL, (OffsetNumber) 0, entry, isnull);
+
+ which = -1; /* no candidate chosen yet */
+ *which_grow = -1.0f; /* which_grow[0] = "no penalty computed yet" */
+ sum_grow = 1.0f;
+
+ for (i = 0; i < splitPagesCount && sum_grow; i++)
+ {
+ int j;
+ RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
+
+ sum_grow = 0.0f;
+ for (j = 0; j < r->rd_att->natts; j++)
+ {
+ float usize;
+
+ usize = gistpenalty(giststate, j,
+ &splitPageInfo->entry[j],
+ splitPageInfo->isnull[j],
+ &entry[j], isnull[j]);
+
+ if (which_grow[j] < 0 || usize < which_grow[j])
+ {
+ which = i;
+ which_grow[j] = usize;
+ if (j < r->rd_att->natts - 1 && i == 0)
+ which_grow[j + 1] = -1;
+ sum_grow += which_grow[j];
+ }
+ else if (which_grow[j] == usize)
+ sum_grow += usize;
+ else
+ {
+ sum_grow = 1;
+ break;
+ }
+ }
+ }
+
+ wasOverflow =
+ BUFFER_IS_OVERLOW(relocationBuffersInfos[which].nodeBuffer, gfbb);
+
+ /*
+ * Push the tuple to the chosen node buffer.
+ */
+ pushItupToNodeBuffer(gfbb, relocationBuffersInfos[which].nodeBuffer,
+ itup);
+
+ /*
+ * If the node buffer has just crossed the overflow threshold, add
+ * it to the emptying stack.
+ */
+ if (BUFFER_IS_OVERLOW(relocationBuffersInfos[which].nodeBuffer, gfbb)
+ && !wasOverflow)
+ {
+ MemoryContext oldcxt = CurrentMemoryContext;
+
+ MemoryContextSwitchTo(gfbb->context);
+ gfbb->bufferEmptyingStack =
+ lcons(relocationBuffersInfos[which].nodeBuffer,
+ gfbb->bufferEmptyingStack);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ /*
+ * Adjust the downlink tuple of the chosen page if needed.
+ */
+ newtup = gistgetadjusted(r, relocationBuffersInfos[which].dist->itup,
+ itup, giststate);
+ if (newtup)
+ {
+ /*
+ * The downlink tuple had to expand; replace the old tuple
+ * with the new one in our bookkeeping.
+ */
+ gistDeCompressAtt(giststate, r,
+ newtup, NULL, (OffsetNumber) 0,
+ relocationBuffersInfos[which].entry,
+ relocationBuffersInfos[which].isnull);
+
+ relocationBuffersInfos[which].dist->itup = newtup;
+ }
+ }
+
+ /* Signal that the buffer currently being emptied was split */
+ if (blocknum == gfbb->currentEmptyingBufferBlockNumber)
+ gfbb->currentEmptyingBufferSplit = true;
+
+ /*
+ * On a root split the NodeBuffer was removed rather than reused, so
+ * one reference to its path item goes away.
+ */
+ if (blocknum == GIST_ROOT_BLKNO)
+ decreasePathRefcount(nodeBuffer->path);
+
+ pfree(relocationBuffersInfos);
+ }
+
+ /*
+ * Return the number of bytes of the node buffer occupied by stored
+ * index tuples.
+ */
+ int
+ getNodeBufferBusySize(GISTBuildBuffers * gfbb, NodeBuffer * nodeBuffer)
+ {
+ int size;
+
+ /*
+ * No occupied buffer file blocks means the node buffer is empty.
+ */
+ if (nodeBuffer->blocksCount == 0)
+ return 0;
+ if (!nodeBuffer->pageBuffer)
+ loadNodeBuffer(gfbb, nodeBuffer);
+
+ /*
+ * We assume only the last page may be partially filled.
+ * NOTE(review): per-page capacity here subtracts only
+ * MAXALIGN(sizeof(uint32)), while BUFFER_PAGE_DATA_OFFSET (and
+ * PAGE_IS_EMPTY) account for the whole NodeBufferPage header
+ * including 'prev' — confirm these agree, else size is overstated.
+ */
+ size = (BLCKSZ - MAXALIGN(sizeof(uint32))) * nodeBuffer->blocksCount;
+ size -= PAGE_FREE_SPACE(nodeBuffer->pageBuffer);
+ return size;
+ }
*** a/src/backend/access/gist/gistutil.c
--- b/src/backend/access/gist/gistutil.c
***************
*** 670,682 **** gistoptions(PG_FUNCTION_ARGS)
{
Datum reloptions = PG_GETARG_DATUM(0);
bool validate = PG_GETARG_BOOL(1);
! bytea *result;
! result = default_reloptions(reloptions, validate, RELOPT_KIND_GIST);
- if (result)
- PG_RETURN_BYTEA_P(result);
- PG_RETURN_NULL();
}
/*
--- 670,701 ----
{
Datum reloptions = PG_GETARG_DATUM(0);
bool validate = PG_GETARG_BOOL(1);
! relopt_value *options;
! GiSTOptions *rdopts;
! int numoptions;
! static const relopt_parse_elt tab[] = {
! {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
! {"levelstep", RELOPT_TYPE_INT, offsetof(GiSTOptions, levelStep)},
! {"buffersize", RELOPT_TYPE_INT, offsetof(GiSTOptions, bufferSize)},
! {"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
! };
! options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
! &numoptions);
!
! /* if none set, we're done */
! if (numoptions == 0)
! PG_RETURN_NULL();
!
! rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
!
! fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
! validate, tab, lengthof(tab));
!
! pfree(options);
!
! PG_RETURN_BYTEA_P(rdopts);
}
/*
*** a/src/backend/access/gist/gistxlog.c
--- b/src/backend/access/gist/gistxlog.c
***************
*** 266,272 **** gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
else
GistPageGetOpaque(page)->rightlink = xldata->origrlink;
GistPageGetOpaque(page)->nsn = xldata->orignsn;
! if (i < xlrec.data->npage - 1 && !isrootsplit)
GistMarkFollowRight(page);
else
GistClearFollowRight(page);
--- 266,273 ----
else
GistPageGetOpaque(page)->rightlink = xldata->origrlink;
GistPageGetOpaque(page)->nsn = xldata->orignsn;
! if (i < xlrec.data->npage - 1 && !isrootsplit &&
! !xldata->noFollowFight)
GistMarkFollowRight(page);
else
GistClearFollowRight(page);
***************
*** 414,420 **** XLogRecPtr
gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
SplitedPageLayout *dist,
BlockNumber origrlink, GistNSN orignsn,
! Buffer leftchildbuf)
{
XLogRecData *rdata;
gistxlogPageSplit xlrec;
--- 415,421 ----
gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
SplitedPageLayout *dist,
BlockNumber origrlink, GistNSN orignsn,
! Buffer leftchildbuf, bool noFollowFight)
{
XLogRecData *rdata;
gistxlogPageSplit xlrec;
***************
*** 436,441 **** gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
--- 437,443 ----
xlrec.npage = (uint16) npage;
xlrec.leftchild =
BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
+ xlrec.noFollowFight = noFollowFight;
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(gistxlogPageSplit);
*** a/src/include/access/gist_private.h
--- b/src/include/access/gist_private.h
***************
*** 17,29 ****
--- 17,72 ----
#include "access/gist.h"
#include "access/itup.h"
#include "storage/bufmgr.h"
+ #include "storage/buffile.h"
#include "utils/rbtree.h"
+ #include "utils/hsearch.h"
+
+ /* Does the specified level have buffers attached? (leaf level 0 never does) */
+ #define LEVEL_HAS_BUFFERS(level,gfbb) ((level) != 0 && (level) % (gfbb)->levelStep == 0)
+ /* Is the buffer at least half-filled (i.e. should be scheduled for emptying)? */
+ #define BUFFER_IS_OVERLOW(nodeBuffer,gfbb) ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
/* Buffer lock modes */
#define GIST_SHARE BUFFER_LOCK_SHARE
#define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
#define GIST_UNLOCK BUFFER_LOCK_UNLOCK
+ typedef struct
+ {
+ BlockNumber prev; /* previous page of this buffer — presumably pages form a chain; confirm in gistbuildbuffers.c */
+ uint32 freespace; /* bytes still free in tupledata[] */
+ char tupledata[1]; /* variable-length area holding index tuples */
+ } NodeBufferPage;
+
+ #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(NodeBufferPage, tupledata))
+ /* Returns free space in node buffer page */
+ #define PAGE_FREE_SPACE(nbp) (nbp->freespace)
+ /* Checks if node buffer page is empty */
+ #define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)
+ /* Checks if a node buffer page lacks sufficient space for the index tuple */
+ #define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
+ MAXALIGN(IndexTupleSize(itup)))
+
+ /* Per-node buffer of index tuples awaiting insertion into a subtree */
+ typedef struct NodeBuffer
+ {
+ /* block number of the index page this buffer belongs to */
+ BlockNumber nodeBlocknum;
+
+ /* number of buffer-file blocks occupied by this buffer */
+ int blocksCount;
+
+ BlockNumber pageBlocknum; /* block# of the loaded page, or InvalidBlockNumber */
+ NodeBufferPage *pageBuffer; /* in-memory buffer page, NULL if not loaded */
+
+ /* corresponding downlink number in parent page */
+ OffsetNumber downlinkoffnum;
+ /* level of the corresponding node */
+ int level;
+
+ struct GISTBufferingInsertStack *path; /* refcounted path to this node */
+ } NodeBuffer;
+
/*
* GISTSTATE: information needed for any GiST index operation
*
***************
*** 44,49 **** typedef struct GISTSTATE
--- 87,94 ----
/* Collations to pass to the support functions */
Oid supportCollation[INDEX_MAX_KEYS];
+ struct GISTBuildBuffers *gfbb;
+
TupleDesc tupdesc;
} GISTSTATE;
***************
*** 170,175 **** typedef struct gistxlogPageSplit
--- 215,221 ----
BlockNumber leftchild; /* like in gistxlogPageUpdate */
uint16 npage; /* # of pages in the split */
+ bool noFollowFight; /* skip setting the followRight flag; NOTE(review): name looks like a typo for "noFollowRight" — rename consistently across all hunks */
/*
* follow: 1. gistxlogPage and array of IndexTupleData per page
***************
*** 225,230 **** typedef struct GISTInsertStack
--- 271,346 ----
struct GISTInsertStack *parent;
} GISTInsertStack;
+ /*
+ * Extended GISTInsertStack for buffering GiST index build. It additionally
+ * holds the level number of the page and a reference count.
+ */
+ typedef struct GISTBufferingInsertStack
+ {
+ /* current page */
+ BlockNumber blkno;
+
+ /* offset of the downlink in the parent page, that points to this page */
+ OffsetNumber downlinkoffnum;
+
+ /* pointer to parent */
+ struct GISTBufferingInsertStack *parent;
+
+ int refCount; /* freed when this drops to zero — see decreasePathRefcount() */
+
+ /* level number */
+ int level;
+ } GISTBufferingInsertStack;
+
+ /*
+ * Data structure with general information about build buffers.
+ */
+ typedef struct GISTBuildBuffers
+ {
+ /* memory context which is persistent during buffering build */
+ MemoryContext context;
+ /* underlying temporary file holding all buffer pages */
+ BufFile *pfile;
+ /* # of blocks used in the underlying file */
+ long nFileBlocks;
+ /* is freeBlocks[] currently in order? */
+ bool blocksSorted;
+ /* resizable array of free blocks */
+ long *freeBlocks;
+ /* # of currently free blocks */
+ int nFreeBlocks;
+ /* current allocated length of freeBlocks[] */
+ int freeBlocksLen;
+
+ /* hash of node buffers, keyed by block number */
+ HTAB *nodeBuffersTab;
+
+ /* stack (list) of buffers pending emptying */
+ List *bufferEmptyingStack;
+ /* block number of the buffer currently being emptied */
+ BlockNumber currentEmptyingBufferBlockNumber;
+ /* whether currently emptying buffer was split - a signal to stop emptying */
+ bool currentEmptyingBufferSplit;
+
+ /* interval, in levels, between levels that have buffers */
+ int levelStep;
+ /* maximal number of pages occupied by a buffer */
+ int pagesPerBuffer;
+
+ /* array of lists of non-empty buffers on levels, for final emptying */
+ List **buffersOnLevels;
+ int buffersOnLevelsLen; /* allocated length of buffersOnLevels */
+ int buffersOnLevelsCount; /* # of levels actually used */
+
+ /* dynamic array of block numbers of buffers loaded into main memory */
+ BlockNumber *loadedBuffers;
+ /* number of block numbers */
+ int loadedBuffersCount;
+ /* length of array */
+ int loadedBuffersLen;
+ GISTBufferingInsertStack *rootitem; /* path item for the root page */
+ } GISTBuildBuffers;
+
typedef struct GistSplitVector
{
GIST_SPLITVEC splitVector; /* to/from PickSplit method */
***************
*** 286,291 **** extern Datum gistinsert(PG_FUNCTION_ARGS);
--- 402,411 ----
extern MemoryContext createTempGistContext(void);
extern void initGISTstate(GISTSTATE *giststate, Relation index);
extern void freeGISTstate(GISTSTATE *giststate);
+ void gistdoinsert(Relation r,
+ IndexTuple itup,
+ Size freespace,
+ GISTSTATE *GISTstate);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
***************
*** 305,311 **** extern XLogRecPtr gistXLogSplit(RelFileNode node,
BlockNumber blkno, bool page_is_leaf,
SplitedPageLayout *dist,
BlockNumber origrlink, GistNSN oldnsn,
! Buffer leftchild);
/* gistget.c */
extern Datum gistgettuple(PG_FUNCTION_ARGS);
--- 425,431 ----
BlockNumber blkno, bool page_is_leaf,
SplitedPageLayout *dist,
BlockNumber origrlink, GistNSN oldnsn,
! Buffer leftchild, bool noFollowFight);
/* gistget.c */
extern Datum gistgettuple(PG_FUNCTION_ARGS);
***************
*** 313,318 **** extern Datum gistgetbitmap(PG_FUNCTION_ARGS);
--- 433,450 ----
/* gistutil.c */
+ /*
+ * Storage type for GiST's reloptions
+ */
+ typedef struct GiSTOptions
+ {
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int fillfactor; /* page fill factor in percent (0..100) */
+ int bufferingModeOffset; /* offset of "buffering" mode string (string reloption) */
+ int levelStep; /* level step in buffering build */
+ int bufferSize; /* buffer size in buffering build */
+ } GiSTOptions;
+
#define GiSTPageSize \
( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
***************
*** 380,383 **** extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
--- 512,536 ----
GistSplitVector *v, GistEntryVector *entryvec,
int attno);
+ /* gistbuild.c */
+ void decreasePathRefcount(GISTBufferingInsertStack * path);
+
+ /* gistbuildbuffers.c */
+ void initGiSTBuildBuffers(GISTBuildBuffers * gfbb, int maxLevel);
+ extern NodeBuffer *getNodeBuffer(GISTBuildBuffers * gfbb, GISTSTATE *giststate,
+ BlockNumber blkno, OffsetNumber downlinkoffnu,
+ GISTBufferingInsertStack * parent, bool createNew);
+ void pushItupToNodeBuffer(GISTBuildBuffers * gfbb,
+ NodeBuffer * nodeBuffer, IndexTuple item);
+ bool popItupFromNodeBuffer(GISTBuildBuffers * gfbb,
+ NodeBuffer * nodeBuffer, IndexTuple *item);
+ void freeGiSTBuildBuffers(GISTBuildBuffers * gfbb);
+ extern void relocateBuildBuffersOnSplit(GISTBuildBuffers * gfbb,
+ GISTSTATE *giststate, Relation r,
+ GISTBufferingInsertStack * path, Buffer buffer,
+ SplitedPageLayout *dist);
+ int getNodeBufferBusySize(GISTBuildBuffers * gfbb,
+ NodeBuffer * nodeBuffer);
+ void unloadNodeBuffers(GISTBuildBuffers * gfbb);
+
#endif /* GIST_PRIVATE_H */