*** doc/src/sgml/config.sgml.orig Fri Feb 27 19:10:50 2009
--- doc/src/sgml/config.sgml Thu Mar 5 18:10:54 2009
***************
*** 3571,3576 ****
--- 3571,3581 ----
This setting can be overridden for individual tables by
changing storage parameters.
+
+ This parameter also affects vacuuming of a table with a GIN
+ index: it specifies the minimum number of inserted or updated
+ tuples needed to trigger a VACUUM> on that table.
+
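+
+ As a rough sketch (the table name and value here are only examples,
+ and assume the per-table autovacuum storage parameters of this release),
+ the threshold could be raised for a table whose GIN-indexed column
+ receives heavy insert traffic:
+
+ ALTER TABLE documents_table SET (autovacuum_vacuum_threshold = 10000);
+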
*** doc/src/sgml/gin.sgml.orig Thu Jul 24 12:31:21 2008
--- doc/src/sgml/gin.sgml Thu Mar 5 18:10:54 2009
***************
*** 188,196 ****
list of heap pointers (PL, posting list) if the list is small enough.
Partial match algorithm
!
GIN can support partial match> queries, in which the query
does not determine an exact match for one or more keys, but the possible
--- 188,230 ----
list of heap pointers (PL, posting list) if the list is small enough.
+
+ GIN fast update technique
+
+
+ Updating a GIN index tends to be slow because of the
+ intrinsic nature of inverted indexes: inserting or updating one heap row
+ can cause many inserts into the index (one for each key extracted
+ from the indexed value). As of PostgreSQL 8.4,
+ GIN> is capable of postponing much of this work by inserting
+ new tuples into a temporary, unsorted list of pending entries.
+ When the table is vacuumed, or in some cases when the pending list
+ becomes too large, the entries are moved to the main
+ GIN data structure using the same bulk insert
+ techniques used during initial index creation. This greatly improves
+ GIN index update speed, even counting the additional
+ vacuum overhead.
+
+
+
+ The disadvantage of this approach is that searches must scan the list
+ of pending entries in addition to searching the regular index, and so
+ a large list of pending entries will slow searches significantly.
+ Proper use of autovacuum can minimize this problem.
+
+
+
+ If consistently-fast search speed is more important than update speed,
+ use of pending entries can be disabled by turning off the
+ FASTUPDATE storage parameter for a
+ GIN index. See for details.
+
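+
+ For example, fast update could be turned off for an existing index
+ (the index name here is hypothetical):
+
+ ALTER INDEX gin_idx SET (fastupdate = off);
+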
+
+
Partial match algorithm
!
GIN can support partial match> queries, in which the query
does not determine an exact match for one or more keys, but the possible
***************
*** 225,235 ****
Create vs insert
! In most cases, insertion into a GIN index is slow
due to the likelihood of many keys being inserted for each value.
So, for bulk insertions into a table it is advisable to drop the GIN
index and recreate it after finishing bulk insertion.
--- 259,276 ----
Create vs insert
! Insertion into a GIN index can be slow
due to the likelihood of many keys being inserted for each value.
So, for bulk insertions into a table it is advisable to drop the GIN
index and recreate it after finishing bulk insertion.
+
+
+ As of PostgreSQL 8.4, this advice is less
+ necessary since delayed indexing is used (see for details). But for very large updates
+ it may still be best to drop and recreate the index.
+
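+
+ A sketch of that approach, using the example names from this
+ documentation:
+
+ DROP INDEX gin_idx;
+ -- ... perform the bulk insertion ...
+ CREATE INDEX gin_idx ON documents_table USING gin (locations);
+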
*** doc/src/sgml/ref/create_index.sgml.orig Mon Feb 2 16:03:00 2009
--- doc/src/sgml/ref/create_index.sgml Thu Mar 5 18:10:54 2009
***************
*** 294,299 ****
--- 294,329 ----
+
+ GIN indexes accept an additional parameter:
+
+
+
+
+
+ FASTUPDATE>
+
+
+ This setting controls usage of the fast update technique described in
+ . It is a Boolean parameter:
+ ON> enables fast update, OFF> disables it.
+ (Alternative spellings of ON> and OFF> are
+ allowed as described in .) The
+ default is ON>.
+
+
+
+
+ Turning FASTUPDATE> off via ALTER INDEX> prevents
+ future insertions from going into the list of pending index entries,
+ but does not in itself flush previous entries. You might want to do a
+ VACUUM> afterward to ensure the pending list is emptied.
+
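+
+ For example, to disable fast update on a hypothetical index and then
+ flush its pending entries:
+
+ ALTER INDEX gin_idx SET (fastupdate = off);
+ VACUUM documents_table;
+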
+
+
+
+
+
***************
*** 502,507 ****
--- 532,544 ----
+ To create a GIN> index with fast update turned off:
+
+ CREATE INDEX gin_idx ON documents_table (locations) WITH (fastupdate = off);
+
+
+
+
To create an index on the column code> in the table
films> and have the index reside in the tablespace
indexspace>:
*** doc/src/sgml/ref/vacuum.sgml.orig Thu Dec 11 18:16:46 2008
--- doc/src/sgml/ref/vacuum.sgml Thu Mar 5 18:10:54 2009
***************
*** 63,68 ****
--- 63,75 ----
blocks. This form is much slower and requires an exclusive lock on each
table while it is being processed.
+
+
+ For tables with GIN> indexes, VACUUM (in
+ any form) also completes any delayed index insertions, by moving pending
+ index entries to the appropriate places in the main GIN> index
+ structure. (See for more details.)
+
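+
+ For example, a plain VACUUM of the table (name hypothetical) is enough
+ to move its pending GIN entries into the main index structure:
+
+ VACUUM documents_table;
+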
*** doc/src/sgml/textsearch.sgml.orig Wed Jan 7 17:40:49 2009
--- doc/src/sgml/textsearch.sgml Thu Mar 5 18:10:54 2009
***************
*** 3224,3230 ****
! GIN indexes are about ten times slower to update than GiST
--- 3224,3232 ----
! GIN indexes are moderately slower to update than GiST indexes, but
! about 10 times slower if fast update support is disabled
! (see for details)
*** src/backend/access/common/reloptions.c.orig Fri Feb 27 19:10:51 2009
--- src/backend/access/common/reloptions.c Thu Mar 5 18:10:52 2009
***************
*** 56,61 ****
--- 56,69 ----
},
true
},
+ {
+ {
+ "fastupdate",
+ "Enables \"fast update\" feature for this GIN index",
+ RELOPT_KIND_GIN
+ },
+ true
+ },
/* list terminator */
{ { NULL } }
};
*** src/backend/access/gin/Makefile.orig Thu Jul 24 12:29:55 2008
--- src/backend/access/gin/Makefile Thu Mar 5 18:10:52 2009
***************
*** 14,19 ****
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
! ginbulk.o
include $(top_srcdir)/src/backend/common.mk
--- 14,19 ----
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
! ginbulk.o ginfast.o
include $(top_srcdir)/src/backend/common.mk
*** src/backend/access/gin/ginbulk.c.orig Thu Jan 1 12:24:41 2009
--- src/backend/access/gin/ginbulk.c Thu Mar 5 18:10:52 2009
***************
*** 197,202 ****
--- 197,204 ----
if (nentry <= 0)
return;
+ Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
+
i = nentry - 1;
for (; i > 0; i >>= 1)
nbit++;
*** src/backend/access/gin/ginfast.c.orig Wed Dec 31 19:00:00 1969
--- src/backend/access/gin/ginfast.c Thu Mar 5 18:10:53 2009
***************
*** 0 ****
--- 1,805 ----
+ /*-------------------------------------------------------------------------
+ *
+ * ginfast.c
+ * Fast insert routines for the Postgres inverted index access method.
+ * Pending entries are stored in a linear list of pages, and vacuum
+ * will transfer them into the regular index structure.
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/gin.h"
+ #include "access/tuptoaster.h"
+ #include "catalog/index.h"
+ #include "commands/vacuum.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "utils/memutils.h"
+
+
+ static int32
+ writeListPage(Relation index, Buffer buffer, IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
+ {
+ Page page = BufferGetPage(buffer);
+ int i, freesize, size=0;
+ OffsetNumber l, off;
+
+ START_CRIT_SECTION();
+
+ GinInitBuffer(buffer, GIN_LIST);
+
+ off = FirstOffsetNumber;
+
+ for(i=0; irightlink = rightlink;
+ /*
+ * the tail page may contain only whole row(s), or the final
+ * part of a row whose beginning is placed on previous pages
+ */
+ if ( rightlink == InvalidBlockNumber )
+ {
+ GinPageSetFullRow(page);
+ GinPageGetOpaque(page)->maxoff = 1;
+ }
+ else
+ {
+ GinPageGetOpaque(page)->maxoff = 0;
+ }
+
+ freesize = PageGetFreeSpace(page);
+
+ MarkBufferDirty(buffer);
+
+ if (!index->rd_istemp)
+ {
+ XLogRecData rdata[2];
+ ginxlogInsertListPage data;
+ XLogRecPtr recptr;
+ char *ptr;
+
+ rdata[0].buffer = buffer;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char*)&data;
+ rdata[0].len = sizeof(ginxlogInsertListPage);
+ rdata[0].next = rdata+1;
+
+ rdata[1].buffer = InvalidBuffer;
+ ptr = rdata[1].data = palloc( size );
+ rdata[1].len = size;
+ rdata[1].next = NULL;
+
+ for(i=0; i 0);
+
+ /*
+ * Split tuples for pages
+ */
+ for(i=0;inPendingPages++;
+ writeListPage(index, prevBuffer, tuples+startTuple, i-startTuple, BufferGetBlockNumber(curBuffer));
+ }
+ else
+ {
+ res->head = BufferGetBlockNumber(curBuffer);
+ }
+
+ prevBuffer = curBuffer;
+ startTuple = i;
+ size = 0;
+ }
+
+ tupsize = IndexTupleSize(tuples[i]) + sizeof(ItemIdData);
+
+ if ( size + tupsize >= GinListPageSize )
+ {
+ i--;
+ curBuffer = InvalidBuffer;
+ }
+ else
+ {
+ size += tupsize;
+ }
+ }
+
+ /*
+ * Write last page
+ */
+ res->tail = BufferGetBlockNumber(curBuffer);
+ res->tailFreeSize = writeListPage(index, curBuffer, tuples+startTuple, ntuples-startTuple, InvalidBlockNumber);
+ res->nPendingPages++;
+ /* that was only one heap tuple */
+ res->nPendingHeapTuples = 1;
+ }
+
+ #define GIN_PAGE_FREESIZE \
+ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
+ /*
+ * Inserts the collected values during normal insertion. The function
+ * guarantees that all values of a heap tuple are stored sequentially,
+ * preserving their order
+ */
+ void
+ ginHeapTupleFastInsert(Relation index, GinState *ginstate, GinTupleCollector *collector)
+ {
+ Buffer metabuffer;
+ Page metapage;
+ GinMetaPageData *metadata = NULL;
+ XLogRecData rdata[2];
+ Buffer buffer = InvalidBuffer;
+ Page page = NULL;
+ ginxlogUpdateMeta data;
+ bool separateList = false;
+ bool needCleanup = false;
+
+ if ( collector->ntuples == 0 )
+ return;
+
+ data.node = index->rd_node;
+ data.ntuples = 0;
+ data.newRightlink = data.prevTail = InvalidBlockNumber;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogUpdateMeta);
+ rdata[0].next = NULL;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ metapage = BufferGetPage(metabuffer);
+
+ if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE )
+ {
+ /*
+ * Total size is greater than one page => make sublist
+ */
+ separateList = true;
+ }
+ else
+ {
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber ||
+ collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize )
+ {
+ /*
+ * Pending list is empty or total size is greater than freespace
+ * on tail page => make sublist
+ * We unlock metabuffer to keep high concurrency
+ */
+ separateList = true;
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ }
+ }
+
+ if ( separateList )
+ {
+ GinMetaPageData sublist;
+
+ /*
+ * We should make sublist separately and append it to the tail
+ */
+ memset( &sublist, 0, sizeof(GinMetaPageData) );
+
+ makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+
+ /*
+ * metapage was unlocked, see above
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ /*
+ * Sublist becomes main list
+ */
+ START_CRIT_SECTION();
+ memcpy(metadata, &sublist, sizeof(GinMetaPageData) );
+ memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) );
+ }
+ else
+ {
+ /*
+ * merge lists
+ */
+
+ data.prevTail = metadata->tail;
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+
+ START_CRIT_SECTION();
+
+ GinPageGetOpaque(page)->rightlink = sublist.head;
+ metadata->tail = sublist.tail;
+ metadata->tailFreeSize = sublist.tailFreeSize;
+
+ metadata->nPendingPages += sublist.nPendingPages;
+ metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ data.newRightlink = sublist.head;
+
+ MarkBufferDirty(buffer);
+ }
+ }
+ else
+ {
+ /*
+ * Insert into tail page, metapage is already locked
+ */
+
+ OffsetNumber l, off;
+ int i, tupsize;
+ char *ptr;
+
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+ rdata[0].next = rdata + 1;
+
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ ptr = rdata[1].data = (char *) palloc( collector->sumsize );
+ rdata[1].len = collector->sumsize;
+ rdata[1].next = NULL;
+
+ data.ntuples = collector->ntuples;
+
+ START_CRIT_SECTION();
+
+ /*
+ * Increase counter of heap tuples
+ */
+ Assert( GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples );
+ GinPageGetOpaque(page)->maxoff++;
+ metadata->nPendingHeapTuples++;
+
+ for(i=0; i<collector->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(collector->tuples[i]);
+ l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page in \"%s\"",
+ RelationGetRelationName(index));
+
+ memcpy(ptr, collector->tuples[i], tupsize);
+ ptr+=tupsize;
+
+ off++;
+ }
+
+ metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData);
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ MarkBufferDirty(buffer);
+ }
+
+ /*
+ * Make real write
+ */
+
+ MarkBufferDirty(metabuffer);
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ if ( buffer != InvalidBuffer )
+ {
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ }
+
+ if (buffer != InvalidBuffer)
+ UnlockReleaseBuffer(buffer);
+
+ /*
+ * Force pending list cleanup when it becomes too long.
+ * Since ginInsertCleanup() could take a significant amount of
+ * time, we prefer to call it when it can do all the work in a
+ * single collection cycle. In non-vacuum mode, it shouldn't
+ * require maintenance_work_mem, so we fire the cleanup
+ * while the pending list is still small enough to fit into work_mem
+ *
+ * ginInsertCleanup() should not be called inside CRIT_SECTION.
+ */
+
+ if ( metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L )
+ needCleanup = true;
+
+ UnlockReleaseBuffer(metabuffer);
+
+ END_CRIT_SECTION();
+
+ if ( needCleanup )
+ ginInsertCleanup(index, ginstate, NULL);
+ }
+
+ /*
+ * Collect values from one tuple to be indexed. All values for
+ * one tuple should be written at once, to guarantee a consistent state
+ */
+ uint32
+ ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item)
+ {
+ Datum *entries;
+ int32 i,
+ nentries;
+
+ entries = extractEntriesSU(ginstate, attnum, value, &nentries);
+
+ if (nentries == 0)
+ /* nothing to insert */
+ return 0;
+
+ /*
+ * Allocate/reallocate memory for storing collected tuples
+ */
+ if ( collector->tuples == NULL )
+ {
+ collector->lentuples = nentries * index->rd_att->natts;
+ collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ while ( collector->ntuples + nentries > collector->lentuples )
+ {
+ collector->lentuples *= 2;
+ collector->tuples = (IndexTuple*)repalloc( collector->tuples,
+ sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ /*
+ * Create the array of index tuples
+ */
+ for (i = 0; i < nentries; i++)
+ {
+ int32 tupsize;
+
+ collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0);
+ collector->tuples[collector->ntuples + i]->t_tid = *item;
+ tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]);
+
+ if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize)
+ elog(ERROR, "huge tuple");
+
+ collector->sumsize += tupsize;
+ }
+
+ collector->ntuples += nentries;
+
+ return nentries;
+ }
+
+ /*
+ * Deletes the pages at the head of the list, up to (but not including)
+ * the newHead page. If newHead == InvalidBlockNumber, the whole list is dropped.
+ * Returns true if a concurrent cleanup process is detected.
+ */
+ static bool
+ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
+ IndexBulkDeleteResult *stats)
+ {
+ Page metapage;
+ GinMetaPageData *metadata;
+ BlockNumber blknoToDelete;
+
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+ blknoToDelete = metadata->head;
+
+ do
+ {
+ Page page;
+ int i;
+ int64 nDeletedHeapTuples = 0;
+ ginxlogDeleteListPages data;
+ XLogRecData rdata[1];
+ Buffer buffers[NDELETE_AT_ONCE];
+
+ data.node = index->rd_node;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogDeleteListPages);
+ rdata[0].next = NULL;
+
+ data.ndeleted = 0;
+ while( data.ndeleted < NDELETE_AT_ONCE && blknoToDelete != newHead )
+ {
+ data.toDelete[ data.ndeleted ] = blknoToDelete;
+ buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete);
+ LockBuffer( buffers[ data.ndeleted ], GIN_EXCLUSIVE );
+ page = BufferGetPage( buffers[ data.ndeleted ] );
+
+ data.ndeleted++;
+ if (stats)
+ stats->pages_deleted++;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent deletion process is detected */
+ for(i=0;imaxoff;
+ blknoToDelete = GinPageGetOpaque( page )->rightlink;
+ }
+
+ START_CRIT_SECTION();
+
+ metadata->head = blknoToDelete;
+
+ Assert( metadata->nPendingPages >= data.ndeleted );
+ metadata->nPendingPages -= data.ndeleted;
+ Assert( metadata->nPendingHeapTuples >= nDeletedHeapTuples );
+ metadata->nPendingHeapTuples -= nDeletedHeapTuples;
+
+ if ( blknoToDelete == InvalidBlockNumber )
+ {
+ metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ metadata->nPendingPages = 0;
+ metadata->nPendingHeapTuples = 0;
+ }
+ memcpy( &data.metadata, metadata, sizeof(GinMetaPageData));
+
+ MarkBufferDirty( metabuffer );
+
+ for(i=0; iflags = GIN_DELETED;
+ MarkBufferDirty( buffers[ i ] );
+ }
+
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ for(i=0; invalues >= datums->maxvalues)
+ {
+ datums->maxvalues *= 2;
+ datums->values = (Datum*)repalloc( datums->values, sizeof(Datum)*datums->maxvalues);
+ }
+
+ datums->values[ datums->nvalues++ ] = datum;
+ }
+
+ /*
+ * Go through all tuples on page and collect values in memory
+ */
+
+ static void
+ processPendingPage(BuildAccumulator *accum, DatumArray *da, Page page, OffsetNumber startoff)
+ {
+ ItemPointerData heapptr;
+ OffsetNumber i,maxoff;
+ OffsetNumber attrnum, curattnum;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ Assert( maxoff >= FirstOffsetNumber );
+ ItemPointerSetInvalid(&heapptr);
+ attrnum = 0;
+
+ for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+
+ curattnum = gintuple_get_attrnum(accum->ginstate, itup);
+
+ if ( !ItemPointerIsValid(&heapptr) )
+ {
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) && curattnum == attrnum) )
+ {
+ /*
+ * We can insert several datums per call, but only for one heap tuple
+ * and one column.
+ */
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+ da->nvalues = 0;
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ addDatum(da, gin_index_getattr(accum->ginstate, itup));
+ }
+
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+ }
+
+ /*
+ * Moves tuples from pending pages into the regular GIN structure.
+ * The function doesn't require any special locking and can be called
+ * at any time, but only one invocation may run at a time.
+ *
+ * Non-NULL stats indicates that ginInsertCleanup is called
+ * from vacuum process, so call vacuum_delay_point() periodically.
+ */
+
+ void
+ ginInsertCleanup(Relation index, GinState *ginstate, IndexBulkDeleteResult *stats)
+ {
+ Buffer metabuffer, buffer;
+ Page metapage, page;
+ GinMetaPageData *metadata;
+ MemoryContext opCtx, oldCtx;
+ BuildAccumulator accum;
+ DatumArray datums;
+ BlockNumber blkno;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ LockBuffer(metabuffer, GIN_SHARE);
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ UnlockReleaseBuffer(metabuffer);
+ return;
+ }
+
+ /*
+ * Init
+ */
+ datums.maxvalues=128;
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
+ ginInitBA(&accum);
+ accum.ginstate = ginstate;
+
+ opCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "Gin refresh temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ oldCtx = MemoryContextSwitchTo(opCtx);
+
+ /*
+ * Read and lock head
+ */
+ blkno = metadata->head;
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ for(;;)
+ {
+ /*
+ * reset datum's collector and read page's datums into memory
+ */
+ datums.nvalues = 0;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent completion is running */
+ UnlockReleaseBuffer( buffer );
+ break;
+ }
+
+ processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+ if (stats)
+ vacuum_delay_point();
+
+ /*
+ * Is it time to flush memory to disk?
+ */
+ if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+ ( GinPageHasFullRow(page) && accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+ {
+ ItemPointerData *list;
+ uint32 nlist;
+ Datum entry;
+ OffsetNumber maxoff, attnum;
+
+ /*
+ * Unlock the current page to increase concurrency.
+ * Changes to the page will be detected later by comparing
+ * its maxoff after the memory flush is complete.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ LockBuffer(buffer, GIN_UNLOCK);
+
+ /*
+ * Moving the collected data into the regular structure can take
+ * a significant amount of time, so do it without holding a lock
+ * on the pending list.
+ */
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ {
+ if (stats)
+ vacuum_delay_point();
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Lock the whole list to remove pages
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ LockBuffer(buffer, GIN_SHARE);
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent completion is running */
+ UnlockReleaseBuffer(buffer);
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ /*
+ * While we kept the page unlocked it might have been changed, so
+ * read and insert the added entries separately. The amount added to
+ * one page is rather small, so the extra memory use isn't large,
+ * although we must reinitialize the accumulator. We need to make
+ * this check only once, because now both the page and the metapage
+ * are locked. The insertion algorithm guarantees that inserted row(s)
+ * will not continue on the next page.
+ */
+ if ( PageGetMaxOffsetNumber(page) != maxoff )
+ {
+ ginInitBA(&accum);
+ datums.nvalues = 0;
+ processPendingPage(&accum, &datums, page, maxoff+1);
+
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Remember next page - it will become a new head
+ */
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+ /*
+ * remove the pages we have read from the pending list; at this
+ * point all of their content is in the regular structure
+ */
+ if ( shiftList(index, metabuffer, blkno, stats) )
+ {
+ /* concurrent completion is running */
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ Assert( blkno == metadata->head );
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+ * if we removed the whole list, just exit
+ */
+ if ( blkno == InvalidBlockNumber )
+ break;
+
+ /*
+ * reinit state
+ */
+ MemoryContextReset(opCtx);
+ ginInitBA(&accum);
+ }
+ else
+ {
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /*
+ * Read next page in pending list
+ */
+ CHECK_FOR_INTERRUPTS();
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+ }
+
+ ReleaseBuffer(metabuffer);
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(opCtx);
+ }
+
*** src/backend/access/gin/gindatapage.c.orig Thu Jan 1 12:24:41 2009
--- src/backend/access/gin/gindatapage.c Thu Mar 5 18:10:53 2009
***************
*** 43,50 ****
while (aptr - a < na && bptr - b < nb)
{
! if (compareItemPointers(aptr, bptr) > 0)
*dptr++ = *bptr++;
else
*dptr++ = *aptr++;
}
--- 43,56 ----
while (aptr - a < na && bptr - b < nb)
{
! int cmp = compareItemPointers(aptr, bptr);
! if (cmp > 0)
*dptr++ = *bptr++;
+ else if ( cmp == 0 )
+ {
+ *dptr++ = *bptr++;
+ aptr++;
+ }
else
*dptr++ = *aptr++;
}
***************
*** 630,640 ****
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
! elog(ERROR, "item pointer (%u,%d) already exists",
! ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
! ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
!
! ginInsertValue(&(gdi->btree), gdi->stack);
gdi->stack = NULL;
}
--- 636,651 ----
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
! {
! /*
! * gdi->btree.items[ gdi->btree.curitem ] already exists in index
! */
! gdi->btree.curitem ++;
! LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
! freeGinBtreeStack(gdi->stack);
! }
! else
! ginInsertValue(&(gdi->btree), gdi->stack);
gdi->stack = NULL;
}
*** src/backend/access/gin/ginget.c.orig Sat Jan 10 16:08:36 2009
--- src/backend/access/gin/ginget.c Thu Mar 5 18:10:53 2009
***************
*** 258,264 ****
}
/*
! * Start* functions setup begining state of searches: finds correct buffer and pins it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
--- 258,264 ----
}
/*
! * Start* functions setup beginning state of searches: finds correct buffer and pins it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
***************
*** 268,273 ****
--- 268,282 ----
Page page;
bool needUnlock = TRUE;
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
***************
*** 285,299 ****
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchIterator = NULL;
- entry->partialMatchResult = NULL;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
if ( entry->isPartialMatch )
{
--- 294,299 ----
***************
*** 354,362 ****
entry->buffer = scanBeginPostingTree(gdi);
/*
! * We keep buffer pinned because we need to prevent deletition
* page during scan. See GIN's vacuum implementation. RefCount
! * is increased to keep buffer pinned after freeGinBtreeStack() call.
*/
IncrBufferRefCount(entry->buffer);
--- 354,363 ----
entry->buffer = scanBeginPostingTree(gdi);
/*
! * We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount
! * is increased to keep buffer pinned after freeGinBtreeStack()
! * call.
*/
IncrBufferRefCount(entry->buffer);
***************
*** 548,570 ****
entry->isFinished = TRUE;
break;
}
! else if ( entry->partialMatchResult->ntuples < 0 )
! {
! /* bitmap became lossy */
! ereport(ERROR,
! (errcode(ERRCODE_OUT_OF_MEMORY),
! errmsg("not enough memory to store result of partial match operator" ),
! errhint("Increase the \"work_mem\" parameter.")));
! }
entry->offset = 0;
}
ItemPointerSet(&entry->curItem,
entry->partialMatchResult->blockno,
entry->partialMatchResult->offsets[ entry->offset ]);
entry->offset ++;
!
! } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
}
else if (!BufferIsValid(entry->buffer))
{
--- 549,584 ----
entry->isFinished = TRUE;
break;
}
!
! /*
! * reset counter to the beginning of entry->partialMatchResult.
! * Note, entry->offset is still greater than partialMatchResult->ntuples
! * if partialMatchResult is lossy. So, on the next call we will get
! * the next result from the TIDBitmap.
! */
entry->offset = 0;
}
+ if ( entry->partialMatchResult->ntuples < 0 )
+ {
+ /*
+ * lossy result, so the whole page has to be rechecked
+ */
+ ItemPointerSetLossyPage(&entry->curItem,
+ entry->partialMatchResult->blockno);
+ /*
+ * break out of the loop because we cannot estimate the number of
+ * results on this page, which is needed to reduce the result
+ * correctly if result reduction is enabled
+ */
+ break;
+ }
+
ItemPointerSet(&entry->curItem,
entry->partialMatchResult->blockno,
entry->partialMatchResult->offsets[ entry->offset ]);
entry->offset ++;
! } while (entry->reduceResult == TRUE && dropItem(entry));
}
else if (!BufferIsValid(entry->buffer))
{
***************
*** 618,623 ****
--- 632,641 ----
if (key->entryRes[i])
{
+ /*
+ * Move forward only those entries which were the least
+ * on the previous call
+ */
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
***************
*** 664,669 ****
--- 682,696 ----
*/
*keyrecheck = true;
+ /*
+ * If one of the entry's scans returns a lossy result, return
+ * it without checking, since we can't give consistentFn
+ * anything helpful.
+ */
+
+ if (ItemPointerIsLossyPage(&key->curItem))
+ return FALSE;
+
oldCtx = MemoryContextSwitchTo(tempCtx);
res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1],
PointerGetDatum(key->entryRes),
***************
*** 677,682 ****
--- 704,1032 ----
return FALSE;
}
+ typedef struct fastPosition {
+ Buffer fastBuffer;
+ OffsetNumber firstOffset;
+ OffsetNumber lastOffset;
+ ItemPointerData item;
+ } fastPosition;
+
+
+ /*
+ * Get ItemPointer of next heap row to be checked from fast insert storage.
+ * Returns false if there are no more.
+ *
+ * The fastBuffer is presumed pinned and share-locked on entry, and is
+ * pinned and share-locked on success exit. On failure exit it's released.
+ */
+ static bool
+ scanGetCandidate(IndexScanDesc scan, fastPosition *pos)
+ {
+ OffsetNumber maxoff;
+ Page page;
+ IndexTuple itup;
+
+ ItemPointerSetInvalid( &pos->item );
+ for(;;)
+ {
+ page = BufferGetPage(pos->fastBuffer);
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ if ( pos->firstOffset > maxoff )
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ if ( blkno == InvalidBlockNumber )
+ {
+ UnlockReleaseBuffer(pos->fastBuffer);
+ pos->fastBuffer=InvalidBuffer;
+
+ return false;
+ }
+ else
+ {
+ /*
+ * Here we must prevent deletion of the next page by the
+ * insert-cleanup process, which now tries to obtain an
+ * exclusive lock on the current page. So, we lock the next
+ * page before releasing the current one
+ */
+ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
+
+ LockBuffer( tmpbuf, GIN_SHARE );
+ UnlockReleaseBuffer( pos->fastBuffer);
+
+ pos->fastBuffer=tmpbuf;
+ pos->firstOffset = FirstOffsetNumber;
+ }
+ }
+ else
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
+ pos->item = itup->t_tid;
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * find itempointer to the next row
+ */
+ for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++)
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
+ if (!ItemPointerEquals(&pos->item, &itup->t_tid))
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * All itempointers are the same on this page
+ */
+ pos->lastOffset = maxoff + 1;
+ }
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off,
+ OffsetNumber maxoff, Datum value, OffsetNumber attrnum,
+ Datum *datum, bool *datumExtracted, StrategyNumber strategy)
+ {
+ IndexTuple itup;
+ int res;
+
+ while( off < maxoff )
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+ if ( attrnum != gintuple_get_attrnum(ginstate, itup) )
+ return false;
+
+ if (datumExtracted[ off-1 ] == false)
+ {
+ datum[ off-1 ] = gin_index_getattr(ginstate, itup);
+ datumExtracted[ off-1 ] = true;
+ }
+
+ res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum],
+ value,
+ datum[ off-1 ],
+ UInt16GetDatum(strategy)));
+ if ( res == 0 )
+ return true;
+ else if (res>0)
+ return false;
+ }
+
+ return false;
+ }
+ /*
+ * Sets the entryRes array for each key by examining
+ * every entry of the indexed value (row) in the fast insert storage.
+ * Returns true if at least one datum was matched by a key's entry
+ *
+ * The fastBuffer is presumed pinned and share-locked on entry.
+ */
+ static bool
+ collectDatumForItem(IndexScanDesc scan, fastPosition *pos)
+ {
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber attrnum;
+ Page page;
+ IndexTuple itup;
+ int i, j;
+ bool hasMatch = false;
+
+ /*
+ * Resets entryRes
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+ memset( key->entryRes, FALSE, key->nentries );
+ }
+
+ for(;;)
+ {
+ Datum datum[ BLCKSZ/sizeof(IndexTupleData) ];
+ bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ];
+
+ Assert( pos->lastOffset > pos->firstOffset );
+ memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset ));
+
+ page = BufferGetPage(pos->fastBuffer);
+
+ for(i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ for(j=0; j<key->nentries; j++)
+ {
+ OffsetNumber StopLow = pos->firstOffset,
+ StopHigh = pos->lastOffset,
+ StopMiddle;
+ GinScanEntry entry = key->scanEntry + j;
+
+ if ( key->entryRes[j] )
+ continue;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+ attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+
+ if (key->attnum < attrnum)
+ StopHigh = StopMiddle;
+ else if (key->attnum > attrnum)
+ StopLow = StopMiddle + 1;
+ else
+ {
+ int res;
+
+ if (datumExtracted[ StopMiddle-1 ] == false)
+ {
+ datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup);
+ datumExtracted[ StopMiddle-1 ] = true;
+ }
+ res = compareEntries(&so->ginstate,
+ entry->attnum,
+ entry->entry,
+ datum[ StopMiddle-1 ]);
+
+ if ( res == 0 )
+ {
+ if ( entry->isPartialMatch )
+ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopMiddle,
+ pos->lastOffset, entry->entry, entry->attnum,
+ datum, datumExtracted, entry->strategy);
+ else
+ key->entryRes[j] = true;
+ break;
+ }
+ else if ( res < 0 )
+ StopHigh = StopMiddle;
+ else
+ StopLow = StopMiddle + 1;
+ }
+ }
+
+ if ( StopLow>=StopHigh && entry->isPartialMatch )
+ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopHigh,
+ pos->lastOffset, entry->entry, entry->attnum,
+ datum, datumExtracted, entry->strategy);
+
+ hasMatch |= key->entryRes[j];
+ }
+ }
+
+ pos->firstOffset = pos->lastOffset;
+
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * We have scanned all values of this tuple; go to the next one
+ */
+
+ return hasMatch;
+ }
+ else
+ {
+ ItemPointerData item = pos->item;
+
+ if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) )
+ elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */
+ }
+ }
+
+ return hasMatch;
+ }
+
+ /*
+ * Collect all matched rows from pending list in bitmap
+ */
+ static void
+ scanFastInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
+ {
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ MemoryContext oldCtx;
+ bool recheck, keyrecheck, match;
+ int i;
+ fastPosition pos;
+ Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+ BlockNumber blkno;
+
+ *ntids = 0;
+
+ LockBuffer(metabuffer, GIN_SHARE);
+ blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+ /*
+ * fetch head of list before unlocking metapage.
+ * head page must be pinned to prevent deletion by vacuum process
+ */
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* No pending list, so proceed with normal scan */
+ UnlockReleaseBuffer( metabuffer );
+ return;
+ }
+
+ pos.fastBuffer = ReadBuffer(scan->indexRelation, blkno);
+ LockBuffer(pos.fastBuffer, GIN_SHARE);
+ pos.firstOffset = FirstOffsetNumber;
+ UnlockReleaseBuffer( metabuffer );
+
+ /*
+ * loop for each heap row
+ */
+ while( scanGetCandidate(scan, &pos) )
+ {
+
+ /*
+ * Check entries in rows and setup entryRes array
+ */
+ if (!collectDatumForItem(scan, &pos))
+ continue;
+
+ /*
+ * check for consistency
+ */
+ oldCtx = MemoryContextSwitchTo(so->tempCtx);
+ recheck = false;
+ match = true;
+
+ for (i = 0; match && i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ keyrecheck = true;
+
+ if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ PointerGetDatum(&keyrecheck))) == false )
+ {
+ match = false;
+ }
+
+ recheck |= keyrecheck;
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(so->tempCtx);
+
+ if ( match )
+ {
+ tbm_add_tuples(tbm, &pos.item, 1, recheck);
+ (*ntids)++;
+ }
+ }
+ }
+
/*
* Get heap item pointer from scan
* returns true if found
***************
*** 700,711 ****
*recheck = false;
ItemPointerSetMin(item);
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
! key, &keyrecheck))
return FALSE; /* finished one of keys */
if (compareItemPointers(item, &key->curItem) < 0)
*item = key->curItem;
--- 1050,1062 ----
*recheck = false;
ItemPointerSetMin(item);
+
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
! key, &keyrecheck))
return FALSE; /* finished one of keys */
if (compareItemPointers(item, &key->curItem) < 0)
*item = key->curItem;
***************
*** 720,725 ****
--- 1071,1088 ----
{
int cmp = compareItemPointers(item, &key->curItem);
+ if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) )
+ {
+ /*
+ * if one of the ItemPointers points to a whole page then
+ * compare only the page numbers
+ */
+ if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) )
+ cmp = 0;
+ else
+ cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1;
+ }
+
if (cmp == 0)
break;
else if (cmp > 0)
***************
*** 757,765 ****
if (GinIsVoidRes(scan))
PG_RETURN_INT64(0);
startScan(scan);
- ntids = 0;
for (;;)
{
ItemPointerData iptr;
--- 1120,1137 ----
if (GinIsVoidRes(scan))
PG_RETURN_INT64(0);
+ ntids = 0;
+
+ /*
+ * Scan of pending pages
+ */
+ scanFastInsert(scan, tbm, &ntids);
+
+ /*
+ * Switch to regular scan
+ */
startScan(scan);
for (;;)
{
ItemPointerData iptr;
***************
*** 770,776 ****
if (!scanGetItem(scan, &iptr, &recheck))
break;
! tbm_add_tuples(tbm, &iptr, 1, recheck);
ntids++;
}
--- 1142,1151 ----
if (!scanGetItem(scan, &iptr, &recheck))
break;
! if ( ItemPointerIsLossyPage(&iptr) )
! tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
! else
! tbm_add_tuples(tbm, &iptr, 1, recheck);
ntids++;
}
***************
*** 780,800 ****
Datum
gingettuple(PG_FUNCTION_ARGS)
{
! IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
! ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
! bool res;
!
! if (dir != ForwardScanDirection)
! elog(ERROR, "GIN doesn't support other scan directions than forward");
!
! if (GinIsNewKey(scan))
! newScanKey(scan);
!
! if (GinIsVoidRes(scan))
! PG_RETURN_BOOL(false);
!
! startScan(scan);
! res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
!
! PG_RETURN_BOOL(res);
}
--- 1155,1160 ----
Datum
gingettuple(PG_FUNCTION_ARGS)
{
! elog(ERROR, "GIN doesn't support amgettuple interface");
! PG_RETURN_BOOL(false);
}
*** src/backend/access/gin/gininsert.c.orig Thu Jan 1 12:24:41 2009
--- src/backend/access/gin/gininsert.c Thu Mar 5 18:10:53 2009
***************
*** 138,144 ****
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
! static void
ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem, bool isBuild)
{
--- 138,144 ----
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
! void
ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem, bool isBuild)
{
***************
*** 273,279 ****
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
! Buffer buffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
--- 273,279 ----
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
! Buffer RootBuffer, MetaBuffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
***************
*** 286,296 ****
initGinState(&buildstate.ginstate, index);
/* initialize the root page */
! buffer = GinNewBuffer(index);
START_CRIT_SECTION();
! GinInitBuffer(buffer, GIN_LEAF);
! MarkBufferDirty(buffer);
if (!index->rd_istemp)
{
--- 286,302 ----
initGinState(&buildstate.ginstate, index);
+ /* initialize the meta page */
+ MetaBuffer = GinNewBuffer(index);
+
/* initialize the root page */
! RootBuffer = GinNewBuffer(index);
!
START_CRIT_SECTION();
! GinInitMetabuffer(MetaBuffer);
! MarkBufferDirty(MetaBuffer);
! GinInitBuffer(RootBuffer, GIN_LEAF);
! MarkBufferDirty(RootBuffer);
if (!index->rd_istemp)
{
***************
*** 303,318 ****
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
- page = BufferGetPage(buffer);
-
-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
! UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
/* build the index */
--- 309,327 ----
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+
+ page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
+ page = BufferGetPage(MetaBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
}
! UnlockReleaseBuffer(MetaBuffer);
! UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
***************
*** 417,425 ****
initGinState(&ginstate, index);
! for(i=0; i<ginstate.origTupdesc->natts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
--- 426,451 ----
initGinState(&ginstate, index);
! if ( GinGetUseFastUpdate(index) )
! {
! GinTupleCollector collector;
!
! memset(&collector, 0, sizeof(GinTupleCollector));
! for(i=0; i<ginstate.origTupdesc->natts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleFastCollect(index, &ginstate, &collector,
! (OffsetNumber)(i+1), values[i], ht_ctid);
!
! ginHeapTupleFastInsert(index, &ginstate, &collector);
! }
! else
! {
! for(i=0; i<ginstate.origTupdesc->natts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleInsert(index, &ginstate,
! (OffsetNumber)(i+1), values[i], ht_ctid);
!
! }
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
*** src/backend/access/gin/ginutil.c.orig Tue Jan 6 10:15:13 2009
--- src/backend/access/gin/ginutil.c Thu Mar 5 18:10:53 2009
***************
*** 21,26 ****
--- 21,27 ----
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
+ #include "utils/guc.h"
void
initGinState(GinState *state, Relation index)
***************
*** 57,63 ****
CurrentMemoryContext);
/*
! * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
--- 58,64 ----
CurrentMemoryContext);
/*
! * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
***************
*** 88,94 ****
bool isnull;
/*
! * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
--- 89,95 ----
bool isnull;
/*
! * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
***************
*** 213,218 ****
--- 214,235 ----
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
+ void
+ GinInitMetabuffer(Buffer b)
+ {
+ GinMetaPageData *metadata;
+ Page page = BufferGetPage(b);
+
+ GinInitPage(page, GIN_META, BufferGetPageSize(b));
+
+ metadata = GinPageGetMeta(page);
+
+ metadata->head = metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ metadata->nPendingPages = 0;
+ metadata->nPendingHeapTuples = 0;
+ }
+
int
compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
{
***************
*** 313,324 ****
Datum
ginoptions(PG_FUNCTION_ARGS)
{
! Datum reloptions = PG_GETARG_DATUM(0);
! bool validate = PG_GETARG_BOOL(1);
! bytea *result;
!
! result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN);
! if (result)
! PG_RETURN_BYTEA_P(result);
! PG_RETURN_NULL();
}
--- 330,357 ----
Datum
ginoptions(PG_FUNCTION_ARGS)
{
! Datum reloptions = PG_GETARG_DATUM(0);
! bool validate = PG_GETARG_BOOL(1);
! relopt_value *options;
! GinOptions *rdopts;
! int numoptions;
! relopt_parse_elt tab[] = {
! {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
! };
!
! options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN, &numoptions);
!
! /* if none set, we're done */
! if (numoptions == 0)
! PG_RETURN_NULL();
!
! rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
!
! fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
! validate, tab, lengthof(tab));
!
! pfree(options);
!
! PG_RETURN_BYTEA_P(rdopts);
}
+
*** src/backend/access/gin/ginvacuum.c.orig Thu Jan 1 12:24:42 2009
--- src/backend/access/gin/ginvacuum.c Thu Mar 5 18:10:53 2009
***************
*** 593,610 ****
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
/* first time through? */
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
/* we'll re-count the tuples each time */
stats->num_index_tuples = 0;
-
- gvs.index = index;
gvs.result = stats;
- gvs.callback = callback;
- gvs.callback_state = callback_state;
- gvs.strategy = info->strategy;
- initGinState(&gvs.ginstate, index);
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
--- 593,614 ----
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
+ gvs.index = index;
+ gvs.callback = callback;
+ gvs.callback_state = callback_state;
+ gvs.strategy = info->strategy;
+ initGinState(&gvs.ginstate, index);
+
/* first time through? */
if (stats == NULL)
+ {
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ ginInsertCleanup(index, &gvs.ginstate, stats);
+ }
+
/* we'll re-count the tuples each time */
stats->num_index_tuples = 0;
gvs.result = stats;
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
***************
*** 703,711 ****
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
! /* Set up all-zero stats if ginbulkdelete wasn't called */
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
/*
* XXX we always report the heap tuple count as the number of index
--- 707,724 ----
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
! /*
! * Set up all-zero stats and finalize fast insertion
! * if ginbulkdelete wasn't called
! */
if (stats == NULL)
+ {
+ GinState ginstate;
+
+ initGinState(&ginstate, index);
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ ginInsertCleanup(index, &ginstate, stats);
+ }
/*
* XXX we always report the heap tuple count as the number of index
*** src/backend/access/gin/ginxlog.c.orig Wed Jan 21 11:14:21 2009
--- src/backend/access/gin/ginxlog.c Thu Mar 5 18:10:53 2009
***************
*** 71,90 ****
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
! Buffer buffer;
Page page;
! buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
! Assert(BufferIsValid(buffer));
! page = (Page) BufferGetPage(buffer);
! GinInitBuffer(buffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
! MarkBufferDirty(buffer);
! UnlockReleaseBuffer(buffer);
}
static void
--- 71,100 ----
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
! Buffer RootBuffer, MetaBuffer;
Page page;
! MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
! Assert(BufferIsValid(MetaBuffer));
! GinInitMetabuffer(MetaBuffer);
!
! page = (Page) BufferGetPage(MetaBuffer);
! PageSetLSN(page, lsn);
! PageSetTLI(page, ThisTimeLineID);
! RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
! Assert(BufferIsValid(RootBuffer));
! page = (Page) BufferGetPage(RootBuffer);
!
! GinInitBuffer(RootBuffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
! MarkBufferDirty(MetaBuffer);
! UnlockReleaseBuffer(MetaBuffer);
! MarkBufferDirty(RootBuffer);
! UnlockReleaseBuffer(RootBuffer);
}
static void
***************
*** 433,438 ****
--- 443,616 ----
}
}
+ static void
+ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+ {
+ ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+ if ( data->ntuples > 0 )
+ {
+ /*
+ * insert into tail page
+ */
+ if (!(record->xl_info & XLR_BKP_BLOCK_1))
+ {
+ Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+ for(i=0; i<data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ /*
+ * Increase counter of heap tuples
+ */
+ GinPageGetOpaque(page)->maxoff++;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+ }
+ else if ( data->prevTail != InvalidBlockNumber )
+ {
+ /*
+ * New tail
+ */
+
+ Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+
+ UnlockReleaseBuffer(metabuffer);
+ }
+
+ static void
+ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+ {
+ ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ OffsetNumber l, off = FirstOffsetNumber;
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+ if (record->xl_info & XLR_BKP_BLOCK_1)
+ return;
+
+ buffer = XLogReadBuffer(data->node, data->blkno, true);
+ page = BufferGetPage(buffer);
+
+ GinInitBuffer(buffer, GIN_LIST);
+ GinPageGetOpaque(page)->rightlink = data->rightlink;
+ if ( data->rightlink == InvalidBlockNumber )
+ {
+ /* tail of sublist */
+ GinPageSetFullRow(page);
+ GinPageGetOpaque(page)->maxoff = 1;
+ }
+ else
+ {
+ GinPageGetOpaque(page)->maxoff = 0;
+ }
+
+ for(i=0; i<data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+
+ UnlockReleaseBuffer(buffer);
+ }
+
+ static void
+ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+ {
+ ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+ int i;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+ for(i=0; i<data->ndeleted; i++)
+ {
+ Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+ UnlockReleaseBuffer(metabuffer);
+ }
+
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
***************
*** 461,466 ****
--- 639,653 ----
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ ginRedoUpdateMetapage(lsn, record);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ ginRedoInsertListPage(lsn, record);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ ginRedoDeleteListPages(lsn, record);
+ break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
}
***************
*** 516,521 ****
--- 703,720 ----
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ appendStringInfo(buf, "Update metapage, ");
+ desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ appendStringInfo(buf, "insert new list page, ");
+ desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ appendStringInfo(buf, "Delete list page (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+ desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head);
+ break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
*** src/backend/catalog/system_views.sql.orig Fri Feb 6 16:15:11 2009
--- src/backend/catalog/system_views.sql Thu Mar 5 18:10:53 2009
***************
*** 193,198 ****
--- 193,199 ----
pg_stat_get_tuples_updated(C.oid) AS n_tup_upd,
pg_stat_get_tuples_deleted(C.oid) AS n_tup_del,
pg_stat_get_tuples_hot_updated(C.oid) AS n_tup_hot_upd,
+ pg_stat_get_fresh_inserted_tuples(C.oid) AS n_fresh_tup,
pg_stat_get_live_tuples(C.oid) AS n_live_tup,
pg_stat_get_dead_tuples(C.oid) AS n_dead_tup,
pg_stat_get_last_vacuum_time(C.oid) as last_vacuum,
*** src/backend/nodes/tidbitmap.c.orig Sat Jan 10 16:08:36 2009
--- src/backend/nodes/tidbitmap.c Thu Mar 5 18:10:53 2009
***************
*** 310,315 ****
--- 310,396 ----
}
/*
+ * tbm_add_page - add the whole page to a TIDBitmap.
+ */
+ void
+ tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
+ {
+ PagetableEntry *page;
+ bool found;
+ BlockNumber chunk_pageno;
+ int bitno;
+ int wordnum;
+ int bitnum;
+
+ /* We force the bitmap into hashtable mode whenever it's lossy */
+ if (tbm->status != TBM_HASH)
+ tbm_create_pagetable(tbm);
+
+ bitno = pageno % PAGES_PER_CHUNK;
+ chunk_pageno = pageno - bitno;
+
+ /* Look up or create entry for chunk-header page */
+ page = (PagetableEntry *) hash_search(tbm->pagetable,
+ (void *) &chunk_pageno,
+ HASH_ENTER, &found);
+
+ if (!found)
+ {
+ /* Initialize it if not present before */
+ MemSet(page, 0, sizeof(PagetableEntry));
+ page->blockno = chunk_pageno;
+ page->ischunk = true;
+ /* must count it too */
+ tbm->nentries++;
+ tbm->nchunks++;
+
+ /* loop below doesn't mark chunk page itself */
+ if (bitno == 0)
+ page->words[0] = ((bitmapword) 1 << 0);
+ }
+ else if (!page->ischunk)
+ {
+ /* chunk header page was formerly non-lossy, make it lossy */
+ MemSet(page, 0, sizeof(PagetableEntry));
+ page->blockno = chunk_pageno;
+ page->ischunk = true;
+ /* we assume it had some tuple bit(s) set, so mark it lossy */
+ page->words[0] = ((bitmapword) 1 << 0);
+ /* adjust counts */
+ tbm->nchunks++;
+ tbm->npages--;
+ }
+ else
+ {
+ /* just set page's bit */
+ wordnum = WORDNUM(bitno);
+ bitnum = BITNUM(bitno);
+ page->words[wordnum] |= ((bitmapword) 1 << bitnum);
+ /* nothing more to do */
+ return;
+ }
+
+ /* Now mark the target page itself as lossy */
+ wordnum = WORDNUM(bitno);
+ bitnum = BITNUM(bitno);
+ page->words[wordnum] |= ((bitmapword) 1 << bitnum);
+
+ /*
+ * Look for per-page entries for the other pages in this chunk: remove
+ * them and represent those pages lossily in the chunk instead (their
+ * tuple-level detail is discarded). We skip the chunk page itself
+ * because it was handled above.
+ */
+ for(bitno = 1; bitno < PAGES_PER_CHUNK; bitno++)
+ {
+ chunk_pageno++;
+
+ if (hash_search(tbm->pagetable,
+ (void *) &chunk_pageno,
+ HASH_REMOVE, NULL) != NULL)
+ {
+ wordnum = WORDNUM(bitno);
+ bitnum = BITNUM(bitno);
+ page->words[wordnum] |= ((bitmapword) 1 << bitnum);
+ tbm->nentries--;
+ tbm->npages--; /* assume it must have been non-lossy */
+ }
+ }
+ }
+
+ /*
* tbm_union - set union
*
* a is modified in-place, b is not changed
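For illustration, a minimal sketch of how a bitmap-producing index scan can combine the existing tbm_add_tuples() with the new tbm_add_page(): exact heap TIDs set individual tuple bits, while a page whose tuple-level detail is unknown is added as a whole and is later rechecked by the bitmap heap scan. The function report_matches() and its arguments are hypothetical; only the two tidbitmap calls come from the patch.

#include "postgres.h"

#include "nodes/tidbitmap.h"
#include "storage/itemptr.h"

/* Illustrative only: report exact and lossy matches into a TIDBitmap. */
static void
report_matches(TIDBitmap *tbm,
               ItemPointer exact_tids, int nexact,
               BlockNumber *lossy_pages, int nlossy)
{
    int         i;

    /* Exact matches: one bit per heap tuple, no recheck required. */
    tbm_add_tuples(tbm, exact_tids, nexact, false);

    /*
     * Lossy matches: only the heap block number is known, so the whole
     * page is added; the executor rechecks every tuple on that page.
     */
    for (i = 0; i < nlossy; i++)
        tbm_add_page(tbm, lossy_pages[i]);
}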
*** src/backend/postmaster/autovacuum.c.orig Tue Feb 10 11:22:16 2009
--- src/backend/postmaster/autovacuum.c Thu Mar 5 18:10:53 2009
***************
*** 72,77 ****
--- 72,78 ----
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
+ #include "access/gin.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
***************
*** 2470,2475 ****
--- 2471,2528 ----
}
/*
+ * relation_has_pending_indexes
+ *
+ * Returns true if relation has indexes with delayed insertion.
+ * Currently, only GIN has that possibility.
+ */
+
+ static bool
+ relation_has_pending_indexes(Oid relid, Form_pg_class classForm)
+ {
+ Relation rel;
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ bool has = false;
+
+ /* only an ordinary cataloged heap relation can contain such indexes */
+ if ( classForm->relkind != RELKIND_RELATION )
+ return false;
+
+ /* no indexes at all */
+ if ( classForm->relhasindex == false )
+ return false;
+
+ rel = RelationIdGetRelation(relid);
+
+ indexoidlist = RelationGetIndexList(rel);
+
+ foreach(indexoidscan, indexoidlist)
+ {
+ Oid indexoid = lfirst_oid(indexoidscan);
+ Relation irel = RelationIdGetRelation(indexoid);
+
+ /*
+ * Currently, only a GIN index in fast update mode qualifies.
+ */
+ if ( irel->rd_rel->relam == GIN_AM_OID && GinGetUseFastUpdate(irel) )
+ {
+ RelationClose(irel);
+ has = true;
+ break;
+ }
+
+ RelationClose(irel);
+ }
+
+ list_free(indexoidlist);
+
+ RelationClose(rel);
+
+ return has;
+ }
+
+ /*
* relation_needs_vacanalyze
*
* Check whether a relation needs to be vacuumed or analyzed; return each into
***************
*** 2531,2537 ****
/* number of vacuum (resp. analyze) tuples at this time */
float4 vactuples,
! anltuples;
/* freeze parameters */
int freeze_max_age;
--- 2584,2591 ----
/* number of vacuum (resp. analyze) tuples at this time */
float4 vactuples,
! anltuples,
! instuples;
/* freeze parameters */
int freeze_max_age;
***************
*** 2588,2593 ****
--- 2642,2648 ----
vactuples = tabentry->n_dead_tuples;
anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
tabentry->last_anl_tuples;
+ instuples = tabentry->n_inserted_tuples;
vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
***************
*** 2601,2608 ****
NameStr(classForm->relname),
vactuples, vacthresh, anltuples, anlthresh);
! /* Determine if this table needs vacuum or analyze. */
! *dovacuum = force_vacuum || (vactuples > vacthresh);
*doanalyze = (anltuples > anlthresh);
}
else
--- 2656,2668 ----
NameStr(classForm->relname),
vactuples, vacthresh, anltuples, anlthresh);
! /*
! * Determine if this table needs vacuum or analyze.
! * Use vac_base_thresh as the threshold for instuples, because the time
! * to search GIN's pending pages grows linearly with their number.
! */
! *dovacuum = force_vacuum || (vactuples > vacthresh) ||
! (relation_has_pending_indexes(relid, classForm) && instuples > vac_base_thresh);
*doanalyze = (anltuples > anlthresh);
}
else
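To make the new autovacuum trigger concrete, a worked example under assumed default settings (autovacuum_vacuum_threshold = 50, autovacuum_vacuum_scale_factor = 0.2) for a table of 100,000 rows: the usual dead-tuple threshold is 50 + 0.2 * 100000 = 20,050, but when the table has a GIN index with fast update enabled, a vacuum is also requested once more than 50 freshly inserted tuples have accumulated, because only vac_base_thresh is applied to instuples. The standalone snippet below merely restates that arithmetic; all values are invented for the example.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    float   reltuples = 100000;         /* pg_class.reltuples */
    float   vac_scale_factor = 0.2f;    /* autovacuum_vacuum_scale_factor */
    int     vac_base_thresh = 50;       /* autovacuum_vacuum_threshold */
    float   vactuples = 100;            /* dead tuples since last vacuum */
    float   instuples = 120;            /* fresh inserts since last vacuum */
    bool    has_pending_gin = true;     /* a FASTUPDATE GIN index exists */

    float   vacthresh = vac_base_thresh + vac_scale_factor * reltuples;
    bool    dovacuum = (vactuples > vacthresh) ||          /* 100 > 20050: false */
        (has_pending_gin && instuples > vac_base_thresh);  /* 120 > 50: true */

    printf("vacthresh = %.0f, dovacuum = %d\n", vacthresh, dovacuum);
    return 0;
}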
*** src/backend/postmaster/pgstat.c.orig Thu Jan 1 12:25:11 2009
--- src/backend/postmaster/pgstat.c Thu Mar 5 18:10:53 2009
***************
*** 3537,3542 ****
--- 3537,3545 ----
tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted;
tabentry->tuples_hot_updated = tabmsg[i].t_counts.t_tuples_hot_updated;
+ tabentry->n_inserted_tuples = tabmsg[i].t_counts.t_tuples_inserted +
+ tabmsg[i].t_counts.t_tuples_updated -
+ tabmsg[i].t_counts.t_tuples_hot_updated;
tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples;
tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched;
***************
*** 3560,3565 ****
--- 3563,3571 ----
tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
tabentry->tuples_hot_updated += tabmsg[i].t_counts.t_tuples_hot_updated;
+ tabentry->n_inserted_tuples += tabmsg[i].t_counts.t_tuples_inserted +
+ tabmsg[i].t_counts.t_tuples_updated -
+ tabmsg[i].t_counts.t_tuples_hot_updated;
tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples;
tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
***************
*** 3570,3575 ****
--- 3576,3583 ----
tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
/* Likewise for n_dead_tuples */
tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
+ /* Likewise for n_inserted_tuples */
+ tabentry->n_inserted_tuples = Max(tabentry->n_inserted_tuples, 0);
/*
* Add per-table stats to the per-database entry, too.
***************
*** 3770,3775 ****
--- 3778,3784 ----
tabentry->n_live_tuples = msg->m_tuples;
/* Resetting dead_tuples to 0 is an approximation ... */
tabentry->n_dead_tuples = 0;
+ tabentry->n_inserted_tuples = 0;
if (msg->m_analyze)
{
if (msg->m_scanned_all)
*** src/backend/utils/adt/pgstatfuncs.c.orig Thu Jan 1 12:25:21 2009
--- src/backend/utils/adt/pgstatfuncs.c Thu Mar 5 18:17:37 2009
***************
*** 31,36 ****
--- 31,37 ----
extern Datum pg_stat_get_tuples_deleted(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_tuples_hot_updated(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_live_tuples(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_dead_tuples(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_blocks_hit(PG_FUNCTION_ARGS);
***************
*** 211,216 ****
--- 212,233 ----
Datum
+ pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS)
+ {
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->n_inserted_tuples);
+
+ PG_RETURN_INT64(result);
+ }
+
+
+ Datum
pg_stat_get_dead_tuples(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
*** src/include/access/gin.h.orig Sat Jan 10 16:08:36 2009
--- src/include/access/gin.h Thu Mar 5 18:10:53 2009
***************
*** 21,26 ****
--- 21,27 ----
#include "storage/buf.h"
#include "storage/off.h"
#include "storage/relfilenode.h"
+ #include "utils/rel.h"
/*
***************
*** 46,62 ****
OffsetNumber maxoff; /* number entries on GIN_DATA page: number of
* heap ItemPointer on GIN_DATA|GIN_LEAF page
* and number of records on GIN_DATA &
! * ~GIN_LEAF page */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
typedef GinPageOpaqueData *GinPageOpaque;
! #define GIN_ROOT_BLKNO (0)
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
/*
* Works on page
--- 47,93 ----
OffsetNumber maxoff; /* number entries on GIN_DATA page: number of
* heap ItemPointer on GIN_DATA|GIN_LEAF page
* and number of records on GIN_DATA &
! * ~GIN_LEAF page. On a GIN_LIST page, the number of heap tuples. */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
typedef GinPageOpaqueData *GinPageOpaque;
! #define GIN_METAPAGE_BLKNO (0)
! #define GIN_ROOT_BLKNO (1)
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
+ #define GIN_META (1 << 3)
+ #define GIN_LIST (1 << 4)
+ #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+
+ typedef struct GinMetaPageData
+ {
+ /*
+ * Pointers to head and tail of list of GIN_LIST pages. These store
+ * fast-inserted entries that haven't yet been moved into the regular
+ * GIN structure.
+ */
+ BlockNumber head;
+ BlockNumber tail;
+
+ /*
+ * Free space in bytes in the list's tail page.
+ */
+ uint32 tailFreeSize;
+
+ /*
+ * Store both the number of pages and the number of heap tuples
+ * in the pending list.
+ */
+ BlockNumber nPendingPages;
+ int64 nPendingHeapTuples;
+ } GinMetaPageData;
+
+ #define GinPageGetMeta(p) \
+ ((GinMetaPageData *) PageGetContents(p))
/*
* Works on page
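As a reading aid for the metapage layout above, a small sketch showing how the pending-list statistics can be read from block GIN_METAPAGE_BLKNO with the macros just defined. The helper name read_pending_stats() is hypothetical, and the locking shown is only the minimum needed for a consistent read.

#include "postgres.h"

#include "access/gin.h"
#include "storage/bufmgr.h"

/* Illustrative only: fetch the pending-list statistics from the metapage. */
static void
read_pending_stats(Relation index, BlockNumber *npages, int64 *nheaptuples)
{
    Buffer          metabuffer;
    GinMetaPageData *metadata;

    metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
    LockBuffer(metabuffer, GIN_SHARE);

    metadata = GinPageGetMeta(BufferGetPage(metabuffer));
    *npages = metadata->nPendingPages;
    *nheaptuples = metadata->nPendingHeapTuples;

    UnlockReleaseBuffer(metabuffer);
}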
***************
*** 68,73 ****
--- 99,106 ----
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+ #define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+ #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
***************
*** 135,140 ****
--- 168,186 ----
- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
+ /*
+ * storage type for GIN's options.
+ */
+ typedef struct GinOptions
+ {
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ bool useFastUpdate; /* use fast updates? */
+ } GinOptions;
+
+ #define GIN_DEFAULT_USE_FASTUPDATE true
+ #define GinGetUseFastUpdate(relation) \
+ ((relation)->rd_options ? \
+ ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
#define GIN_SHARE BUFFER_LOCK_SHARE
***************
*** 234,245 ****
--- 280,328 ----
BlockNumber rightLink;
} ginxlogDeletePage;
+
+ #define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+ typedef struct ginxlogUpdateMeta
+ {
+ RelFileNode node;
+ GinMetaPageData metadata;
+ BlockNumber prevTail;
+ BlockNumber newRightlink;
+ int32 ntuples; /* if ntuples > 0 then metadata.tail was updated with
+ those tuples, else a new sublist was inserted */
+ /* array of inserted tuples follows */
+ } ginxlogUpdateMeta;
+
+ #define XLOG_GIN_INSERT_LISTPAGE 0x70
+
+ typedef struct ginxlogInsertListPage
+ {
+ RelFileNode node;
+ BlockNumber blkno;
+ BlockNumber rightlink;
+ int32 ntuples;
+ /* array of inserted tuples follows */
+ } ginxlogInsertListPage;
+
+ #define XLOG_GIN_DELETE_LISTPAGE 0x80
+
+ #define NDELETE_AT_ONCE (16)
+ typedef struct ginxlogDeleteListPages
+ {
+ RelFileNode node;
+ GinMetaPageData metadata;
+ int32 ndeleted;
+ BlockNumber toDelete[ NDELETE_AT_ONCE ];
+ } ginxlogDeleteListPages;
+
/* ginutil.c */
extern Datum ginoptions(PG_FUNCTION_ARGS);
extern void initGinState(GinState *state, Relation index);
extern Buffer GinNewBuffer(Relation index);
extern void GinInitBuffer(Buffer b, uint32 f);
extern void GinInitPage(Page page, uint32 f, Size pageSize);
+ extern void GinInitMetabuffer(Buffer b);
extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
OffsetNumber attnum_b, Datum b);
***************
*** 249,257 ****
--- 332,343 ----
extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
+
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
+ extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem, bool isBuild);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
***************
*** 443,449 ****
#define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
#define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0)
! #define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 )
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
extern Datum gingettuple(PG_FUNCTION_ARGS);
--- 529,536 ----
#define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
#define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0)
! #define ItemPointerSetLossyPage(p, b) ItemPointerSet( (p), (b), 0xffff)
! #define ItemPointerIsLossyPage(p) ( ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
extern Datum gingettuple(PG_FUNCTION_ARGS);
***************
*** 489,492 ****
--- 576,598 ----
OffsetNumber attnum, Datum *entries, int32 nentry);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
+ /* ginfast.c */
+
+ typedef struct GinTupleCollector {
+ IndexTuple *tuples;
+ uint32 ntuples;
+ uint32 lentuples;
+ uint32 sumsize;
+ } GinTupleCollector;
+
+ extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate, GinTupleCollector *collector);
+ extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item);
+
+ #define GinListPageSize \
+ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+ extern void ginInsertCleanup(Relation index, GinState *ginstate, IndexBulkDeleteResult *stats);
+
#endif
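To show how the declarations above fit together on the insert path, a rough sketch (illustrative only: insert_one_value() is hypothetical, the extracted keys are assumed to be in entries[], and error handling is omitted; the patch's real gininsert() is more involved): with FASTUPDATE enabled the keys are collected into index tuples and appended to the pending list, otherwise each key is inserted directly into the main GIN structure.

#include "postgres.h"

#include "access/gin.h"
#include "storage/itemptr.h"

/*
 * Illustrative only: index one heap value either via the pending list
 * or via the regular GIN entry tree.
 */
static void
insert_one_value(Relation index, GinState *ginstate, OffsetNumber attnum,
                 Datum value, Datum *entries, int32 nentries,
                 ItemPointer heapptr)
{
    if (GinGetUseFastUpdate(index))
    {
        GinTupleCollector collector;

        memset(&collector, 0, sizeof(collector));

        /* Build index tuples for all keys of this value ... */
        ginHeapTupleFastCollect(index, ginstate, &collector,
                                attnum, value, heapptr);

        /* ... and append them to the pending list in one step. */
        ginHeapTupleFastInsert(index, ginstate, &collector);
    }
    else
    {
        /* Classic path: insert each extracted key into the main GIN tree. */
        int         i;

        for (i = 0; i < nentries; i++)
            ginEntryInsert(index, ginstate, attnum, entries[i],
                           heapptr, 1, false);
    }
}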
*** src/include/catalog/pg_am.h.orig Thu Mar 5 18:06:45 2009
--- src/include/catalog/pg_am.h Thu Mar 5 18:10:53 2009
***************
*** 118,124 ****
DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
! DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
--- 118,124 ----
DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
! DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan 0 gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
*** src/include/catalog/pg_proc.h.orig Tue Feb 24 11:10:32 2009
--- src/include/catalog/pg_proc.h Thu Mar 5 18:10:54 2009
***************
*** 2965,2970 ****
--- 2965,2972 ----
DESCR("statistics: number of tuples deleted");
DATA(insert OID = 1972 ( pg_stat_get_tuples_hot_updated PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_tuples_hot_updated _null_ _null_ _null_ ));
DESCR("statistics: number of tuples hot updated");
+ DATA(insert OID = 2319 ( pg_stat_get_fresh_inserted_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_fresh_inserted_tuples _null_ _null_ _null_ ));
+ DESCR("statistics: number of inserted tuples since last vacuum");
DATA(insert OID = 2878 ( pg_stat_get_live_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_live_tuples _null_ _null_ _null_ ));
DESCR("statistics: number of live tuples");
DATA(insert OID = 2879 ( pg_stat_get_dead_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_dead_tuples _null_ _null_ _null_ ));
*** src/include/nodes/tidbitmap.h.orig Sat Jan 10 16:08:36 2009
--- src/include/nodes/tidbitmap.h Thu Mar 5 18:10:54 2009
***************
*** 52,57 ****
--- 52,58 ----
extern void tbm_add_tuples(TIDBitmap *tbm,
const ItemPointer tids, int ntids,
bool recheck);
+ extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);
*** src/include/pgstat.h.orig Sun Jan 4 17:19:59 2009
--- src/include/pgstat.h Thu Mar 5 18:16:50 2009
***************
*** 476,481 ****
--- 476,483 ----
PgStat_Counter tuples_deleted;
PgStat_Counter tuples_hot_updated;
+ PgStat_Counter n_inserted_tuples; /* inserted plus non-HOT-updated tuples
+ * since last vacuum */
PgStat_Counter n_live_tuples;
PgStat_Counter n_dead_tuples;
PgStat_Counter last_anl_tuples;
*** src/test/regress/expected/rules.out.orig Tue Feb 3 21:08:47 2009
--- src/test/regress/expected/rules.out Thu Mar 5 18:10:54 2009
***************
*** 1291,1304 ****
pg_shadow | SELECT pg_authid.rolname AS usename, pg_authid.oid AS usesysid, pg_authid.rolcreatedb AS usecreatedb, pg_authid.rolsuper AS usesuper, pg_authid.rolcatupdate AS usecatupd, pg_authid.rolpassword AS passwd, (pg_authid.rolvaliduntil)::abstime AS valuntil, pg_authid.rolconfig AS useconfig FROM pg_authid WHERE pg_authid.rolcanlogin;
pg_stat_activity | SELECT s.datid, d.datname, s.procpid, s.usesysid, u.rolname AS usename, s.current_query, s.waiting, s.xact_start, s.query_start, s.backend_start, s.client_addr, s.client_port FROM pg_database d, pg_stat_get_activity(NULL::integer) s(datid, procpid, usesysid, current_query, waiting, xact_start, query_start, backend_start, client_addr, client_port), pg_authid u WHERE ((s.datid = d.oid) AND (s.usesysid = u.oid));
pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
! pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc;
pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d;
pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));
! pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
pg_stat_user_functions | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, (pg_stat_get_function_time(p.oid) / 1000) AS total_time, (pg_stat_get_function_self_time(p.oid) / 1000) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL));
pg_stat_user_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text));
! pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
pg_statio_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
pg_statio_all_sequences | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS blks_read, pg_stat_get_blocks_hit(c.oid) AS blks_hit FROM (pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = 'S'::"char");
pg_statio_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS heap_blks_read, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, (sum((pg_stat_get_blocks_fetched(i.indexrelid) - pg_stat_get_blocks_hit(i.indexrelid))))::bigint AS idx_blks_read, (sum(pg_stat_get_blocks_hit(i.indexrelid)))::bigint AS idx_blks_hit, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, (pg_stat_get_blocks_fetched(x.oid) - pg_stat_get_blocks_hit(x.oid)) AS tidx_blks_read, pg_stat_get_blocks_hit(x.oid) AS tidx_blks_hit FROM ((((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_class x ON ((t.reltoastidxid = x.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname, t.oid, x.oid;
--- 1291,1304 ----
pg_shadow | SELECT pg_authid.rolname AS usename, pg_authid.oid AS usesysid, pg_authid.rolcreatedb AS usecreatedb, pg_authid.rolsuper AS usesuper, pg_authid.rolcatupdate AS usecatupd, pg_authid.rolpassword AS passwd, (pg_authid.rolvaliduntil)::abstime AS valuntil, pg_authid.rolconfig AS useconfig FROM pg_authid WHERE pg_authid.rolcanlogin;
pg_stat_activity | SELECT s.datid, d.datname, s.procpid, s.usesysid, u.rolname AS usename, s.current_query, s.waiting, s.xact_start, s.query_start, s.backend_start, s.client_addr, s.client_port FROM pg_database d, pg_stat_get_activity(NULL::integer) s(datid, procpid, usesysid, current_query, waiting, xact_start, query_start, backend_start, client_addr, client_port), pg_authid u WHERE ((s.datid = d.oid) AND (s.usesysid = u.oid));
pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
! pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_fresh_inserted_tuples(c.oid) AS n_fresh_tup, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc;
pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d;
pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));
! pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
pg_stat_user_functions | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, (pg_stat_get_function_time(p.oid) / 1000) AS total_time, (pg_stat_get_function_self_time(p.oid) / 1000) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL));
pg_stat_user_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text));
! pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
pg_statio_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
pg_statio_all_sequences | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS blks_read, pg_stat_get_blocks_hit(c.oid) AS blks_hit FROM (pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = 'S'::"char");
pg_statio_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS heap_blks_read, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, (sum((pg_stat_get_blocks_fetched(i.indexrelid) - pg_stat_get_blocks_hit(i.indexrelid))))::bigint AS idx_blks_read, (sum(pg_stat_get_blocks_hit(i.indexrelid)))::bigint AS idx_blks_hit, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, (pg_stat_get_blocks_fetched(x.oid) - pg_stat_get_blocks_hit(x.oid)) AS tidx_blks_read, pg_stat_get_blocks_hit(x.oid) AS tidx_blks_hit FROM ((((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_class x ON ((t.reltoastidxid = x.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname, t.oid, x.oid;