*** doc/src/sgml/config.sgml.orig Fri Feb 27 19:10:50 2009 --- doc/src/sgml/config.sgml Thu Mar 5 18:10:54 2009 *************** *** 3571,3576 **** --- 3571,3581 ---- This setting can be overridden for individual tables by changing storage parameters. + + This parameter also affects vacuuming of a table with a GIN + index: it specifies the minimum number of inserted or updated + tuples needed to trigger a VACUUM on that table. + *** doc/src/sgml/gin.sgml.orig Thu Jul 24 12:31:21 2008 --- doc/src/sgml/gin.sgml Thu Mar 5 18:10:54 2009 *************** *** 188,196 **** list of heap pointers (PL, posting list) if the list is small enough. Partial match algorithm ! GIN can support partial match queries, in which the query does not determine an exact match for one or more keys, but the possible --- 188,230 ---- list of heap pointers (PL, posting list) if the list is small enough. + + GIN fast update technique + + + Updating a GIN index tends to be slow because of the + intrinsic nature of inverted indexes: inserting or updating one heap row + can cause many inserts into the index (one for each key extracted + from the indexed value). As of PostgreSQL 8.4, + GIN is capable of postponing much of this work by inserting + new tuples into a temporary, unsorted list of pending entries. + When the table is vacuumed, or in some cases when the pending list + becomes too large, the entries are moved to the main + GIN data structure using the same bulk insert + techniques used during initial index creation. This greatly improves + GIN index update speed, even counting the additional + vacuum overhead. + + + + The disadvantage of this approach is that searches must scan the list + of pending entries in addition to searching the regular index, and so + a large list of pending entries will slow searches significantly. + Proper use of autovacuum can minimize this problem. + + + + If consistently-fast search speed is more important than update speed, + use of pending entries can be disabled by turning off the + FASTUPDATE storage parameter for a + GIN index. See for details. + + + Partial match algorithm ! GIN can support partial match queries, in which the query does not determine an exact match for one or more keys, but the possible *************** *** 225,235 **** Create vs insert ! In most cases, insertion into a GIN index is slow due to the likelihood of many keys being inserted for each value. So, for bulk insertions into a table it is advisable to drop the GIN index and recreate it after finishing bulk insertion. --- 259,276 ---- Create vs insert ! Insertion into a GIN index can be slow due to the likelihood of many keys being inserted for each value. So, for bulk insertions into a table it is advisable to drop the GIN index and recreate it after finishing bulk insertion. + + + As of PostgreSQL 8.4, this advice is less + necessary since delayed indexing is used (see for details). But for very large updates + it may still be best to drop and recreate the index. + *** doc/src/sgml/ref/create_index.sgml.orig Mon Feb 2 16:03:00 2009 --- doc/src/sgml/ref/create_index.sgml Thu Mar 5 18:10:54 2009 *************** *** 294,299 **** --- 294,329 ---- + + GIN indexes accept an additional parameter: + + + + + + FASTUPDATE + + + This setting controls usage of the fast update technique described in + . It is a Boolean parameter: + ON enables fast update, OFF disables it. + (Alternative spellings of ON and OFF are + allowed as described in .) The + default is ON.
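A minimal SQL sketch of how this storage parameter might be used (the names docs_gin_idx and documents_table are hypothetical; the fastupdate parameter and the VACUUM behavior are the ones this patch documents):

  CREATE INDEX docs_gin_idx ON documents_table USING gin (locations) WITH (fastupdate = off);
  ALTER INDEX docs_gin_idx SET (fastupdate = on);   -- re-enable fast update later
  ALTER INDEX docs_gin_idx SET (fastupdate = off);  -- disable again; already-queued entries stay pending
  VACUUM documents_table;                           -- flushes pending entries into the main index

As the ALTER INDEX note below explains, turning the parameter off only affects future insertions, which is why the sketch ends with a VACUUM to drain whatever is already queued.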
+ + + + + Turning FASTUPDATE off via ALTER INDEX prevents + future insertions from going into the list of pending index entries, + but does not in itself flush previous entries. You might want to do a + VACUUM afterward to ensure the pending list is emptied. + + + + + + *************** *** 502,507 **** --- 532,544 ---- + To create a GIN index with fast update turned off: + + CREATE INDEX gin_idx ON documents_table (locations) WITH (fastupdate = off); + + + + To create an index on the column code in the table films and have the index reside in the tablespace indexspace: *** doc/src/sgml/ref/vacuum.sgml.orig Thu Dec 11 18:16:46 2008 --- doc/src/sgml/ref/vacuum.sgml Thu Mar 5 18:10:54 2009 *************** *** 63,68 **** --- 63,75 ---- blocks. This form is much slower and requires an exclusive lock on each table while it is being processed. + + + For tables with GIN indexes, VACUUM (in + any form) also completes any delayed index insertions, by moving pending + index entries to the appropriate places in the main GIN index + structure. (See for more details.) + *** doc/src/sgml/textsearch.sgml.orig Wed Jan 7 17:40:49 2009 --- doc/src/sgml/textsearch.sgml Thu Mar 5 18:10:54 2009 *************** *** 3224,3230 **** ! GIN indexes are about ten times slower to update than GiST --- 3224,3232 ---- ! GIN indexes are moderately slower to update than GiST indexes, but ! about 10 times slower if fast update support was disabled ! (see for details) *** src/backend/access/common/reloptions.c.orig Fri Feb 27 19:10:51 2009 --- src/backend/access/common/reloptions.c Thu Mar 5 18:10:52 2009 *************** *** 56,61 **** --- 56,69 ---- }, true }, + { + { + "fastupdate", + "Enables \"fast update\" feature for this GIN index", + RELOPT_KIND_GIN + }, + true + }, /* list terminator */ { { NULL } } }; *** src/backend/access/gin/Makefile.orig Thu Jul 24 12:29:55 2008 --- src/backend/access/gin/Makefile Thu Mar 5 18:10:52 2009 *************** *** 14,19 **** OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \ ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \ ! ginbulk.o include $(top_srcdir)/src/backend/common.mk --- 14,19 ---- OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \ ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \ ! ginbulk.o ginfast.o include $(top_srcdir)/src/backend/common.mk *** src/backend/access/gin/ginbulk.c.orig Thu Jan 1 12:24:41 2009 --- src/backend/access/gin/ginbulk.c Thu Mar 5 18:10:52 2009 *************** *** 197,202 **** --- 197,204 ---- if (nentry <= 0) return; + Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); + i = nentry - 1; for (; i > 0; i >>= 1) nbit++; *** src/backend/access/gin/ginfast.c.orig Wed Dec 31 19:00:00 1969 --- src/backend/access/gin/ginfast.c Thu Mar 5 18:10:53 2009 *************** *** 0 **** --- 1,805 ---- + /*------------------------------------------------------------------------- + * + * ginfast.c + * Fast insert routines for the Postgres inverted index access method. + * Pending entries are stored in linear list of pages and vacuum + * will transfer them into regular structure. 
+ * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + + #include "postgres.h" + + #include "access/genam.h" + #include "access/gin.h" + #include "access/tuptoaster.h" + #include "catalog/index.h" + #include "commands/vacuum.h" + #include "miscadmin.h" + #include "storage/bufmgr.h" + #include "utils/memutils.h" + + + static int32 + writeListPage(Relation index, Buffer buffer, IndexTuple *tuples, int32 ntuples, BlockNumber rightlink) + { + Page page = BufferGetPage(buffer); + int i, freesize, size=0; + OffsetNumber l, off; + + START_CRIT_SECTION(); + + GinInitBuffer(buffer, GIN_LIST); + + off = FirstOffsetNumber; + + for(i=0; irightlink = rightlink; + /* + * tail page may contain only the whole row(s) or final + * part of row placed on previous pages + */ + if ( rightlink == InvalidBlockNumber ) + { + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + freesize = PageGetFreeSpace(page); + + MarkBufferDirty(buffer); + + if (!index->rd_istemp) + { + XLogRecData rdata[2]; + ginxlogInsertListPage data; + XLogRecPtr recptr; + char *ptr; + + rdata[0].buffer = buffer; + rdata[0].buffer_std = true; + rdata[0].data = (char*)&data; + rdata[0].len = sizeof(ginxlogInsertListPage); + rdata[0].next = rdata+1; + + rdata[1].buffer = InvalidBuffer; + ptr = rdata[1].data = palloc( size ); + rdata[1].len = size; + rdata[1].next = NULL; + + for(i=0; i 0); + + /* + * Split tuples for pages + */ + for(i=0;inPendingPages++; + writeListPage(index, prevBuffer, tuples+startTuple, i-startTuple, BufferGetBlockNumber(curBuffer)); + } + else + { + res->head = BufferGetBlockNumber(curBuffer); + } + + prevBuffer = curBuffer;; + startTuple = i; + size = 0; + } + + tupsize = IndexTupleSize(tuples[i]) + sizeof(ItemIdData); + + if ( size + tupsize >= GinListPageSize ) + { + i--; + curBuffer = InvalidBuffer; + } + else + { + size += tupsize; + } + } + + /* + * Write last page + */ + res->tail = BufferGetBlockNumber(curBuffer); + res->tailFreeSize = writeListPage(index, curBuffer, tuples+startTuple, ntuples-startTuple, InvalidBlockNumber); + res->nPendingPages++; + /* that was only one heap tuple */ + res->nPendingHeapTuples = 1; + } + + #define GIN_PAGE_FREESIZE \ + ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) + /* + * Inserts collected values during normal insertion. 
Function guarantees + * that all values of heap will be stored sequentually with + * preserving order + */ + void + ginHeapTupleFastInsert(Relation index, GinState *ginstate, GinTupleCollector *collector) + { + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata = NULL; + XLogRecData rdata[2]; + Buffer buffer = InvalidBuffer; + Page page = NULL; + ginxlogUpdateMeta data; + bool separateList = false; + bool needCleanup = false; + + if ( collector->ntuples == 0 ) + return; + + data.node = index->rd_node; + data.ntuples = 0; + data.newRightlink = data.prevTail = InvalidBlockNumber; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogUpdateMeta); + rdata[0].next = NULL; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + metapage = BufferGetPage(metabuffer); + + if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE ) + { + /* + * Total size is greater than one page => make sublist + */ + separateList = true; + } + else + { + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber || + collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize ) + { + /* + * Pending list is empty or total size is greater than freespace + * on tail page => make sublist + * We unlock metabuffer to keep high concurrency + */ + separateList = true; + LockBuffer(metabuffer, GIN_UNLOCK); + } + } + + if ( separateList ) + { + GinMetaPageData sublist; + + /* + * We should make sublist separately and append it to the tail + */ + memset( &sublist, 0, sizeof(GinMetaPageData) ); + + makeSublist(index, collector->tuples, collector->ntuples, &sublist); + + /* + * metapage was unlocked, see above + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber ) + { + /* + * Sublist becomes main list + */ + START_CRIT_SECTION(); + memcpy(metadata, &sublist, sizeof(GinMetaPageData) ); + memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) ); + } + else + { + /* + * merge lists + */ + + data.prevTail = metadata->tail; + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber); + + START_CRIT_SECTION(); + + GinPageGetOpaque(page)->rightlink = sublist.head; + metadata->tail = sublist.tail; + metadata->tailFreeSize = sublist.tailFreeSize; + + metadata->nPendingPages += sublist.nPendingPages; + metadata->nPendingHeapTuples += sublist.nPendingHeapTuples; + + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) ); + data.newRightlink = sublist.head; + + MarkBufferDirty(buffer); + } + } + else + { + /* + * Insert into tail page, metapage is already locked + */ + + OffsetNumber l, off; + int i, tupsize; + char *ptr; + + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + off = (PageIsEmpty(page)) ? 
FirstOffsetNumber : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + rdata[0].next = rdata + 1; + + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; + ptr = rdata[1].data = (char *) palloc( collector->sumsize ); + rdata[1].len = collector->sumsize; + rdata[1].next = NULL; + + data.ntuples = collector->ntuples; + + START_CRIT_SECTION(); + + /* + * Increase counter of heap tuples + */ + Assert( GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples ); + GinPageGetOpaque(page)->maxoff++; + metadata->nPendingHeapTuples++; + + for(i=0; intuples; i++) + { + tupsize = IndexTupleSize(collector->tuples[i]); + l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(index)); + + memcpy(ptr, collector->tuples[i], tupsize); + ptr+=tupsize; + + off++; + } + + metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData); + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) ); + MarkBufferDirty(buffer); + } + + /* + * Make real write + */ + + MarkBufferDirty(metabuffer); + if ( !index->rd_istemp ) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata); + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + + if ( buffer != InvalidBuffer ) + { + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + } + + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); + + /* + * Force pending list cleanup when it becomes too long. + * And, ginInsertCleanup could take significant amount of + * time, so we prefer call it when it can do all things by + * single collection cycle. In non-vacuum mode, it shouldn't + * require maintenance_work_mem, so let we fire a trigger + * while pending list is small enough to fit into work_mem + * + * ginInsertCleanup() should not be called inside CRIT_SECTION. + */ + + if ( metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L ) + needCleanup = true; + + UnlockReleaseBuffer(metabuffer); + + END_CRIT_SECTION(); + + if ( needCleanup ) + ginInsertCleanup(index, ginstate, NULL); + } + + /* + * Collect values from one tuples to be indexed. 
All values for + * one tuples should be written at once - to guarantee consistent state + */ + uint32 + ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector, + OffsetNumber attnum, Datum value, ItemPointer item) + { + Datum *entries; + int32 i, + nentries; + + entries = extractEntriesSU(ginstate, attnum, value, &nentries); + + if (nentries == 0) + /* nothing to insert */ + return 0; + + /* + * Allocate/reallocate memory for storing collected tuples + */ + if ( collector->tuples == NULL ) + { + collector->lentuples = nentries * index->rd_att->natts; + collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples); + } + + while ( collector->ntuples + nentries > collector->lentuples ) + { + collector->lentuples *= 2; + collector->tuples = (IndexTuple*)repalloc( collector->tuples, + sizeof(IndexTuple) * collector->lentuples); + } + + /* + * Creates tuple's array + */ + for (i = 0; i < nentries; i++) + { + int32 tupsize; + + collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0); + collector->tuples[collector->ntuples + i]->t_tid = *item; + tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]); + + if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize) + elog(ERROR, "huge tuple"); + + collector->sumsize += tupsize; + } + + collector->ntuples += nentries; + + return nentries; + } + + /* + * Deletes first pages in list before newHead page. + * If newHead == InvalidBlockNumber then function drops the whole list. + * returns true if concurrent completion process is running + */ + static bool + shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, + IndexBulkDeleteResult *stats) + { + Page metapage; + GinMetaPageData *metadata; + BlockNumber blknoToDelete; + + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + blknoToDelete = metadata->head; + + do + { + Page page; + int i; + int64 nDeletedHeapTuples = 0; + ginxlogDeleteListPages data; + XLogRecData rdata[1]; + Buffer buffers[NDELETE_AT_ONCE]; + + data.node = index->rd_node; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogDeleteListPages); + rdata[0].next = NULL; + + data.ndeleted = 0; + while( data.ndeleted < NDELETE_AT_ONCE && blknoToDelete != newHead ) + { + data.toDelete[ data.ndeleted ] = blknoToDelete; + buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete); + LockBuffer( buffers[ data.ndeleted ], GIN_EXCLUSIVE ); + page = BufferGetPage( buffers[ data.ndeleted ] ); + + data.ndeleted++; + if (stats) + stats->pages_deleted++; + + if ( GinPageIsDeleted(page) ) + { + /* concurrent deletion process is detected */ + for(i=0;imaxoff; + blknoToDelete = GinPageGetOpaque( page )->rightlink; + } + + START_CRIT_SECTION(); + + metadata->head = blknoToDelete; + + Assert( metadata->nPendingPages >= data.ndeleted ); + metadata->nPendingPages -= data.ndeleted; + Assert( metadata->nPendingHeapTuples >= nDeletedHeapTuples ); + metadata->nPendingHeapTuples -= nDeletedHeapTuples; + + if ( blknoToDelete == InvalidBlockNumber ) + { + metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; + } + memcpy( &data.metadata, metadata, sizeof(GinMetaPageData)); + + MarkBufferDirty( metabuffer ); + + for(i=0; iflags = GIN_DELETED; + MarkBufferDirty( buffers[ i ] ); + } + + if ( !index->rd_istemp ) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, 
rdata); + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + + for(i=0; invalues >= datums->maxvalues) + { + datums->maxvalues *= 2; + datums->values = (Datum*)repalloc( datums->values, sizeof(Datum)*datums->maxvalues); + } + + datums->values[ datums->nvalues++ ] = datum; + } + + /* + * Go through all tuples on page and collect values in memory + */ + + static void + processPendingPage(BuildAccumulator *accum, DatumArray *da, Page page, OffsetNumber startoff) + { + ItemPointerData heapptr; + OffsetNumber i,maxoff; + OffsetNumber attrnum, curattnum; + + maxoff = PageGetMaxOffsetNumber(page); + Assert( maxoff >= FirstOffsetNumber ); + ItemPointerSetInvalid(&heapptr); + attrnum = 0; + + for (i = startoff; i <= maxoff; i = OffsetNumberNext(i)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + + curattnum = gintuple_get_attrnum(accum->ginstate, itup); + + if ( !ItemPointerIsValid(&heapptr) ) + { + heapptr = itup->t_tid; + attrnum = curattnum; + } + else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) && curattnum == attrnum) ) + { + /* + * We can insert several datums per call, but only for one heap tuple + * and one column. + */ + ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); + da->nvalues = 0; + heapptr = itup->t_tid; + attrnum = curattnum; + } + addDatum(da, gin_index_getattr(accum->ginstate, itup)); + } + + ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); + } + + /* + * Moves tuples from pending pages into regular GIN structure. + * Function doesn't require special locking and could be called + * in any time but only one at the same time. + * + * Non-NULL stats indicates that ginInsertCleanup is called + * from vacuum process, so call vacuum_delay_point() periodically. + */ + + void + ginInsertCleanup(Relation index, GinState *ginstate, IndexBulkDeleteResult *stats) + { + Buffer metabuffer, buffer; + Page metapage, page; + GinMetaPageData *metadata; + MemoryContext opCtx, oldCtx; + BuildAccumulator accum; + DatumArray datums; + BlockNumber blkno; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_SHARE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber ) + { + UnlockReleaseBuffer(metabuffer); + return; + } + + /* + * Init + */ + datums.maxvalues=128; + datums.nvalues = 0; + datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues); + + ginInitBA(&accum); + accum.ginstate = ginstate; + + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin refresh temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + oldCtx = MemoryContextSwitchTo(opCtx); + + /* + * Read and lock head + */ + blkno = metadata->head; + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + + LockBuffer(metabuffer, GIN_UNLOCK); + + for(;;) + { + /* + * reset datum's collector and read page's datums into memory + */ + datums.nvalues = 0; + + if ( GinPageIsDeleted(page) ) + { + /* concurrent completion is running */ + UnlockReleaseBuffer( buffer ); + break; + } + + processPendingPage(&accum, &datums, page, FirstOffsetNumber); + + if (stats) + vacuum_delay_point(); + + /* + * Is it time to flush memory to disk? 
+ */ + if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || + ( GinPageHasFullRow(page) && accum.allocatedMemory > maintenance_work_mem * 1024L ) ) + { + ItemPointerData *list; + uint32 nlist; + Datum entry; + OffsetNumber maxoff, attnum; + + /* + * Unlock current page to increase performance. + * Changes of page will be checked later by comparing + * maxoff after completion of memory flush. + */ + maxoff = PageGetMaxOffsetNumber(page); + LockBuffer(buffer, GIN_UNLOCK); + + /* + * Moving collected data into regular structure can take + * significant amount of time - so, run it without locking pending + * list. + */ + while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) + { + if (stats) + vacuum_delay_point(); + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + } + + /* + * Lock the whole list to remove pages + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + LockBuffer(buffer, GIN_SHARE); + + if ( GinPageIsDeleted(page) ) + { + /* concurrent completion is running */ + UnlockReleaseBuffer(buffer); + LockBuffer(metabuffer, GIN_UNLOCK); + break; + } + + /* + * While we keeped page unlocked it might be changed - + * add read the changes separately. On one page is rather + * small - so, overused memory isn't very big, although + * we should reinit accumulator. We need to make a + * check only once because now both page and metapage are + * locked. Insertion algorithm gurantees that inserted row(s) + * will not continue on next page. + */ + if ( PageGetMaxOffsetNumber(page) != maxoff ) + { + ginInitBA(&accum); + datums.nvalues = 0; + processPendingPage(&accum, &datums, page, maxoff+1); + + while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + } + + /* + * Remember next page - it will become a new head + */ + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */ + + /* + * remove readed pages from pending list, at this point all + * content of readed pages is in regular structure + */ + if ( shiftList(index, metabuffer, blkno, stats) ) + { + /* concurrent completion is running */ + LockBuffer(metabuffer, GIN_UNLOCK); + break; + } + + Assert( blkno == metadata->head ); + LockBuffer(metabuffer, GIN_UNLOCK); + + /* + * if we remove the whole list just exit + */ + if ( blkno == InvalidBlockNumber ) + break; + + /* + * reinit state + */ + MemoryContextReset(opCtx); + ginInitBA(&accum); + } + else + { + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); + } + + /* + * Read next page in pending list + */ + CHECK_FOR_INTERRUPTS(); + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + } + + ReleaseBuffer(metabuffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(opCtx); + } + *** src/backend/access/gin/gindatapage.c.orig Thu Jan 1 12:24:41 2009 --- src/backend/access/gin/gindatapage.c Thu Mar 5 18:10:53 2009 *************** *** 43,50 **** while (aptr - a < na && bptr - b < nb) { ! if (compareItemPointers(aptr, bptr) > 0) *dptr++ = *bptr++; else *dptr++ = *aptr++; } --- 43,56 ---- while (aptr - a < na && bptr - b < nb) { ! int cmp = compareItemPointers(aptr, bptr); ! if (cmp > 0) *dptr++ = *bptr++; + else if ( cmp == 0 ) + { + *dptr++ = *bptr++; + aptr++; + } else *dptr++ = *aptr++; } *************** *** 630,640 **** gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack); if (gdi->btree.findItem(&(gdi->btree), gdi->stack)) ! 
elog(ERROR, "item pointer (%u,%d) already exists", ! ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem), ! ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem)); ! ! ginInsertValue(&(gdi->btree), gdi->stack); gdi->stack = NULL; } --- 636,651 ---- gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack); if (gdi->btree.findItem(&(gdi->btree), gdi->stack)) ! { ! /* ! * gdi->btree.items[ gdi->btree.curitem ] already exists in index ! */ ! gdi->btree.curitem ++; ! LockBuffer(gdi->stack->buffer, GIN_UNLOCK); ! freeGinBtreeStack(gdi->stack); ! } ! else ! ginInsertValue(&(gdi->btree), gdi->stack); gdi->stack = NULL; } *** src/backend/access/gin/ginget.c.orig Sat Jan 10 16:08:36 2009 --- src/backend/access/gin/ginget.c Thu Mar 5 18:10:53 2009 *************** *** 258,264 **** } /* ! * Start* functions setup begining state of searches: finds correct buffer and pins it. */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) --- 258,264 ---- } /* ! * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) *************** *** 268,273 **** --- 268,282 ---- Page page; bool needUnlock = TRUE; + entry->buffer = InvalidBuffer; + entry->offset = InvalidOffsetNumber; + entry->list = NULL; + entry->nlist = 0; + entry->partialMatch = NULL; + entry->partialMatchResult = NULL; + entry->reduceResult = FALSE; + entry->predictNumberResult = 0; + if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; *************** *** 285,299 **** page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; - entry->buffer = InvalidBuffer; - entry->offset = InvalidOffsetNumber; - entry->list = NULL; - entry->nlist = 0; - entry->partialMatch = NULL; - entry->partialMatchIterator = NULL; - entry->partialMatchResult = NULL; - entry->reduceResult = FALSE; - entry->predictNumberResult = 0; if ( entry->isPartialMatch ) { --- 294,299 ---- *************** *** 354,362 **** entry->buffer = scanBeginPostingTree(gdi); /* ! * We keep buffer pinned because we need to prevent deletition * page during scan. See GIN's vacuum implementation. RefCount ! * is increased to keep buffer pinned after freeGinBtreeStack() call. */ IncrBufferRefCount(entry->buffer); --- 354,363 ---- entry->buffer = scanBeginPostingTree(gdi); /* ! * We keep buffer pinned because we need to prevent deletion of * page during scan. See GIN's vacuum implementation. RefCount ! * is increased to keep buffer pinned after freeGinBtreeStack() ! * call. */ IncrBufferRefCount(entry->buffer); *************** *** 548,570 **** entry->isFinished = TRUE; break; } ! else if ( entry->partialMatchResult->ntuples < 0 ) ! { ! /* bitmap became lossy */ ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("not enough memory to store result of partial match operator" ), ! errhint("Increase the \"work_mem\" parameter."))); ! } entry->offset = 0; } ItemPointerSet(&entry->curItem, entry->partialMatchResult->blockno, entry->partialMatchResult->offsets[ entry->offset ]); entry->offset ++; ! ! } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); } else if (!BufferIsValid(entry->buffer)) { --- 549,584 ---- entry->isFinished = TRUE; break; } ! ! /* ! * reset counter to the beginning of entry->partialMatchResult. ! * Note, entry->offset is still greater partialMatchResult->ntuples ! * if partialMatchResult is lossy. So, on next call we will get ! 
* next result from TIDBitmap. ! */ entry->offset = 0; } + if ( entry->partialMatchResult->ntuples < 0 ) + { + /* + * lossy result, it's needed to check the whole page + */ + ItemPointerSetLossyPage(&entry->curItem, + entry->partialMatchResult->blockno); + /* + * go away from the loop because we could not estimate number of + * results on this page to support correct reducing of result + * if it's enabled + */ + break; + } + ItemPointerSet(&entry->curItem, entry->partialMatchResult->blockno, entry->partialMatchResult->offsets[ entry->offset ]); entry->offset ++; ! } while (entry->reduceResult == TRUE && dropItem(entry)); } else if (!BufferIsValid(entry->buffer)) { *************** *** 618,623 **** --- 632,641 ---- if (key->entryRes[i]) { + /* + * Move forward only entries which was the least + * on previous call + */ if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE) { if (compareItemPointers(&entry->curItem, &key->curItem) < 0) *************** *** 664,669 **** --- 682,696 ---- */ *keyrecheck = true; + /* + * If one of the entry's scans returns lossy result, return + * it without checking - we can't suggest to consistentFn + * anything helpful. + */ + + if (ItemPointerIsLossyPage(&key->curItem)) + return FALSE; + oldCtx = MemoryContextSwitchTo(tempCtx); res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1], PointerGetDatum(key->entryRes), *************** *** 677,682 **** --- 704,1032 ---- return FALSE; } + typedef struct fastPosition { + Buffer fastBuffer; + OffsetNumber firstOffset; + OffsetNumber lastOffset; + ItemPointerData item; + } fastPosition; + + + /* + * Get ItemPointer of next heap row to be checked from fast insert storage. + * Returns false if there are no more. + * + * The fastBuffer is presumed pinned and share-locked on entry, and is + * pinned and share-locked on success exit. On failure exit it's released. + */ + static bool + scanGetCandidate(IndexScanDesc scan, fastPosition *pos) + { + OffsetNumber maxoff; + Page page; + IndexTuple itup; + + ItemPointerSetInvalid( &pos->item ); + for(;;) + { + page = BufferGetPage(pos->fastBuffer); + + maxoff = PageGetMaxOffsetNumber(page); + if ( pos->firstOffset > maxoff ) + { + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + if ( blkno == InvalidBlockNumber ) + { + UnlockReleaseBuffer(pos->fastBuffer); + pos->fastBuffer=InvalidBuffer; + + return false; + } + else + { + /* + * Here we should prevent deletion of next page by + * insertcleanup process, which now tries to obtain + * exclusive lock on current page. 
So, we lock next + * page before releasing the current one + */ + Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno); + + LockBuffer( tmpbuf, GIN_SHARE ); + UnlockReleaseBuffer( pos->fastBuffer); + + pos->fastBuffer=tmpbuf; + pos->firstOffset = FirstOffsetNumber; + } + } + else + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset)); + pos->item = itup->t_tid; + if ( GinPageHasFullRow(page) ) + { + /* + * find itempointer to the next row + */ + for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset)); + if (!ItemPointerEquals(&pos->item, &itup->t_tid)) + break; + } + } + else + { + /* + * All itempointers are the same on this page + */ + pos->lastOffset = maxoff + 1; + } + break; + } + } + + return true; + } + + static bool + matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, + OffsetNumber maxoff, Datum value, OffsetNumber attrnum, + Datum *datum, bool *datumExtracted, StrategyNumber strategy) + { + IndexTuple itup; + int res; + + while( off < maxoff ) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + if ( attrnum != gintuple_get_attrnum(ginstate, itup) ) + return false; + + if (datumExtracted[ off-1 ] == false) + { + datum[ off-1 ] = gin_index_getattr(ginstate, itup); + datumExtracted[ off-1 ] = true; + } + + res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum], + value, + datum[ off-1 ], + UInt16GetDatum(strategy))); + if ( res == 0 ) + return true; + else if (res>0) + return false; + } + + return false; + } + /* + * Sets entryRes array for each key by looking on + * every entry per indexed value (row) in fast insert storage. + * returns true if at least one of datum was matched by key's entry + * + * The fastBuffer is presumed pinned and share-locked on entry. 
+ */ + static bool + collectDatumForItem(IndexScanDesc scan, fastPosition *pos) + { + GinScanOpaque so = (GinScanOpaque) scan->opaque; + OffsetNumber attrnum; + Page page; + IndexTuple itup; + int i, j; + bool hasMatch = false; + + /* + * Resets entryRes + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + memset( key->entryRes, FALSE, key->nentries ); + } + + for(;;) + { + Datum datum[ BLCKSZ/sizeof(IndexTupleData) ]; + bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ]; + + Assert( pos->lastOffset > pos->firstOffset ); + memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset )); + + page = BufferGetPage(pos->fastBuffer); + + for(i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + for(j=0; jnentries; j++) + { + OffsetNumber StopLow = pos->firstOffset, + StopHigh = pos->lastOffset, + StopMiddle; + GinScanEntry entry = key->scanEntry + j; + + if ( key->entryRes[j] ) + continue; + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); + attrnum = gintuple_get_attrnum(&so->ginstate, itup); + + if (key->attnum < attrnum) + StopHigh = StopMiddle; + else if (key->attnum > attrnum) + StopLow = StopMiddle + 1; + else + { + int res; + + if (datumExtracted[ StopMiddle-1 ] == false) + { + datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup); + datumExtracted[ StopMiddle-1 ] = true; + } + res = compareEntries(&so->ginstate, + entry->attnum, + entry->entry, + datum[ StopMiddle-1 ]); + + if ( res == 0 ) + { + if ( entry->isPartialMatch ) + key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopMiddle, + pos->lastOffset, entry->entry, entry->attnum, + datum, datumExtracted, entry->strategy); + else + key->entryRes[j] = true; + break; + } + else if ( res < 0 ) + StopHigh = StopMiddle; + else + StopLow = StopMiddle + 1; + } + } + + if ( StopLow>=StopHigh && entry->isPartialMatch ) + key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopHigh, + pos->lastOffset, entry->entry, entry->attnum, + datum, datumExtracted, entry->strategy); + + hasMatch |= key->entryRes[j]; + } + } + + pos->firstOffset = pos->lastOffset; + + if ( GinPageHasFullRow(page) ) + { + /* + * We scan all values from one tuple, go to next one + */ + + return hasMatch; + } + else + { + ItemPointerData item = pos->item; + + if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) ) + elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */ + } + } + + return hasMatch; + } + + /* + * Collect all matched rows from pending list in bitmap + */ + static void + scanFastInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) + { + GinScanOpaque so = (GinScanOpaque) scan->opaque; + MemoryContext oldCtx; + bool recheck, keyrecheck, match; + int i; + fastPosition pos; + Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO); + BlockNumber blkno; + + *ntids = 0; + + LockBuffer(metabuffer, GIN_SHARE); + blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head; + + /* + * fetch head of list before unlocking metapage. 
+ * head page must be pinned to prevent deletion by vacuum process + */ + if ( blkno == InvalidBlockNumber ) + { + /* No pending list, so proceed with normal scan */ + UnlockReleaseBuffer( metabuffer ); + return; + } + + pos.fastBuffer = ReadBuffer(scan->indexRelation, blkno); + LockBuffer(pos.fastBuffer, GIN_SHARE); + pos.firstOffset = FirstOffsetNumber; + UnlockReleaseBuffer( metabuffer ); + + /* + * loop for each heap row + */ + while( scanGetCandidate(scan, &pos) ) + { + + /* + * Check entries in rows and setup entryRes array + */ + if (!collectDatumForItem(scan, &pos)) + continue; + + /* + * check for consistent + */ + oldCtx = MemoryContextSwitchTo(so->tempCtx); + recheck = false; + match = true; + + for (i = 0; match && i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + keyrecheck = true; + + if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ], + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + PointerGetDatum(&keyrecheck))) == false ) + { + match = false; + } + + recheck |= keyrecheck; + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(so->tempCtx); + + if ( match ) + { + tbm_add_tuples(tbm, &pos.item, 1, recheck); + (*ntids)++; + } + } + } + /* * Get heap item pointer from scan * returns true if found *************** *** 700,711 **** *recheck = false; ItemPointerSetMin(item); for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, ! key, &keyrecheck)) return FALSE; /* finished one of keys */ if (compareItemPointers(item, &key->curItem) < 0) *item = key->curItem; --- 1050,1062 ---- *recheck = false; ItemPointerSetMin(item); + for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, ! key, &keyrecheck)) return FALSE; /* finished one of keys */ if (compareItemPointers(item, &key->curItem) < 0) *item = key->curItem; *************** *** 720,725 **** --- 1071,1088 ---- { int cmp = compareItemPointers(item, &key->curItem); + if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) ) + { + /* + * if one of ItemPointers points to the whole page then + * compare only page's number + */ + if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) ) + cmp = 0; + else + cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1; + } + if (cmp == 0) break; else if (cmp > 0) *************** *** 757,765 **** if (GinIsVoidRes(scan)) PG_RETURN_INT64(0); startScan(scan); - ntids = 0; for (;;) { ItemPointerData iptr; --- 1120,1137 ---- if (GinIsVoidRes(scan)) PG_RETURN_INT64(0); + ntids = 0; + + /* + * Scan of pending pages + */ + scanFastInsert(scan, tbm, &ntids); + + /* + * Switch to regular scan + */ startScan(scan); for (;;) { ItemPointerData iptr; *************** *** 770,776 **** if (!scanGetItem(scan, &iptr, &recheck)) break; ! tbm_add_tuples(tbm, &iptr, 1, recheck); ntids++; } --- 1142,1151 ---- if (!scanGetItem(scan, &iptr, &recheck)) break; ! if ( ItemPointerIsLossyPage(&iptr) ) ! tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr)); ! else ! tbm_add_tuples(tbm, &iptr, 1, recheck); ntids++; } *************** *** 780,800 **** Datum gingettuple(PG_FUNCTION_ARGS) { ! IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ! ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); ! bool res; ! ! if (dir != ForwardScanDirection) ! 
elog(ERROR, "GIN doesn't support other scan directions than forward"); ! ! if (GinIsNewKey(scan)) ! newScanKey(scan); ! ! if (GinIsVoidRes(scan)) ! PG_RETURN_BOOL(false); ! ! startScan(scan); ! res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck); ! ! PG_RETURN_BOOL(res); } --- 1155,1160 ---- Datum gingettuple(PG_FUNCTION_ARGS) { ! elog(ERROR, "GIN doesn't support amgettuple interface"); ! PG_RETURN_BOOL(false); } *** src/backend/access/gin/gininsert.c.orig Thu Jan 1 12:24:41 2009 --- src/backend/access/gin/gininsert.c Thu Mar 5 18:10:53 2009 *************** *** 138,144 **** /* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. */ ! static void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) { --- 138,144 ---- /* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. */ ! void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) { *************** *** 273,279 **** IndexBuildResult *result; double reltuples; GinBuildState buildstate; ! Buffer buffer; ItemPointerData *list; Datum entry; uint32 nlist; --- 273,279 ---- IndexBuildResult *result; double reltuples; GinBuildState buildstate; ! Buffer RootBuffer, MetaBuffer; ItemPointerData *list; Datum entry; uint32 nlist; *************** *** 286,296 **** initGinState(&buildstate.ginstate, index); /* initialize the root page */ ! buffer = GinNewBuffer(index); START_CRIT_SECTION(); ! GinInitBuffer(buffer, GIN_LEAF); ! MarkBufferDirty(buffer); if (!index->rd_istemp) { --- 286,302 ---- initGinState(&buildstate.ginstate, index); + /* initialize the meta page */ + MetaBuffer = GinNewBuffer(index); + /* initialize the root page */ ! RootBuffer = GinNewBuffer(index); ! START_CRIT_SECTION(); ! GinInitMetabuffer(MetaBuffer); ! MarkBufferDirty(MetaBuffer); ! GinInitBuffer(RootBuffer, GIN_LEAF); ! MarkBufferDirty(RootBuffer); if (!index->rd_istemp) { *************** *** 303,318 **** rdata.len = sizeof(RelFileNode); rdata.next = NULL; - page = BufferGetPage(buffer); - - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } ! UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); /* build the index */ --- 309,327 ---- rdata.len = sizeof(RelFileNode); rdata.next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); + + page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); + page = BufferGetPage(MetaBuffer); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); } ! UnlockReleaseBuffer(MetaBuffer); ! UnlockReleaseBuffer(RootBuffer); END_CRIT_SECTION(); /* build the index */ *************** *** 417,425 **** initGinState(&ginstate, index); ! for(i=0; inatts;i++) ! if ( !isnull[i] ) ! res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid); MemoryContextSwitchTo(oldCtx); MemoryContextDelete(insertCtx); --- 426,451 ---- initGinState(&ginstate, index); ! if ( GinGetUseFastUpdate(index) ) ! { ! GinTupleCollector collector; ! ! memset(&collector, 0, sizeof(GinTupleCollector)); ! for(i=0; inatts;i++) ! if ( !isnull[i] ) ! res += ginHeapTupleFastCollect(index, &ginstate, &collector, ! (OffsetNumber)(i+1), values[i], ht_ctid); ! ! ginHeapTupleFastInsert(index, &ginstate, &collector); ! } ! else ! { ! for(i=0; inatts;i++) ! if ( !isnull[i] ) ! 
res += ginHeapTupleInsert(index, &ginstate, ! (OffsetNumber)(i+1), values[i], ht_ctid); ! ! } MemoryContextSwitchTo(oldCtx); MemoryContextDelete(insertCtx); *** src/backend/access/gin/ginutil.c.orig Tue Jan 6 10:15:13 2009 --- src/backend/access/gin/ginutil.c Thu Mar 5 18:10:53 2009 *************** *** 21,26 **** --- 21,27 ---- #include "storage/freespace.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" + #include "utils/guc.h" void initGinState(GinState *state, Relation index) *************** *** 57,63 **** CurrentMemoryContext); /* ! * Check opclass capability to do partial match. */ if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid ) { --- 58,64 ---- CurrentMemoryContext); /* ! * Check opclass capability to do partial match. */ if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid ) { *************** *** 88,94 **** bool isnull; /* ! * First attribute is always int16, so we can safely use any * tuple descriptor to obtain first attribute of tuple */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0], --- 89,95 ---- bool isnull; /* ! * First attribute is always int16, so we can safely use any * tuple descriptor to obtain first attribute of tuple */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0], *************** *** 213,218 **** --- 214,235 ---- GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b)); } + void + GinInitMetabuffer(Buffer b) + { + GinMetaPageData *metadata; + Page page = BufferGetPage(b); + + GinInitPage(page, GIN_META, BufferGetPageSize(b)); + + metadata = GinPageGetMeta(page); + + metadata->head = metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; + } + int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b) { *************** *** 313,324 **** Datum ginoptions(PG_FUNCTION_ARGS) { ! Datum reloptions = PG_GETARG_DATUM(0); ! bool validate = PG_GETARG_BOOL(1); ! bytea *result; ! ! result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN); ! if (result) ! PG_RETURN_BYTEA_P(result); ! PG_RETURN_NULL(); } --- 330,357 ---- Datum ginoptions(PG_FUNCTION_ARGS) { ! Datum reloptions = PG_GETARG_DATUM(0); ! bool validate = PG_GETARG_BOOL(1); ! relopt_value *options; ! GinOptions *rdopts; ! int numoptions; ! relopt_parse_elt tab[] = { ! {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)} ! }; ! ! options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN, &numoptions); ! ! /* if none set, we're done */ ! if (numoptions == 0) ! PG_RETURN_NULL(); ! ! rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions); ! ! fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions, ! validate, tab, lengthof(tab)); ! ! pfree(options); ! ! PG_RETURN_BYTEA_P(rdopts); } + *** src/backend/access/gin/ginvacuum.c.orig Thu Jan 1 12:24:42 2009 --- src/backend/access/gin/ginvacuum.c Thu Mar 5 18:10:53 2009 *************** *** 593,610 **** BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; /* first time through? 
*/ if (stats == NULL) stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; - - gvs.index = index; gvs.result = stats; - gvs.callback = callback; - gvs.callback_state = callback_state; - gvs.strategy = info->strategy; - initGinState(&gvs.ginstate, index); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); --- 593,614 ---- BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; + gvs.index = index; + gvs.callback = callback; + gvs.callback_state = callback_state; + gvs.strategy = info->strategy; + initGinState(&gvs.ginstate, index); + /* first time through? */ if (stats == NULL) + { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + ginInsertCleanup(index, &gvs.ginstate, stats); + } + /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; gvs.result = stats; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); *************** *** 703,711 **** BlockNumber lastBlock = GIN_ROOT_BLKNO, lastFilledBlock = GIN_ROOT_BLKNO; ! /* Set up all-zero stats if ginbulkdelete wasn't called */ if (stats == NULL) stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* * XXX we always report the heap tuple count as the number of index --- 707,724 ---- BlockNumber lastBlock = GIN_ROOT_BLKNO, lastFilledBlock = GIN_ROOT_BLKNO; ! /* ! * Set up all-zero stats and finalyze fast insertion ! * if ginbulkdelete wasn't called ! */ if (stats == NULL) + { + GinState ginstate; + + initGinState(&ginstate, index); stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + ginInsertCleanup(index, &ginstate, stats); + } /* * XXX we always report the heap tuple count as the number of index *** src/backend/access/gin/ginxlog.c.orig Wed Jan 21 11:14:21 2009 --- src/backend/access/gin/ginxlog.c Thu Mar 5 18:10:53 2009 *************** *** 71,90 **** ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { RelFileNode *node = (RelFileNode *) XLogRecGetData(record); ! Buffer buffer; Page page; ! buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true); ! Assert(BufferIsValid(buffer)); ! page = (Page) BufferGetPage(buffer); ! GinInitBuffer(buffer, GIN_LEAF); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); ! MarkBufferDirty(buffer); ! UnlockReleaseBuffer(buffer); } static void --- 71,100 ---- ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { RelFileNode *node = (RelFileNode *) XLogRecGetData(record); ! Buffer RootBuffer, MetaBuffer; Page page; ! MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true); ! Assert(BufferIsValid(MetaBuffer)); ! GinInitMetabuffer(MetaBuffer); ! ! page = (Page) BufferGetPage(MetaBuffer); ! PageSetLSN(page, lsn); ! PageSetTLI(page, ThisTimeLineID); ! RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true); ! Assert(BufferIsValid(RootBuffer)); ! page = (Page) BufferGetPage(RootBuffer); ! ! GinInitBuffer(RootBuffer, GIN_LEAF); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); ! MarkBufferDirty(MetaBuffer); ! UnlockReleaseBuffer(MetaBuffer); ! MarkBufferDirty(RootBuffer); ! 
UnlockReleaseBuffer(RootBuffer); } static void *************** *** 433,438 **** --- 443,616 ---- } } + static void + ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record) + { + ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + + metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false); + metapage = BufferGetPage(metabuffer); + + if (!XLByteLE(lsn, PageGetLSN(metapage))) + { + memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + PageSetTLI(metapage, ThisTimeLineID); + MarkBufferDirty(metabuffer); + } + + if ( data->ntuples > 0 ) + { + /* + * insert into tail page + */ + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + int i, tupsize; + IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta)); + + for(i=0; intuples; i++) + { + tupsize = IndexTupleSize(tuples); + + l = PageAddItem(page, (Item)tuples, tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple)( ((char*)tuples) + tupsize ); + } + + /* + * Increase counter of heap tuples + */ + GinPageGetOpaque(page)->maxoff++; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + else if ( data->prevTail != InvalidBlockNumber ) + { + /* + * New tail + */ + + Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + GinPageGetOpaque(page)->rightlink = data->newRightlink; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + + UnlockReleaseBuffer(metabuffer); + } + + static void + ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record) + { + ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record); + Buffer buffer; + Page page; + OffsetNumber l, off = FirstOffsetNumber; + int i, tupsize; + IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage)); + + if (record->xl_info & XLR_BKP_BLOCK_1) + return; + + buffer = XLogReadBuffer(data->node, data->blkno, true); + page = BufferGetPage(buffer); + + GinInitBuffer(buffer, GIN_LIST); + GinPageGetOpaque(page)->rightlink = data->rightlink; + if ( data->rightlink == InvalidBlockNumber ) + { + /* tail of sublist */ + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + for(i=0; intuples; i++) + { + tupsize = IndexTupleSize(tuples); + + l = PageAddItem(page, (Item)tuples, tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple)( ((char*)tuples) + tupsize ); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + + UnlockReleaseBuffer(buffer); + } + + static void + ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record) + { + ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + int i; + + metabuffer = XLogReadBuffer(data->node, 
GIN_METAPAGE_BLKNO, false); + metapage = BufferGetPage(metabuffer); + + if (!XLByteLE(lsn, PageGetLSN(metapage))) + { + memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + PageSetTLI(metapage, ThisTimeLineID); + MarkBufferDirty(metabuffer); + } + + for(i=0; indeleted; i++) + { + Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + GinPageGetOpaque(page)->flags = GIN_DELETED; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + + UnlockReleaseBuffer(buffer); + } + UnlockReleaseBuffer(metabuffer); + } + void gin_redo(XLogRecPtr lsn, XLogRecord *record) { *************** *** 461,466 **** --- 639,653 ---- case XLOG_GIN_DELETE_PAGE: ginRedoDeletePage(lsn, record); break; + case XLOG_GIN_UPDATE_META_PAGE: + ginRedoUpdateMetapage(lsn, record); + break; + case XLOG_GIN_INSERT_LISTPAGE: + ginRedoInsertListPage(lsn, record); + break; + case XLOG_GIN_DELETE_LISTPAGE: + ginRedoDeleteListPages(lsn, record); + break; default: elog(PANIC, "gin_redo: unknown op code %u", info); } *************** *** 516,521 **** --- 703,720 ---- appendStringInfo(buf, "Delete page, "); desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno); break; + case XLOG_GIN_UPDATE_META_PAGE: + appendStringInfo(buf, "Update metapage, "); + desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail); + break; + case XLOG_GIN_INSERT_LISTPAGE: + appendStringInfo(buf, "insert new list page, "); + desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno); + break; + case XLOG_GIN_DELETE_LISTPAGE: + appendStringInfo(buf, "Delete list page (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted); + desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head); + break; default: elog(PANIC, "gin_desc: unknown op code %u", info); } *** src/backend/catalog/system_views.sql.orig Fri Feb 6 16:15:11 2009 --- src/backend/catalog/system_views.sql Thu Mar 5 18:10:53 2009 *************** *** 193,198 **** --- 193,199 ---- pg_stat_get_tuples_updated(C.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(C.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(C.oid) AS n_tup_hot_upd, + pg_stat_get_fresh_inserted_tuples(C.oid) AS n_fresh_tup, pg_stat_get_live_tuples(C.oid) AS n_live_tup, pg_stat_get_dead_tuples(C.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(C.oid) as last_vacuum, *** src/backend/nodes/tidbitmap.c.orig Sat Jan 10 16:08:36 2009 --- src/backend/nodes/tidbitmap.c Thu Mar 5 18:10:53 2009 *************** *** 310,315 **** --- 310,396 ---- } /* + * tbm_add_page - add the whole page to a TIDBitmap. 
+ */ + void + tbm_add_page(TIDBitmap *tbm, BlockNumber pageno) + { + PagetableEntry *page; + bool found; + BlockNumber chunk_pageno; + int bitno; + int wordnum; + int bitnum; + + /* We force the bitmap into hashtable mode whenever it's lossy */ + if (tbm->status != TBM_HASH) + tbm_create_pagetable(tbm); + + bitno = pageno % PAGES_PER_CHUNK; + chunk_pageno = pageno - bitno; + + /* Look up or create entry for chunk-header page */ + page = (PagetableEntry *) hash_search(tbm->pagetable, + (void *) &chunk_pageno, + HASH_ENTER, &found); + + if (!found) + { + /* Initialize it if not present before */ + MemSet(page, 0, sizeof(PagetableEntry)); + page->blockno = chunk_pageno; + page->ischunk = true; + /* must count it too */ + tbm->nentries++; + tbm->nchunks++; + + /* loop below doesn't mark chunk page itself */ + if (bitno == 0) + page->words[0] = ((bitmapword) 1 << 0); + } + else if (!page->ischunk) + { + /* chunk header page was formerly non-lossy, make it lossy */ + MemSet(page, 0, sizeof(PagetableEntry)); + page->blockno = chunk_pageno; + page->ischunk = true; + /* we assume it had some tuple bit(s) set, so mark it lossy */ + page->words[0] = ((bitmapword) 1 << 0); + /* adjust counts */ + tbm->nchunks++; + tbm->npages--; + } + else + { + /* just set page's bit */ + wordnum = WORDNUM(bitno); + bitnum = BITNUM(bitno); + page->words[wordnum] |= ((bitmapword) 1 << bitnum); + /* nothing to do more */ + return; + } + + /* + * look for other pages in chunk. Note, we skip + * chunk page itself because it's marked above. + */ + for(bitno=1; bitnopagetable, + (void *) &chunk_pageno, + HASH_REMOVE, NULL) != NULL) + { + wordnum = WORDNUM(bitno); + bitnum = BITNUM(bitno); + page->words[wordnum] |= ((bitmapword) 1 << bitnum); + tbm->nentries--; + tbm->npages--; /* assume it must have been non-lossy */ + } + } + } + + /* * tbm_union - set union * * a is modified in-place, b is not changed *** src/backend/postmaster/autovacuum.c.orig Tue Feb 10 11:22:16 2009 --- src/backend/postmaster/autovacuum.c Thu Mar 5 18:10:53 2009 *************** *** 72,77 **** --- 72,78 ---- #include "access/reloptions.h" #include "access/transam.h" #include "access/xact.h" + #include "access/gin.h" #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/namespace.h" *************** *** 2470,2475 **** --- 2471,2528 ---- } /* + * relation_has_pending_indexes + * + * Returns true if relation has indexes with delayed insertion. 
+  * Currently, only GIN has that possibility.
+  */
+ 
+ static bool
+ relation_has_pending_indexes(Oid relid, Form_pg_class classForm)
+ {
+ 	Relation	rel;
+ 	List	   *indexoidlist;
+ 	ListCell   *indexoidscan;
+ 	bool		has = false;
+ 
+ 	/* only an ordinary cataloged heap relation can contain such indexes */
+ 	if ( classForm->relkind != RELKIND_RELATION )
+ 		return false;
+ 
+ 	/* no indexes at all */
+ 	if ( classForm->relhasindex == false )
+ 		return false;
+ 
+ 	rel = RelationIdGetRelation(relid);
+ 
+ 	indexoidlist = RelationGetIndexList(rel);
+ 
+ 	foreach(indexoidscan, indexoidlist)
+ 	{
+ 		Oid			indexoid = lfirst_oid(indexoidscan);
+ 		Relation	irel = RelationIdGetRelation(indexoid);
+ 
+ 		/*
+ 		 * Currently, only GIN indexes in fast update mode qualify
+ 		 */
+ 		if ( irel->rd_rel->relam == GIN_AM_OID && GinGetUseFastUpdate(irel) )
+ 		{
+ 			RelationClose(irel);
+ 			has = true;
+ 			break;
+ 		}
+ 
+ 		RelationClose(irel);
+ 	}
+ 
+ 	list_free(indexoidlist);
+ 
+ 	RelationClose(rel);
+ 
+ 	return has;
+ }
+ 
+ /*
   * relation_needs_vacanalyze
   *
   * Check whether a relation needs to be vacuumed or analyzed; return each into
***************
*** 2531,2537 ****
  	/* number of vacuum (resp. analyze) tuples at this time */
  	float4		vactuples,
! 				anltuples;
  
  	/* freeze parameters */
  	int			freeze_max_age;
--- 2584,2591 ----
  	/* number of vacuum (resp. analyze) tuples at this time */
  	float4		vactuples,
! 				anltuples,
! 				instuples;
  
  	/* freeze parameters */
  	int			freeze_max_age;
***************
*** 2588,2593 ****
--- 2642,2648 ----
  	vactuples = tabentry->n_dead_tuples;
  	anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
  		tabentry->last_anl_tuples;
+ 	instuples = tabentry->n_inserted_tuples;
  
  	vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
  	anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
***************
*** 2601,2608 ****
  			 NameStr(classForm->relname),
  			 vactuples, vacthresh, anltuples, anlthresh);
  
! 		/* Determine if this table needs vacuum or analyze. */
! 		*dovacuum = force_vacuum || (vactuples > vacthresh);
  		*doanalyze = (anltuples > anlthresh);
  	}
  	else
--- 2656,2668 ----
  			 NameStr(classForm->relname),
  			 vactuples, vacthresh, anltuples, anlthresh);
  
! 		/*
! 		 * Determine if this table needs vacuum or analyze.
! 		 * Use vac_base_thresh as a threshold for instuples because
! 		 * the search time of GIN's pending pages is linear in their number.
! 		 */
! 		*dovacuum = force_vacuum || (vactuples > vacthresh) ||
! 			(relation_has_pending_indexes(relid, classForm) && instuples > vac_base_thresh);
  		*doanalyze = (anltuples > anlthresh);
  	}
  	else
*** src/backend/postmaster/pgstat.c.orig	Thu Jan 1 12:25:11 2009
--- src/backend/postmaster/pgstat.c	Thu Mar 5 18:10:53 2009
***************
*** 3537,3542 ****
--- 3537,3545 ----
  		tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated;
  		tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted;
  		tabentry->tuples_hot_updated = tabmsg[i].t_counts.t_tuples_hot_updated;
+ 		tabentry->n_inserted_tuples = tabmsg[i].t_counts.t_tuples_inserted +
+ 									  tabmsg[i].t_counts.t_tuples_updated -
+ 									  tabmsg[i].t_counts.t_tuples_hot_updated;
  		tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples;
  		tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples;
  		tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched;
***************
*** 3560,3565 ****
--- 3563,3571 ----
  		tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
  		tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
  		tabentry->tuples_hot_updated += tabmsg[i].t_counts.t_tuples_hot_updated;
+ 		tabentry->n_inserted_tuples += tabmsg[i].t_counts.t_tuples_inserted +
+ 									   tabmsg[i].t_counts.t_tuples_updated -
+ 									   tabmsg[i].t_counts.t_tuples_hot_updated;
  		tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples;
  		tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples;
  		tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
***************
*** 3570,3575 ****
--- 3576,3583 ----
  		tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
  		/* Likewise for n_dead_tuples */
  		tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
+ 		/* Likewise for n_inserted_tuples */
+ 		tabentry->n_inserted_tuples = Max(tabentry->n_inserted_tuples, 0);
  
  		/*
  		 * Add per-table stats to the per-database entry, too.
***************
*** 3770,3775 ****
--- 3778,3784 ----
  		tabentry->n_live_tuples = msg->m_tuples;
  		/* Resetting dead_tuples to 0 is an approximation ... */
  		tabentry->n_dead_tuples = 0;
+ 		tabentry->n_inserted_tuples = 0;
  		if (msg->m_analyze)
  		{
  			if (msg->m_scanned_all)
*** src/backend/utils/adt/pgstatfuncs.c.orig	Thu Jan 1 12:25:21 2009
--- src/backend/utils/adt/pgstatfuncs.c	Thu Mar 5 18:17:37 2009
***************
*** 31,36 ****
--- 31,37 ----
  extern Datum pg_stat_get_tuples_deleted(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_tuples_hot_updated(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_live_tuples(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_dead_tuples(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_blocks_hit(PG_FUNCTION_ARGS);
***************
*** 211,216 ****
--- 212,233 ----
  
  Datum
+ pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS)
+ {
+ 	Oid			relid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatTabEntry *tabentry;
+ 
+ 	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (tabentry->n_inserted_tuples);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ 
+ Datum
  pg_stat_get_dead_tuples(PG_FUNCTION_ARGS)
  {
  	Oid			relid = PG_GETARG_OID(0);
*** src/include/access/gin.h.orig	Sat Jan 10 16:08:36 2009
--- src/include/access/gin.h	Thu Mar 5 18:10:53 2009
***************
*** 21,26 ****
--- 21,27 ----
  #include "storage/buf.h"
  #include "storage/off.h"
  #include "storage/relfilenode.h"
+ #include "utils/rel.h"
  
  /*
***************
*** 46,62 ****
  	OffsetNumber maxoff;		/* number entries on GIN_DATA page: number of
  								 * heap ItemPointer on GIN_DATA|GIN_LEAF page
  								 * and number of records on GIN_DATA &
! 								 * ~GIN_LEAF page */
  	uint16		flags;			/* see bit definitions below */
  } GinPageOpaqueData;
  
  typedef GinPageOpaqueData *GinPageOpaque;
  
! #define GIN_ROOT_BLKNO (0)
  #define GIN_DATA (1 << 0)
  #define GIN_LEAF (1 << 1)
  #define GIN_DELETED (1 << 2)
  
  /*
   * Works on page
--- 47,93 ----
  	OffsetNumber maxoff;		/* number entries on GIN_DATA page: number of
  								 * heap ItemPointer on GIN_DATA|GIN_LEAF page
  								 * and number of records on GIN_DATA &
! 								 * ~GIN_LEAF page. On a GIN_LIST page, number
! 								 * of heap tuples. */
  	uint16		flags;			/* see bit definitions below */
  } GinPageOpaqueData;
  
  typedef GinPageOpaqueData *GinPageOpaque;
  
! #define GIN_METAPAGE_BLKNO (0)
! #define GIN_ROOT_BLKNO (1)
  #define GIN_DATA (1 << 0)
  #define GIN_LEAF (1 << 1)
  #define GIN_DELETED (1 << 2)
+ #define GIN_META (1 << 3)
+ #define GIN_LIST (1 << 4)
+ #define GIN_LIST_FULLROW (1 << 5)	/* makes sense only on GIN_LIST page */
+ 
+ typedef struct GinMetaPageData
+ {
+ 	/*
+ 	 * Pointers to head and tail of list of GIN_LIST pages.  These store
+ 	 * fast-inserted entries that haven't yet been moved into the regular
+ 	 * GIN structure.
+ 	 */
+ 	BlockNumber head;
+ 	BlockNumber tail;
+ 
+ 	/*
+ 	 * Free space in bytes in the list's tail page.
+ 	 */
+ 	uint32		tailFreeSize;
+ 
+ 	/*
+ 	 * Store both the number of pages and the number of heap tuples
+ 	 * in the pending list.
+ 	 */
+ 	BlockNumber nPendingPages;
+ 	int64		nPendingHeapTuples;
+ } GinMetaPageData;
+ 
+ #define GinPageGetMeta(p) \
+ 	((GinMetaPageData *) PageGetContents(p))
  
  /*
   * Works on page
***************
*** 68,73 ****
--- 99,106 ----
  #define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
  #define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
  #define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+ #define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+ #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
  
  #define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
  #define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
***************
*** 135,140 ****
--- 168,186 ----
  	- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
  	- MAXALIGN(sizeof(GinPageOpaqueData)))
  
+ /*
+  * storage type for GIN's options.
+  */
+ typedef struct GinOptions
+ {
+ 	int32		vl_len_;		/* varlena header (do not touch directly!) */
+ 	bool		useFastUpdate;	/* use fast updates? */
+ } GinOptions;
+ 
+ #define GIN_DEFAULT_USE_FASTUPDATE true
+ #define GinGetUseFastUpdate(relation) \
+ 	((relation)->rd_options ? \
+ 	 ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
  
  #define GIN_UNLOCK BUFFER_LOCK_UNLOCK
  #define GIN_SHARE BUFFER_LOCK_SHARE
***************
*** 234,245 ****
--- 280,328 ----
  	BlockNumber rightLink;
  } ginxlogDeletePage;
+ 
+ #define XLOG_GIN_UPDATE_META_PAGE 0x60
+ 
+ typedef struct ginxlogUpdateMeta
+ {
+ 	RelFileNode node;
+ 	GinMetaPageData metadata;
+ 	BlockNumber prevTail;
+ 	BlockNumber newRightlink;
+ 	int32		ntuples;	/* if ntuples > 0 then metadata.tail was updated with
+ 							   those tuples; else a new sublist was inserted */
+ 	/* follows array of inserted tuples */
+ } ginxlogUpdateMeta;
+ 
+ #define XLOG_GIN_INSERT_LISTPAGE 0x70
+ 
+ typedef struct ginxlogInsertListPage
+ {
+ 	RelFileNode node;
+ 	BlockNumber blkno;
+ 	BlockNumber rightlink;
+ 	int32		ntuples;
+ 	/* follows array of inserted tuples */
+ } ginxlogInsertListPage;
+ 
+ #define XLOG_GIN_DELETE_LISTPAGE 0x80
+ 
+ #define NDELETE_AT_ONCE (16)
+ typedef struct ginxlogDeleteListPages
+ {
+ 	RelFileNode node;
+ 	GinMetaPageData metadata;
+ 	int32		ndeleted;
+ 	BlockNumber toDelete[ NDELETE_AT_ONCE ];
+ } ginxlogDeleteListPages;
+ 
  /* ginutil.c */
  extern Datum ginoptions(PG_FUNCTION_ARGS);
  extern void initGinState(GinState *state, Relation index);
  extern Buffer GinNewBuffer(Relation index);
  extern void GinInitBuffer(Buffer b, uint32 f);
  extern void GinInitPage(Page page, uint32 f, Size pageSize);
+ extern void GinInitMetabuffer(Buffer b);
  extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
  extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a, OffsetNumber attnum_b, Datum b);
***************
*** 249,257 ****
--- 332,343 ----
  extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
  extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
+ 
  /* gininsert.c */
  extern Datum ginbuild(PG_FUNCTION_ARGS);
  extern Datum gininsert(PG_FUNCTION_ARGS);
+ extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
+ 						   ItemPointerData *items, uint32 nitem, bool isBuild);
  
  /* ginxlog.c */
  extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
***************
*** 443,449 ****
  #define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
  #define 
ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff ) #define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0) ! #define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 ) extern Datum gingetbitmap(PG_FUNCTION_ARGS); extern Datum gingettuple(PG_FUNCTION_ARGS); --- 529,536 ---- #define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff ) #define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff ) #define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0) ! #define ItemPointerSetLossyPage(p, b) ItemPointerSet( (p), (b), 0xffff) ! #define ItemPointerIsLossyPage(p) ( ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff ) extern Datum gingetbitmap(PG_FUNCTION_ARGS); extern Datum gingettuple(PG_FUNCTION_ARGS); *************** *** 489,492 **** --- 576,598 ---- OffsetNumber attnum, Datum *entries, int32 nentry); extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n); + /* ginfast.c */ + + typedef struct GinTupleCollector { + IndexTuple *tuples; + uint32 ntuples; + uint32 lentuples; + uint32 sumsize; + } GinTupleCollector; + + extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate, GinTupleCollector *collector); + extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate, + GinTupleCollector *collector, + OffsetNumber attnum, Datum value, ItemPointer item); + + #define GinListPageSize \ + ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) ) + + extern void ginInsertCleanup(Relation index, GinState *ginstate, IndexBulkDeleteResult *stats); + #endif *** src/include/catalog/pg_am.h.orig Thu Mar 5 18:06:45 2009 --- src/include/catalog/pg_am.h Thu Mar 5 18:10:53 2009 *************** *** 118,124 **** DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 ! DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 --- 118,124 ---- DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 ! 
DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan 0 gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 *** src/include/catalog/pg_proc.h.orig Tue Feb 24 11:10:32 2009 --- src/include/catalog/pg_proc.h Thu Mar 5 18:10:54 2009 *************** *** 2965,2970 **** --- 2965,2972 ---- DESCR("statistics: number of tuples deleted"); DATA(insert OID = 1972 ( pg_stat_get_tuples_hot_updated PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_tuples_hot_updated _null_ _null_ _null_ )); DESCR("statistics: number of tuples hot updated"); + DATA(insert OID = 2319 ( pg_stat_get_fresh_inserted_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_fresh_inserted_tuples _null_ _null_ _null_ )); + DESCR("statistics: number of inserted tuples since last vacuum"); DATA(insert OID = 2878 ( pg_stat_get_live_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_live_tuples _null_ _null_ _null_ )); DESCR("statistics: number of live tuples"); DATA(insert OID = 2879 ( pg_stat_get_dead_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_dead_tuples _null_ _null_ _null_ )); *** src/include/nodes/tidbitmap.h.orig Sat Jan 10 16:08:36 2009 --- src/include/nodes/tidbitmap.h Thu Mar 5 18:10:54 2009 *************** *** 52,57 **** --- 52,58 ---- extern void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck); + extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno); extern void tbm_union(TIDBitmap *a, const TIDBitmap *b); extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b); *** src/include/pgstat.h.orig Sun Jan 4 17:19:59 2009 --- src/include/pgstat.h Thu Mar 5 18:16:50 2009 *************** *** 476,481 **** --- 476,483 ---- PgStat_Counter tuples_deleted; PgStat_Counter tuples_hot_updated; + PgStat_Counter n_inserted_tuples; /* number of non-hot inserted tuples + * since last vacuum */ PgStat_Counter n_live_tuples; PgStat_Counter n_dead_tuples; PgStat_Counter last_anl_tuples; *** src/test/regress/expected/rules.out.orig Tue Feb 3 21:08:47 2009 --- src/test/regress/expected/rules.out Thu Mar 5 18:10:54 2009 *************** *** 1291,1304 **** pg_shadow | SELECT pg_authid.rolname AS usename, pg_authid.oid AS usesysid, pg_authid.rolcreatedb AS usecreatedb, pg_authid.rolsuper AS usesuper, pg_authid.rolcatupdate AS usecatupd, pg_authid.rolpassword AS passwd, (pg_authid.rolvaliduntil)::abstime AS valuntil, pg_authid.rolconfig AS useconfig FROM pg_authid WHERE pg_authid.rolcanlogin; pg_stat_activity | SELECT s.datid, d.datname, s.procpid, s.usesysid, u.rolname AS usename, s.current_query, s.waiting, s.xact_start, s.query_start, s.backend_start, s.client_addr, s.client_port FROM pg_database d, pg_stat_get_activity(NULL::integer) s(datid, procpid, usesysid, current_query, waiting, xact_start, query_start, backend_start, client_addr, client_port), pg_authid u WHERE ((s.datid = d.oid) AND (s.usesysid = u.oid)); pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN 
pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])); ! pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname; pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc; pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d; pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text)); ! 
pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text)); pg_stat_user_functions | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, (pg_stat_get_function_time(p.oid) / 1000) AS total_time, (pg_stat_get_function_self_time(p.oid) / 1000) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL)); pg_stat_user_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text)); ! pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text)); pg_statio_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])); pg_statio_all_sequences | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS blks_read, pg_stat_get_blocks_hit(c.oid) AS blks_hit FROM (pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = 'S'::"char"); pg_statio_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS heap_blks_read, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, (sum((pg_stat_get_blocks_fetched(i.indexrelid) - pg_stat_get_blocks_hit(i.indexrelid))))::bigint AS idx_blks_read, 
(sum(pg_stat_get_blocks_hit(i.indexrelid)))::bigint AS idx_blks_hit, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, (pg_stat_get_blocks_fetched(x.oid) - pg_stat_get_blocks_hit(x.oid)) AS tidx_blks_read, pg_stat_get_blocks_hit(x.oid) AS tidx_blks_hit FROM ((((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_class x ON ((t.reltoastidxid = x.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname, t.oid, x.oid; --- 1291,1304 ---- pg_shadow | SELECT pg_authid.rolname AS usename, pg_authid.oid AS usesysid, pg_authid.rolcreatedb AS usecreatedb, pg_authid.rolsuper AS usesuper, pg_authid.rolcatupdate AS usecatupd, pg_authid.rolpassword AS passwd, (pg_authid.rolvaliduntil)::abstime AS valuntil, pg_authid.rolconfig AS useconfig FROM pg_authid WHERE pg_authid.rolcanlogin; pg_stat_activity | SELECT s.datid, d.datname, s.procpid, s.usesysid, u.rolname AS usename, s.current_query, s.waiting, s.xact_start, s.query_start, s.backend_start, s.client_addr, s.client_port FROM pg_database d, pg_stat_get_activity(NULL::integer) s(datid, procpid, usesysid, current_query, waiting, xact_start, query_start, backend_start, client_addr, client_port), pg_authid u WHERE ((s.datid = d.oid) AND (s.usesysid = u.oid)); pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])); ! 
pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_fresh_inserted_tuples(c.oid) AS n_fresh_tup, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname; pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc; pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d; pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text)); ! 
pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text)); pg_stat_user_functions | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, (pg_stat_get_function_time(p.oid) / 1000) AS total_time, (pg_stat_get_function_self_time(p.oid) / 1000) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL)); pg_stat_user_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text)); ! pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text)); pg_statio_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])); pg_statio_all_sequences | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS blks_read, pg_stat_get_blocks_hit(c.oid) AS blks_hit FROM (pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = 'S'::"char"); pg_statio_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS heap_blks_read, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, (sum((pg_stat_get_blocks_fetched(i.indexrelid) - 
pg_stat_get_blocks_hit(i.indexrelid))))::bigint AS idx_blks_read, (sum(pg_stat_get_blocks_hit(i.indexrelid)))::bigint AS idx_blks_hit, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, (pg_stat_get_blocks_fetched(x.oid) - pg_stat_get_blocks_hit(x.oid)) AS tidx_blks_read, pg_stat_get_blocks_hit(x.oid) AS tidx_blks_hit FROM ((((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_class x ON ((t.reltoastidxid = x.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname, t.oid, x.oid;
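
As a worked illustration of the autovacuum change above: relation_needs_vacanalyze() now also requests a VACUUM when a table has a fast-update GIN index and the number of freshly inserted tuples (the n_fresh_tup counter exposed in the views above) exceeds vac_base_thresh on its own. The standalone sketch below models only that arithmetic; it is not backend code, the parameter values are hypothetical, and the force_vacuum and analyze paths are omitted.

/*
 * Illustrative sketch only (not part of the patch): models the trigger
 * arithmetic used by relation_needs_vacanalyze().  The values passed in
 * main() are hypothetical; in the backend they come from the autovacuum_*
 * GUCs, per-table storage parameters, and the statistics collector.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
needs_vacuum(float vactuples, float instuples, float reltuples,
             bool has_pending_gin_index,
             int vac_base_thresh, float vac_scale_factor)
{
    /* regular dead-tuple threshold: base + scale_factor * reltuples */
    float vacthresh = (float) vac_base_thresh + vac_scale_factor * reltuples;

    /*
     * The patch adds the second disjunct: a table with a fast-update GIN
     * index is also vacuumed once enough tuples have been inserted, using
     * vac_base_thresh alone, because pending-list search cost grows
     * linearly with the number of pending entries.
     */
    return (vactuples > vacthresh) ||
           (has_pending_gin_index && instuples > (float) vac_base_thresh);
}

int
main(void)
{
    /* hypothetical table: 1M rows, 60k inserts since last VACUUM, few dead */
    printf("trigger without GIN pending list: %d\n",
           needs_vacuum(10000, 60000, 1000000, false, 50, 0.2f));
    printf("trigger with GIN pending list:    %d\n",
           needs_vacuum(10000, 60000, 1000000, true, 50, 0.2f));
    return 0;
}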
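
Similarly, the tbm_add_page() routine added to tidbitmap.c marks an entire heap page in a bitmap by setting one bit inside a lossy chunk entry. The sketch below models just that chunk/bit addressing; PAGES_PER_CHUNK, WORDNUM and BITNUM are private to src/backend/nodes/tidbitmap.c, so the definitions here are simplified stand-ins assuming a 32-bit bitmapword and 256 pages per chunk.

/*
 * Illustrative sketch only (not part of the patch): shows how a page number
 * is split into a chunk-header page number plus a bit position, the way
 * tbm_add_page() records a whole (lossy) page in a TIDBitmap.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t bitmapword;            /* assumed width for this sketch */

#define BITS_PER_BITMAPWORD 32
#define PAGES_PER_CHUNK     256         /* assumed value for this sketch */
#define WORDNUM(x)          ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)           ((x) % BITS_PER_BITMAPWORD)

int
main(void)
{
    uint32_t pageno = 1234;                    /* hypothetical heap page */
    uint32_t bitno = pageno % PAGES_PER_CHUNK; /* bit within the chunk */
    uint32_t chunk_pageno = pageno - bitno;    /* chunk-header page number */
    bitmapword words[PAGES_PER_CHUNK / BITS_PER_BITMAPWORD] = {0};

    /* set the page's bit inside the chunk entry, as tbm_add_page() does */
    words[WORDNUM(bitno)] |= ((bitmapword) 1 << BITNUM(bitno));

    printf("page %u lives in chunk %u as bit %u (word %u, bit %u)\n",
           pageno, chunk_pageno, bitno, WORDNUM(bitno), BITNUM(bitno));
    return 0;
}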