Re: [BUGS] BUG #14155: bloom index error with unlogged table
| От | Tom Lane |
|---|---|
| Тема | Re: [BUGS] BUG #14155: bloom index error with unlogged table |
| Дата | |
| Msg-id | 23767.1464926580@sss.pgh.pa.us обсуждение исходный текст |
| Ответ на | Re: [BUGS] BUG #14155: bloom index error with unlogged table (Tom Lane <tgl@sss.pgh.pa.us>) |
| Ответы |
Re: [BUGS] BUG #14155: bloom index error with unlogged table
Re: [BUGS] BUG #14155: bloom index error with unlogged table |
| Список | pgsql-hackers |
I wrote:
> Jeff Janes <jeff.janes@gmail.com> writes:
>> My biggest gripe with it at the moment is that the signature size should be
>> expressed in bits, and then internally rounded up to a multiple of 16,
>> rather than having it be expressed in 'uint16'.
>> If that were done it would be easier to fix the documentation to be more
>> understandable.
> +1 ... that sort of definition seems much more future-proof, too.
> IMO it's not too late to change this. (We probably don't want to change
> the on-disk representation of the reloptions, but we could convert from
> bits to words in bloptions().)
There were no objections to this, but also no action. Attached is a draft
patch ... any complaints?
regards, tom lane
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index c21eebf..a9daf63 100644
*** a/contrib/bloom/bloom.h
--- b/contrib/bloom/bloom.h
*************** typedef BloomPageOpaqueData *BloomPageOp
*** 79,96 ****
#define BLOOM_HEAD_BLKNO (1) /* first data page */
/*
! * Maximum of bloom signature length in uint16. Actual value
! * is 512 bytes
*/
! #define MAX_BLOOM_LENGTH (256)
/* Bloom index options */
typedef struct BloomOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
! int bloomLength; /* length of signature in uint16 */
! int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
! * key */
} BloomOptions;
/*
--- 79,108 ----
#define BLOOM_HEAD_BLKNO (1) /* first data page */
/*
! * We store Bloom signatures as arrays of uint16 words.
*/
! typedef uint16 BloomSignatureWord;
!
! #define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
!
! /*
! * Default and maximum Bloom signature length in bits.
! */
! #define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS)
! #define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS)
!
! /*
! * Default and maximum signature bits generated per index key.
! */
! #define DEFAULT_BLOOM_BITS 2
! #define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1)
/* Bloom index options */
typedef struct BloomOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
! int bloomLength; /* length of signature in words (not bits!) */
! int bitSize[INDEX_MAX_KEYS]; /* signature bits per index key */
} BloomOptions;
/*
*************** typedef struct BloomState
*** 143,154 ****
/*
* Tuples are very different from all other relations
*/
- typedef uint16 SignType;
-
typedef struct BloomTuple
{
ItemPointerData heapPtr;
! SignType sign[FLEXIBLE_ARRAY_MEMBER];
} BloomTuple;
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
--- 155,164 ----
/*
* Tuples are very different from all other relations
*/
typedef struct BloomTuple
{
ItemPointerData heapPtr;
! BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
} BloomTuple;
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
*************** typedef struct BloomTuple
*** 156,162 ****
/* Opaque data structure for bloom index scan */
typedef struct BloomScanOpaqueData
{
! SignType *sign; /* Scan signature */
BloomState state;
} BloomScanOpaqueData;
--- 166,172 ----
/* Opaque data structure for bloom index scan */
typedef struct BloomScanOpaqueData
{
! BloomSignatureWord *sign; /* Scan signature */
BloomState state;
} BloomScanOpaqueData;
*************** extern void BloomFillMetapage(Relation i
*** 170,176 ****
extern void BloomInitMetapage(Relation index);
extern void BloomInitPage(Page page, uint16 flags);
extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
--- 180,186 ----
extern void BloomInitMetapage(Relation index);
extern void BloomInitPage(Page page, uint16 flags);
extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index aebf32a..0c954dc 100644
*** a/contrib/bloom/blscan.c
--- b/contrib/bloom/blscan.c
*************** blgetbitmap(IndexScanDesc scan, TIDBitma
*** 93,99 ****
/* New search: have to calculate search signature */
ScanKey skey = scan->keyData;
! so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
for (i = 0; i < scan->numberOfKeys; i++)
{
--- 93,99 ----
/* New search: have to calculate search signature */
ScanKey skey = scan->keyData;
! so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
for (i = 0; i < scan->numberOfKeys; i++)
{
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 4a5b343..8f13476 100644
*** a/contrib/bloom/blutils.c
--- b/contrib/bloom/blutils.c
***************
*** 27,49 ****
#include "bloom.h"
! /* Signature dealing macros */
! #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
! #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
! #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
PG_FUNCTION_INFO_V1(blhandler);
! /* Kind of relation optioms for bloom index */
static relopt_kind bl_relopt_kind;
static int32 myRand(void);
static void mySrand(uint32 seed);
/*
! * Module initialize function: initilized relation options.
*/
void
_PG_init(void)
--- 27,52 ----
#include "bloom.h"
! /* Signature dealing macros - note i is assumed to be of type int */
! #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
! #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
PG_FUNCTION_INFO_V1(blhandler);
! /* Kind of relation options for bloom index */
static relopt_kind bl_relopt_kind;
+ /* parse table for fillRelOptions */
+ static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
static int32 myRand(void);
static void mySrand(uint32 seed);
/*
! * Module initialize function: initialize info about Bloom relation options.
! *
! * Note: keep this in sync with makeDefaultBloomOptions().
*/
void
_PG_init(void)
*************** _PG_init(void)
*** 53,70 ****
bl_relopt_kind = add_reloption_kind();
add_int_reloption(bl_relopt_kind, "length",
! "Length of signature in uint16 type", 5, 1, 256);
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
! snprintf(buf, 16, "col%d", i + 1);
add_int_reloption(bl_relopt_kind, buf,
! "Number of bits for corresponding column", 2, 1, 2048);
}
}
/*
* Bloom handler function: return IndexAmRoutine with access method parameters
* and callbacks.
*/
--- 56,101 ----
bl_relopt_kind = add_reloption_kind();
+ /* Option for length of signature */
add_int_reloption(bl_relopt_kind, "length",
! "Length of signature in bits",
! DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
! bl_relopt_tab[0].optname = "length";
! bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
! bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
+ /* Number of bits for each possible index column: col1, col2, ... */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
! snprintf(buf, sizeof(buf), "col%d", i + 1);
add_int_reloption(bl_relopt_kind, buf,
! "Number of bits for corresponding column",
! DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
! bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
! buf);
! bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
! bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
}
}
/*
+ * Construct a default set of Bloom options.
+ */
+ static BloomOptions *
+ makeDefaultBloomOptions(void)
+ {
+ BloomOptions *opts;
+ int i;
+
+ opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+ opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+ for (i = 0; i < INDEX_MAX_KEYS; i++)
+ opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+ SET_VARSIZE(opts, sizeof(BloomOptions));
+ return opts;
+ }
+
+ /*
* Bloom handler function: return IndexAmRoutine with access method parameters
* and callbacks.
*/
*************** initBloomState(BloomState *state, Relati
*** 157,163 ****
memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
! sizeof(SignType) * state->opts.bloomLength;
}
/*
--- 188,194 ----
memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
! sizeof(BloomSignatureWord) * state->opts.bloomLength;
}
/*
*************** mySrand(uint32 seed)
*** 208,214 ****
* Add bits of given value to the signature.
*/
void
! signValue(BloomState *state, SignType *sign, Datum value, int attno)
{
uint32 hashVal;
int nBit,
--- 239,245 ----
* Add bits of given value to the signature.
*/
void
! signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
{
uint32 hashVal;
int nBit,
*************** signValue(BloomState *state, SignType *s
*** 231,238 ****
for (j = 0; j < state->opts.bitSize[attno]; j++)
{
! /* prevent mutiple evaluation */
! nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
SETBIT(sign, nBit);
}
}
--- 262,269 ----
for (j = 0; j < state->opts.bitSize[attno]; j++)
{
! /* prevent multiple evaluation in SETBIT macro */
! nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
SETBIT(sign, nBit);
}
}
*************** BloomInitPage(Page page, uint16 flags)
*** 362,400 ****
}
/*
- * Adjust options of bloom index.
- *
- * This must produce default options when *opts is initially all-zero.
- */
- static void
- adjustBloomOptions(BloomOptions *opts)
- {
- int i;
-
- /* Default length of bloom filter is 5 of 16-bit integers */
- if (opts->bloomLength <= 0)
- opts->bloomLength = 5;
- else if (opts->bloomLength > MAX_BLOOM_LENGTH)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("length of bloom signature (%d) is greater than maximum %d",
- opts->bloomLength, MAX_BLOOM_LENGTH)));
-
- /* Check signature length */
- for (i = 0; i < INDEX_MAX_KEYS; i++)
- {
- /*
- * Zero and negative number of bits is meaningless. Also setting
- * more bits than signature have seems useless. Replace both cases
- * with 2 bits default.
- */
- if (opts->bitSize[i] <= 0
- || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
- opts->bitSize[i] = 2;
- }
- }
-
- /*
* Fill in metapage for bloom index.
*/
void
--- 393,398 ----
*************** BloomFillMetapage(Relation index, Page m
*** 405,418 ****
/*
* Choose the index's options. If reloptions have been assigned, use
! * those, otherwise create default options by applying adjustBloomOptions
! * to a zeroed chunk of memory. We apply adjustBloomOptions to existing
! * reloptions too, just out of paranoia; they should be valid already.
*/
opts = (BloomOptions *) index->rd_options;
if (!opts)
! opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
! adjustBloomOptions(opts);
/*
* Initialize contents of meta page, including a copy of the options,
--- 403,413 ----
/*
* Choose the index's options. If reloptions have been assigned, use
! * those, otherwise create default options.
*/
opts = (BloomOptions *) index->rd_options;
if (!opts)
! opts = makeDefaultBloomOptions();
/*
* Initialize contents of meta page, including a copy of the options,
*************** bloptions(Datum reloptions, bool validat
*** 462,491 ****
relopt_value *options;
int numoptions;
BloomOptions *rdopts;
- relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
- int i;
- char buf[16];
-
- /* Option for length of signature */
- tab[0].optname = "length";
- tab[0].opttype = RELOPT_TYPE_INT;
- tab[0].offset = offsetof(BloomOptions, bloomLength);
-
- /* Number of bits for each of possible columns: col1, col2, ... */
- for (i = 0; i < INDEX_MAX_KEYS; i++)
- {
- snprintf(buf, sizeof(buf), "col%d", i + 1);
- tab[i + 1].optname = pstrdup(buf);
- tab[i + 1].opttype = RELOPT_TYPE_INT;
- tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
- }
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
! validate, tab, INDEX_MAX_KEYS + 1);
! adjustBloomOptions(rdopts);
return (bytea *) rdopts;
}
--- 457,471 ----
relopt_value *options;
int numoptions;
BloomOptions *rdopts;
+ /* Parse the user-given reloptions */
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
! validate, bl_relopt_tab, lengthof(bl_relopt_tab));
! /* Convert signature length to words, rounding up */
! rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
return (bytea *) rdopts;
}
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
index 49cb066..abf30e7 100644
*** a/doc/src/sgml/bloom.sgml
--- b/doc/src/sgml/bloom.sgml
***************
*** 8,15 ****
</indexterm>
<para>
! <literal>bloom</> is a module which implements an index access method. It comes
! as an example of custom access methods and generic WAL records usage. But it
is also useful in itself.
</para>
--- 8,15 ----
</indexterm>
<para>
! <literal>bloom</> is a module that implements an index access method. It comes
! as an example of custom access methods and generic WAL record usage. But it
is also useful in itself.
</para>
***************
*** 22,29 ****
allows fast exclusion of non-candidate tuples via signatures.
Since a signature is a lossy representation of all indexed attributes,
search results must be rechecked using heap information.
! The user can specify signature length (in uint16, default is 5) and the
! number of bits, which can be set per attribute (1 < colN < 2048).
</para>
<para>
--- 22,30 ----
allows fast exclusion of non-candidate tuples via signatures.
Since a signature is a lossy representation of all indexed attributes,
search results must be rechecked using heap information.
! The user can specify signature length in bits (default 80, maximum 4096)
! and the number of bits generated for each index column (default 2,
! maximum 4095).
</para>
<para>
***************
*** 51,64 ****
<term><literal>length</></term>
<listitem>
<para>
! Length of signature in uint16 type values
</para>
</listitem>
</varlistentry>
</variablelist>
<variablelist>
<varlistentry>
! <term><literal>col1 — col16</></term>
<listitem>
<para>
Number of bits for corresponding column
--- 52,65 ----
<term><literal>length</></term>
<listitem>
<para>
! Length of signature in bits
</para>
</listitem>
</varlistentry>
</variablelist>
<variablelist>
<varlistentry>
! <term><literal>col1 — col32</></term>
<listitem>
<para>
Number of bits for corresponding column
***************
*** 77,88 ****
<programlisting>
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
! WITH (length=5, col1=2, col2=2, col3=4);
</programlisting>
<para>
Here, we created a bloom index with a signature length of 80 bits,
! and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
</para>
<para>
--- 78,89 ----
<programlisting>
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
! WITH (length=80, col1=2, col2=2, col3=4);
</programlisting>
<para>
Here, we created a bloom index with a signature length of 80 bits,
! and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
</para>
<para>
В списке pgsql-hackers по дате отправления: