Re: [BUGS] BUG #14155: bloom index error with unlogged table

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: [BUGS] BUG #14155: bloom index error with unlogged table
Дата
Msg-id 23767.1464926580@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: [BUGS] BUG #14155: bloom index error with unlogged table  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: [BUGS] BUG #14155: bloom index error with unlogged table
Re: [BUGS] BUG #14155: bloom index error with unlogged table
Список pgsql-hackers
I wrote:
> Jeff Janes <jeff.janes@gmail.com> writes:
>> My biggest gripe with it at the moment is that the signature size should be
>> expressed in bits, and then internally rounded up to a multiple of 16,
>> rather than having it be expressed in 'uint16'.
>> If that were done it would be easier to fix the documentation to be more
>> understandable.

> +1 ... that sort of definition seems much more future-proof, too.
> IMO it's not too late to change this.  (We probably don't want to change
> the on-disk representation of the reloptions, but we could convert from
> bits to words in bloptions().)

There were no objections to this, but also no action.  Attached is a draft
patch ... any complaints?

            regards, tom lane

diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index c21eebf..a9daf63 100644
*** a/contrib/bloom/bloom.h
--- b/contrib/bloom/bloom.h
*************** typedef BloomPageOpaqueData *BloomPageOp
*** 79,96 ****
  #define BLOOM_HEAD_BLKNO        (1)        /* first data page */

  /*
!  * Maximum of bloom signature length in uint16. Actual value
!  * is 512 bytes
   */
! #define MAX_BLOOM_LENGTH        (256)

  /* Bloom index options */
  typedef struct BloomOptions
  {
      int32        vl_len_;        /* varlena header (do not touch directly!) */
!     int            bloomLength;    /* length of signature in uint16 */
!     int            bitSize[INDEX_MAX_KEYS];        /* signature bits per index
!                                                  * key */
  }    BloomOptions;

  /*
--- 79,108 ----
  #define BLOOM_HEAD_BLKNO        (1)        /* first data page */

  /*
!  * We store Bloom signatures as arrays of uint16 words.
   */
! typedef uint16 BloomSignatureWord;
!
! #define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
!
! /*
!  * Default and maximum Bloom signature length in bits.
!  */
! #define DEFAULT_BLOOM_LENGTH    (5 * SIGNWORDBITS)
! #define MAX_BLOOM_LENGTH        (256 * SIGNWORDBITS)
!
! /*
!  * Default and maximum signature bits generated per index key.
!  */
! #define DEFAULT_BLOOM_BITS        2
! #define MAX_BLOOM_BITS            (MAX_BLOOM_LENGTH - 1)

  /* Bloom index options */
  typedef struct BloomOptions
  {
      int32        vl_len_;        /* varlena header (do not touch directly!) */
!     int            bloomLength;    /* length of signature in words (not bits!) */
!     int            bitSize[INDEX_MAX_KEYS];    /* signature bits per index key */
  }    BloomOptions;

  /*
*************** typedef struct BloomState
*** 143,154 ****
  /*
   * Tuples are very different from all other relations
   */
- typedef uint16 SignType;
-
  typedef struct BloomTuple
  {
      ItemPointerData heapPtr;
!     SignType    sign[FLEXIBLE_ARRAY_MEMBER];
  }    BloomTuple;

  #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
--- 155,164 ----
  /*
   * Tuples are very different from all other relations
   */
  typedef struct BloomTuple
  {
      ItemPointerData heapPtr;
!     BloomSignatureWord    sign[FLEXIBLE_ARRAY_MEMBER];
  }    BloomTuple;

  #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
*************** typedef struct BloomTuple
*** 156,162 ****
  /* Opaque data structure for bloom index scan */
  typedef struct BloomScanOpaqueData
  {
!     SignType   *sign;            /* Scan signature */
      BloomState    state;
  }    BloomScanOpaqueData;

--- 166,172 ----
  /* Opaque data structure for bloom index scan */
  typedef struct BloomScanOpaqueData
  {
!     BloomSignatureWord   *sign;            /* Scan signature */
      BloomState    state;
  }    BloomScanOpaqueData;

*************** extern void BloomFillMetapage(Relation i
*** 170,176 ****
  extern void BloomInitMetapage(Relation index);
  extern void BloomInitPage(Page page, uint16 flags);
  extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
  extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
  extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);

--- 180,186 ----
  extern void BloomInitMetapage(Relation index);
  extern void BloomInitPage(Page page, uint16 flags);
  extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
  extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
  extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);

diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index aebf32a..0c954dc 100644
*** a/contrib/bloom/blscan.c
--- b/contrib/bloom/blscan.c
*************** blgetbitmap(IndexScanDesc scan, TIDBitma
*** 93,99 ****
          /* New search: have to calculate search signature */
          ScanKey        skey = scan->keyData;

!         so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);

          for (i = 0; i < scan->numberOfKeys; i++)
          {
--- 93,99 ----
          /* New search: have to calculate search signature */
          ScanKey        skey = scan->keyData;

!         so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);

          for (i = 0; i < scan->numberOfKeys; i++)
          {
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 4a5b343..8f13476 100644
*** a/contrib/bloom/blutils.c
--- b/contrib/bloom/blutils.c
***************
*** 27,49 ****

  #include "bloom.h"

! /* Signature dealing macros */
! #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
! #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
! #define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )

  PG_FUNCTION_INFO_V1(blhandler);

! /* Kind of relation optioms for bloom index */
  static relopt_kind bl_relopt_kind;

  static int32 myRand(void);
  static void mySrand(uint32 seed);

  /*
!  * Module initialize function: initilized relation options.
   */
  void
  _PG_init(void)
--- 27,52 ----

  #include "bloom.h"

! /* Signature dealing macros - note i is assumed to be of type int */
! #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
! #define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )

  PG_FUNCTION_INFO_V1(blhandler);

! /* Kind of relation options for bloom index */
  static relopt_kind bl_relopt_kind;
+ /* parse table for fillRelOptions */
+ static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];

  static int32 myRand(void);
  static void mySrand(uint32 seed);

  /*
!  * Module initialize function: initialize info about Bloom relation options.
!  *
!  * Note: keep this in sync with makeDefaultBloomOptions().
   */
  void
  _PG_init(void)
*************** _PG_init(void)
*** 53,70 ****

      bl_relopt_kind = add_reloption_kind();

      add_int_reloption(bl_relopt_kind, "length",
!                       "Length of signature in uint16 type", 5, 1, 256);

      for (i = 0; i < INDEX_MAX_KEYS; i++)
      {
!         snprintf(buf, 16, "col%d", i + 1);
          add_int_reloption(bl_relopt_kind, buf,
!                       "Number of bits for corresponding column", 2, 1, 2048);
      }
  }

  /*
   * Bloom handler function: return IndexAmRoutine with access method parameters
   * and callbacks.
   */
--- 56,101 ----

      bl_relopt_kind = add_reloption_kind();

+     /* Option for length of signature */
      add_int_reloption(bl_relopt_kind, "length",
!                       "Length of signature in bits",
!                       DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
!     bl_relopt_tab[0].optname = "length";
!     bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
!     bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);

+     /* Number of bits for each possible index column: col1, col2, ... */
      for (i = 0; i < INDEX_MAX_KEYS; i++)
      {
!         snprintf(buf, sizeof(buf), "col%d", i + 1);
          add_int_reloption(bl_relopt_kind, buf,
!                           "Number of bits for corresponding column",
!                           DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
!         bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
!                                                            buf);
!         bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
!         bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
      }
  }

  /*
+  * Construct a default set of Bloom options.
+  */
+ static BloomOptions *
+ makeDefaultBloomOptions(void)
+ {
+     BloomOptions *opts;
+     int                i;
+
+     opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+     opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+     for (i = 0; i < INDEX_MAX_KEYS; i++)
+         opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+     SET_VARSIZE(opts, sizeof(BloomOptions));
+     return opts;
+ }
+
+ /*
   * Bloom handler function: return IndexAmRoutine with access method parameters
   * and callbacks.
   */
*************** initBloomState(BloomState *state, Relati
*** 157,163 ****

      memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
      state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
!         sizeof(SignType) * state->opts.bloomLength;
  }

  /*
--- 188,194 ----

      memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
      state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
!         sizeof(BloomSignatureWord) * state->opts.bloomLength;
  }

  /*
*************** mySrand(uint32 seed)
*** 208,214 ****
   * Add bits of given value to the signature.
   */
  void
! signValue(BloomState *state, SignType *sign, Datum value, int attno)
  {
      uint32        hashVal;
      int            nBit,
--- 239,245 ----
   * Add bits of given value to the signature.
   */
  void
! signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
  {
      uint32        hashVal;
      int            nBit,
*************** signValue(BloomState *state, SignType *s
*** 231,238 ****

      for (j = 0; j < state->opts.bitSize[attno]; j++)
      {
!         /* prevent mutiple evaluation */
!         nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
          SETBIT(sign, nBit);
      }
  }
--- 262,269 ----

      for (j = 0; j < state->opts.bitSize[attno]; j++)
      {
!         /* prevent multiple evaluation in SETBIT macro */
!         nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
          SETBIT(sign, nBit);
      }
  }
*************** BloomInitPage(Page page, uint16 flags)
*** 362,400 ****
  }

  /*
-  * Adjust options of bloom index.
-  *
-  * This must produce default options when *opts is initially all-zero.
-  */
- static void
- adjustBloomOptions(BloomOptions *opts)
- {
-     int                i;
-
-     /* Default length of bloom filter is 5 of 16-bit integers */
-     if (opts->bloomLength <= 0)
-         opts->bloomLength = 5;
-     else if (opts->bloomLength > MAX_BLOOM_LENGTH)
-         ereport(ERROR,
-                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                  errmsg("length of bloom signature (%d) is greater than maximum %d",
-                         opts->bloomLength, MAX_BLOOM_LENGTH)));
-
-     /* Check signature length */
-     for (i = 0; i < INDEX_MAX_KEYS; i++)
-     {
-         /*
-          * Zero and negative number of bits is meaningless.  Also setting
-          * more bits than signature have seems useless.  Replace both cases
-          * with 2 bits default.
-          */
-         if (opts->bitSize[i] <= 0
-             || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
-             opts->bitSize[i] = 2;
-     }
- }
-
- /*
   * Fill in metapage for bloom index.
   */
  void
--- 393,398 ----
*************** BloomFillMetapage(Relation index, Page m
*** 405,418 ****

      /*
       * Choose the index's options.  If reloptions have been assigned, use
!      * those, otherwise create default options by applying adjustBloomOptions
!      * to a zeroed chunk of memory.  We apply adjustBloomOptions to existing
!      * reloptions too, just out of paranoia; they should be valid already.
       */
      opts = (BloomOptions *) index->rd_options;
      if (!opts)
!         opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
!     adjustBloomOptions(opts);

      /*
       * Initialize contents of meta page, including a copy of the options,
--- 403,413 ----

      /*
       * Choose the index's options.  If reloptions have been assigned, use
!      * those, otherwise create default options.
       */
      opts = (BloomOptions *) index->rd_options;
      if (!opts)
!         opts = makeDefaultBloomOptions();

      /*
       * Initialize contents of meta page, including a copy of the options,
*************** bloptions(Datum reloptions, bool validat
*** 462,491 ****
      relopt_value *options;
      int            numoptions;
      BloomOptions *rdopts;
-     relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
-     int            i;
-     char        buf[16];
-
-     /* Option for length of signature */
-     tab[0].optname = "length";
-     tab[0].opttype = RELOPT_TYPE_INT;
-     tab[0].offset = offsetof(BloomOptions, bloomLength);
-
-     /* Number of bits for each of possible columns: col1, col2, ... */
-     for (i = 0; i < INDEX_MAX_KEYS; i++)
-     {
-         snprintf(buf, sizeof(buf), "col%d", i + 1);
-         tab[i + 1].optname = pstrdup(buf);
-         tab[i + 1].opttype = RELOPT_TYPE_INT;
-         tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
-     }

      options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
      rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
      fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
!                    validate, tab, INDEX_MAX_KEYS + 1);

!     adjustBloomOptions(rdopts);

      return (bytea *) rdopts;
  }
--- 457,471 ----
      relopt_value *options;
      int            numoptions;
      BloomOptions *rdopts;

+     /* Parse the user-given reloptions */
      options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
      rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
      fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
!                    validate, bl_relopt_tab, lengthof(bl_relopt_tab));

!     /* Convert signature length to words, rounding up */
!     rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;

      return (bytea *) rdopts;
  }
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
index 49cb066..abf30e7 100644
*** a/doc/src/sgml/bloom.sgml
--- b/doc/src/sgml/bloom.sgml
***************
*** 8,15 ****
   </indexterm>

   <para>
!   <literal>bloom</> is a module which implements an index access method.  It comes
!   as an example of custom access methods and generic WAL records usage.  But it
    is also useful in itself.
   </para>

--- 8,15 ----
   </indexterm>

   <para>
!   <literal>bloom</> is a module that implements an index access method.  It comes
!   as an example of custom access methods and generic WAL record usage.  But it
    is also useful in itself.
   </para>

***************
*** 22,29 ****
     allows fast exclusion of non-candidate tuples via signatures.
     Since a signature is a lossy representation of all indexed attributes,
     search results must be rechecked using heap information.
!    The user can specify signature length (in uint16, default is 5) and the
!    number of bits, which can be set per attribute (1 < colN < 2048).
    </para>

    <para>
--- 22,30 ----
     allows fast exclusion of non-candidate tuples via signatures.
     Since a signature is a lossy representation of all indexed attributes,
     search results must be rechecked using heap information.
!    The user can specify signature length in bits (default 80, maximum 4096)
!    and the number of bits generated for each index column (default 2,
!    maximum 4095).
    </para>

    <para>
***************
*** 51,64 ****
      <term><literal>length</></term>
      <listitem>
       <para>
!       Length of signature in uint16 type values
       </para>
      </listitem>
     </varlistentry>
     </variablelist>
     <variablelist>
     <varlistentry>
!     <term><literal>col1 — col16</></term>
      <listitem>
       <para>
        Number of bits for corresponding column
--- 52,65 ----
      <term><literal>length</></term>
      <listitem>
       <para>
!       Length of signature in bits
       </para>
      </listitem>
     </varlistentry>
     </variablelist>
     <variablelist>
     <varlistentry>
!     <term><literal>col1 — col32</></term>
      <listitem>
       <para>
        Number of bits for corresponding column
***************
*** 77,88 ****

  <programlisting>
  CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
!        WITH (length=5, col1=2, col2=2, col3=4);
  </programlisting>

    <para>
     Here, we created a bloom index with a signature length of 80 bits,
!    and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
    </para>

    <para>
--- 78,89 ----

  <programlisting>
  CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
!        WITH (length=80, col1=2, col2=2, col3=4);
  </programlisting>

    <para>
     Here, we created a bloom index with a signature length of 80 bits,
!    and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
    </para>

    <para>

В списке pgsql-hackers по дате отправления:

Предыдущее
От: "David G. Johnston"
Дата:
Сообщение: Re: Prepared statements and generic plans
Следующее
От: Petr Jelinek
Дата:
Сообщение: Re: Rename max_parallel_degree?