Re: ANALYZE versus expression indexes with nondefault opckeytype

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: ANALYZE versus expression indexes with nondefault opckeytype
Дата
Msg-id 6188.1280692989@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: ANALYZE versus expression indexes with nondefault opckeytype  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
I wrote:
> Robert Haas <robertmhaas@gmail.com> writes:
>> Yeah, maybe you're right.  But I'd still prefer to see us break the
>> ABI and do this just in 9.0 rather than changing 8.4.

> OK, I can live with that.  I'll take a look at it shortly.

Proposed patch attached (compiles, untested as yet).

            regards, tom lane

Index: src/backend/commands/analyze.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/analyze.c,v
retrieving revision 1.152
diff -c -r1.152 analyze.c
*** src/backend/commands/analyze.c    26 Feb 2010 02:00:37 -0000    1.152
--- src/backend/commands/analyze.c    1 Aug 2010 19:56:12 -0000
***************
*** 92,98 ****
                      AnlIndexData *indexdata, int nindexes,
                      HeapTuple *rows, int numrows,
                      MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
                      int targrows, double *totalrows, double *totaldeadrows);
  static double random_fract(void);
--- 92,99 ----
                      AnlIndexData *indexdata, int nindexes,
                      HeapTuple *rows, int numrows,
                      MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum,
!                                        Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
                      int targrows, double *totalrows, double *totaldeadrows);
  static double random_fract(void);
***************
*** 339,345 ****
                          (errcode(ERRCODE_UNDEFINED_COLUMN),
                      errmsg("column \"%s\" of relation \"%s\" does not exist",
                             col, RelationGetRelationName(onerel))));
!             vacattrstats[tcnt] = examine_attribute(onerel, i);
              if (vacattrstats[tcnt] != NULL)
                  tcnt++;
          }
--- 340,346 ----
                          (errcode(ERRCODE_UNDEFINED_COLUMN),
                      errmsg("column \"%s\" of relation \"%s\" does not exist",
                             col, RelationGetRelationName(onerel))));
!             vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
              if (vacattrstats[tcnt] != NULL)
                  tcnt++;
          }
***************
*** 353,359 ****
          tcnt = 0;
          for (i = 1; i <= attr_cnt; i++)
          {
!             vacattrstats[tcnt] = examine_attribute(onerel, i);
              if (vacattrstats[tcnt] != NULL)
                  tcnt++;
          }
--- 354,360 ----
          tcnt = 0;
          for (i = 1; i <= attr_cnt; i++)
          {
!             vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
              if (vacattrstats[tcnt] != NULL)
                  tcnt++;
          }
***************
*** 407,427 ****
                              elog(ERROR, "too few entries in indexprs list");
                          indexkey = (Node *) lfirst(indexpr_item);
                          indexpr_item = lnext(indexpr_item);
-
-                         /*
-                          * Can't analyze if the opclass uses a storage type
-                          * different from the expression result type. We'd get
-                          * confused because the type shown in pg_attribute for
-                          * the index column doesn't match what we are getting
-                          * from the expression. Perhaps this can be fixed
-                          * someday, but for now, punt.
-                          */
-                         if (exprType(indexkey) !=
-                             Irel[ind]->rd_att->attrs[i]->atttypid)
-                             continue;
-
                          thisdata->vacattrstats[tcnt] =
!                             examine_attribute(Irel[ind], i + 1);
                          if (thisdata->vacattrstats[tcnt] != NULL)
                          {
                              tcnt++;
--- 408,415 ----
                              elog(ERROR, "too few entries in indexprs list");
                          indexkey = (Node *) lfirst(indexpr_item);
                          indexpr_item = lnext(indexpr_item);
                          thisdata->vacattrstats[tcnt] =
!                             examine_attribute(Irel[ind], i + 1, indexkey);
                          if (thisdata->vacattrstats[tcnt] != NULL)
                          {
                              tcnt++;
***************
*** 802,810 ****
   *
   * Determine whether the column is analyzable; if so, create and initialize
   * a VacAttrStats struct for it.  If not, return NULL.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, int attnum)
  {
      Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
      HeapTuple    typtuple;
--- 790,801 ----
   *
   * Determine whether the column is analyzable; if so, create and initialize
   * a VacAttrStats struct for it.  If not, return NULL.
+  *
+  * If index_expr isn't NULL, then we're trying to analyze an expression index,
+  * and index_expr is the expression tree representing the column's data.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
      Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
      HeapTuple    typtuple;
***************
*** 827,835 ****
      stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
      stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
      memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
!     typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
      if (!HeapTupleIsValid(typtuple))
!         elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
      stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
      memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
      ReleaseSysCache(typtuple);
--- 818,847 ----
      stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
      stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
      memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
!
!     /*
!      * When analyzing an expression index, believe the expression tree's type
!      * not the column datatype --- the latter might be the opckeytype storage
!      * type of the opclass, which is not interesting for our purposes.  (Note:
!      * if we did anything with non-expression index columns, we'd need to
!      * figure out where to get the correct type info from, but for now that's
!      * not a problem.)  It's not clear whether anyone will care about the
!      * typmod, but we store that too just in case.
!      */
!     if (index_expr)
!     {
!         stats->attrtypid = exprType(index_expr);
!         stats->attrtypmod = exprTypmod(index_expr);
!     }
!     else
!     {
!         stats->attrtypid = attr->atttypid;
!         stats->attrtypmod = attr->atttypmod;
!     }
!
!     typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
      if (!HeapTupleIsValid(typtuple))
!         elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
      stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
      memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
      ReleaseSysCache(typtuple);
***************
*** 838,849 ****

      /*
       * The fields describing the stats->stavalues[n] element types default to
!      * the type of the field being analyzed, but the type-specific typanalyze
       * function can change them if it wants to store something else.
       */
      for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
      {
!         stats->statypid[i] = stats->attr->atttypid;
          stats->statyplen[i] = stats->attrtype->typlen;
          stats->statypbyval[i] = stats->attrtype->typbyval;
          stats->statypalign[i] = stats->attrtype->typalign;
--- 850,861 ----

      /*
       * The fields describing the stats->stavalues[n] element types default to
!      * the type of the data being analyzed, but the type-specific typanalyze
       * function can change them if it wants to store something else.
       */
      for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
      {
!         stats->statypid[i] = stats->attrtypid;
          stats->statyplen[i] = stats->attrtype->typlen;
          stats->statypbyval[i] = stats->attrtype->typbyval;
          stats->statypalign[i] = stats->attrtype->typalign;
***************
*** 1780,1786 ****
          attr->attstattarget = default_statistics_target;

      /* Look for default "<" and "=" operators for column's type */
!     get_sort_group_operators(attr->atttypid,
                               false, false, false,
                               <opr, &eqopr, NULL);

--- 1792,1798 ----
          attr->attstattarget = default_statistics_target;

      /* Look for default "<" and "=" operators for column's type */
!     get_sort_group_operators(stats->attrtypid,
                               false, false, false,
                               <opr, &eqopr, NULL);

***************
*** 1860,1869 ****
      int            nonnull_cnt = 0;
      int            toowide_cnt = 0;
      double        total_width = 0;
!     bool        is_varlena = (!stats->attr->attbyval &&
!                               stats->attr->attlen == -1);
!     bool        is_varwidth = (!stats->attr->attbyval &&
!                                stats->attr->attlen < 0);
      FmgrInfo    f_cmpeq;
      typedef struct
      {
--- 1872,1881 ----
      int            nonnull_cnt = 0;
      int            toowide_cnt = 0;
      double        total_width = 0;
!     bool        is_varlena = (!stats->attrtype->typbyval &&
!                               stats->attrtype->typlen == -1);
!     bool        is_varwidth = (!stats->attrtype->typbyval &&
!                                stats->attrtype->typlen < 0);
      FmgrInfo    f_cmpeq;
      typedef struct
      {
***************
*** 2126,2133 ****
              for (i = 0; i < num_mcv; i++)
              {
                  mcv_values[i] = datumCopy(track[i].value,
!                                           stats->attr->attbyval,
!                                           stats->attr->attlen);
                  mcv_freqs[i] = (double) track[i].count / (double) samplerows;
              }
              MemoryContextSwitchTo(old_context);
--- 2138,2145 ----
              for (i = 0; i < num_mcv; i++)
              {
                  mcv_values[i] = datumCopy(track[i].value,
!                                           stats->attrtype->typbyval,
!                                           stats->attrtype->typlen);
                  mcv_freqs[i] = (double) track[i].count / (double) samplerows;
              }
              MemoryContextSwitchTo(old_context);
***************
*** 2184,2193 ****
      int            nonnull_cnt = 0;
      int            toowide_cnt = 0;
      double        total_width = 0;
!     bool        is_varlena = (!stats->attr->attbyval &&
!                               stats->attr->attlen == -1);
!     bool        is_varwidth = (!stats->attr->attbyval &&
!                                stats->attr->attlen < 0);
      double        corr_xysum;
      Oid            cmpFn;
      int            cmpFlags;
--- 2196,2205 ----
      int            nonnull_cnt = 0;
      int            toowide_cnt = 0;
      double        total_width = 0;
!     bool        is_varlena = (!stats->attrtype->typbyval &&
!                               stats->attrtype->typlen == -1);
!     bool        is_varwidth = (!stats->attrtype->typbyval &&
!                                stats->attrtype->typlen < 0);
      double        corr_xysum;
      Oid            cmpFn;
      int            cmpFlags;
***************
*** 2476,2483 ****
              for (i = 0; i < num_mcv; i++)
              {
                  mcv_values[i] = datumCopy(values[track[i].first].value,
!                                           stats->attr->attbyval,
!                                           stats->attr->attlen);
                  mcv_freqs[i] = (double) track[i].count / (double) samplerows;
              }
              MemoryContextSwitchTo(old_context);
--- 2488,2495 ----
              for (i = 0; i < num_mcv; i++)
              {
                  mcv_values[i] = datumCopy(values[track[i].first].value,
!                                           stats->attrtype->typbyval,
!                                           stats->attrtype->typlen);
                  mcv_freqs[i] = (double) track[i].count / (double) samplerows;
              }
              MemoryContextSwitchTo(old_context);
***************
*** 2583,2590 ****
              for (i = 0; i < num_hist; i++)
              {
                  hist_values[i] = datumCopy(values[pos].value,
!                                            stats->attr->attbyval,
!                                            stats->attr->attlen);
                  pos += delta;
                  posfrac += deltafrac;
                  if (posfrac >= (num_hist - 1))
--- 2595,2602 ----
              for (i = 0; i < num_hist; i++)
              {
                  hist_values[i] = datumCopy(values[pos].value,
!                                            stats->attrtype->typbyval,
!                                            stats->attrtype->typlen);
                  pos += delta;
                  posfrac += deltafrac;
                  if (posfrac >= (num_hist - 1))
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.89
diff -c -r1.89 vacuum.h
*** src/include/commands/vacuum.h    9 Feb 2010 21:43:30 -0000    1.89
--- src/include/commands/vacuum.h    1 Aug 2010 19:56:12 -0000
***************
*** 62,70 ****
      /*
       * These fields are set up by the main ANALYZE code before invoking the
       * type-specific typanalyze function.
       */
      Form_pg_attribute attr;        /* copy of pg_attribute row for column */
!     Form_pg_type attrtype;        /* copy of pg_type row for column */
      MemoryContext anl_context;    /* where to save long-lived data */

      /*
--- 62,78 ----
      /*
       * These fields are set up by the main ANALYZE code before invoking the
       * type-specific typanalyze function.
+      *
+      * Note: do not assume that the data being analyzed has the same datatype
+      * shown in attr, ie do not trust attr->atttypid, attlen, etc.  This is
+      * because some index opclasses store a different type than the underlying
+      * column/expression.  Instead use attrtypid, attrtypmod, and attrtype for
+      * information about the datatype being fed to the typanalyze function.
       */
      Form_pg_attribute attr;        /* copy of pg_attribute row for column */
!     Oid            attrtypid;        /* type of data being analyzed */
!     int32        attrtypmod;        /* typmod of data being analyzed */
!     Form_pg_type attrtype;        /* copy of pg_type row for attrtypid */
      MemoryContext anl_context;    /* where to save long-lived data */

      /*
***************
*** 95,104 ****

      /*
       * These fields describe the stavalues[n] element types. They will be
!      * initialized to be the same as the column's that's underlying the slot,
!      * but a custom typanalyze function might want to store an array of
!      * something other than the analyzed column's elements. It should then
!      * overwrite these fields.
       */
      Oid            statypid[STATISTIC_NUM_SLOTS];
      int2        statyplen[STATISTIC_NUM_SLOTS];
--- 103,111 ----

      /*
       * These fields describe the stavalues[n] element types. They will be
!      * initialized to match attrtypid, but a custom typanalyze function might
!      * want to store an array of something other than the analyzed column's
!      * elements. It should then overwrite these fields.
       */
      Oid            statypid[STATISTIC_NUM_SLOTS];
      int2        statyplen[STATISTIC_NUM_SLOTS];

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: review: psql: edit function, show function commands patch
Следующее
От: Pavel Stehule
Дата:
Сообщение: Re: review: psql: edit function, show function commands patch