Re: GiST index build versus NaN coordinates

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: GiST index build versus NaN coordinates
Дата
Msg-id 1391.1468351441@sss.pgh.pa.us
обсуждение исходный текст
Ответ на GiST index build versus NaN coordinates  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
I wrote:
> I looked into the problem reported in bug #14238,
> https://www.postgresql.org/message-id/20160708151747.1426.60150@wrigleys.postgresql.org
> I think that probably the most reasonable answer is to replace all the
> raw "double" comparisons in this code with float8_cmp_internal() or
> something that's the moral equivalent of that.  Does anyone want to
> propose something else?

Here's a draft patch that modifies gistproc.c to treat NaNs essentially
as though they are infinities.  I've only touched the code associated
with GiST index insertion, not with searches, so it's quite possible that
searches will still not work right in the presence of NaNs.  However,
this does fix the index-build infinite loop complained of originally.

I've not yet looked at the SPGiST issues reported by Andreas.

            regards, tom lane

diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index e8213e2..d47211a 100644
*** a/src/backend/access/gist/gistproc.c
--- b/src/backend/access/gist/gistproc.c
***************
*** 17,36 ****
   */
  #include "postgres.h"

  #include "access/gist.h"
  #include "access/stratnum.h"
  #include "utils/geo_decls.h"


  static bool gist_box_leaf_consistent(BOX *key, BOX *query,
                           StrategyNumber strategy);
- static double size_box(BOX *box);
  static bool rtree_internal_consistent(BOX *key, BOX *query,
                            StrategyNumber strategy);

  /* Minimum accepted ratio of split */
  #define LIMIT_RATIO 0.3


  /**************************************************
   * Box ops
--- 17,47 ----
   */
  #include "postgres.h"

+ #include <math.h>
+
  #include "access/gist.h"
  #include "access/stratnum.h"
+ #include "utils/builtins.h"
  #include "utils/geo_decls.h"


  static bool gist_box_leaf_consistent(BOX *key, BOX *query,
                           StrategyNumber strategy);
  static bool rtree_internal_consistent(BOX *key, BOX *query,
                            StrategyNumber strategy);

  /* Minimum accepted ratio of split */
  #define LIMIT_RATIO 0.3

+ /* Convenience macros for NaN-aware comparisons */
+ #define FLOAT8_EQ(a,b)    (float8_cmp_internal(a, b) == 0)
+ #define FLOAT8_LT(a,b)    (float8_cmp_internal(a, b) < 0)
+ #define FLOAT8_LE(a,b)    (float8_cmp_internal(a, b) <= 0)
+ #define FLOAT8_GT(a,b)    (float8_cmp_internal(a, b) > 0)
+ #define FLOAT8_GE(a,b)    (float8_cmp_internal(a, b) >= 0)
+ #define FLOAT8_MAX(a,b)  (FLOAT8_GT(a, b) ? (a) : (b))
+ #define FLOAT8_MIN(a,b)  (FLOAT8_LT(a, b) ? (a) : (b))
+

  /**************************************************
   * Box ops
*************** static bool rtree_internal_consistent(BO
*** 40,51 ****
   * Calculates union of two boxes, a and b. The result is stored in *n.
   */
  static void
! rt_box_union(BOX *n, BOX *a, BOX *b)
  {
!     n->high.x = Max(a->high.x, b->high.x);
!     n->high.y = Max(a->high.y, b->high.y);
!     n->low.x = Min(a->low.x, b->low.x);
!     n->low.y = Min(a->low.y, b->low.y);
  }

  /*
--- 51,103 ----
   * Calculates union of two boxes, a and b. The result is stored in *n.
   */
  static void
! rt_box_union(BOX *n, const BOX *a, const BOX *b)
  {
!     n->high.x = FLOAT8_MAX(a->high.x, b->high.x);
!     n->high.y = FLOAT8_MAX(a->high.y, b->high.y);
!     n->low.x = FLOAT8_MIN(a->low.x, b->low.x);
!     n->low.y = FLOAT8_MIN(a->low.y, b->low.y);
! }
!
! /*
!  * Size of a BOX for penalty-calculation purposes.
!  * The result can be +Infinity, but not NaN.
!  */
! static double
! size_box(const BOX *box)
! {
!     /*
!      * Check for zero-width cases.  Note that we define the size of a zero-
!      * by-infinity box as zero.  It's important to special-case this somehow,
!      * as naively multiplying infinity by zero will produce NaN.
!      *
!      * The less-than cases should not happen, but if they do, say "zero".
!      */
!     if (FLOAT8_LE(box->high.x, box->low.x) ||
!         FLOAT8_LE(box->high.y, box->low.y))
!         return 0.0;
!
!     /*
!      * We treat NaN as larger than +Infinity, so any distance involving a NaN
!      * and a non-NaN is infinite.  Note the previous check eliminated the
!      * possibility that the low fields are NaNs.
!      */
!     if (isnan(box->high.x) || isnan(box->high.y))
!         return get_float8_infinity();
!     return (box->high.x - box->low.x) * (box->high.y - box->low.y);
! }
!
! /*
!  * Return amount by which the union of the two boxes is larger than
!  * the original BOX's area.  The result can be +Infinity, but not NaN.
!  */
! static double
! box_penalty(const BOX *original, const BOX *new)
! {
!     BOX            unionbox;
!
!     rt_box_union(&unionbox, original, new);
!     return size_box(&unionbox) - size_box(original);
  }

  /*
*************** gist_box_consistent(PG_FUNCTION_ARGS)
*** 85,100 ****
                                                   strategy));
  }

  static void
! adjustBox(BOX *b, BOX *addon)
  {
!     if (b->high.x < addon->high.x)
          b->high.x = addon->high.x;
!     if (b->low.x > addon->low.x)
          b->low.x = addon->low.x;
!     if (b->high.y < addon->high.y)
          b->high.y = addon->high.y;
!     if (b->low.y > addon->low.y)
          b->low.y = addon->low.y;
  }

--- 137,155 ----
                                                   strategy));
  }

+ /*
+  * Increase BOX b to include addon.
+  */
  static void
! adjustBox(BOX *b, const BOX *addon)
  {
!     if (FLOAT8_LT(b->high.x, addon->high.x))
          b->high.x = addon->high.x;
!     if (FLOAT8_GT(b->low.x, addon->low.x))
          b->low.x = addon->low.x;
!     if (FLOAT8_LT(b->high.y, addon->high.y))
          b->high.y = addon->high.y;
!     if (FLOAT8_GT(b->low.y, addon->low.y))
          b->low.y = addon->low.y;
  }

*************** gist_box_penalty(PG_FUNCTION_ARGS)
*** 174,183 ****
      float       *result = (float *) PG_GETARG_POINTER(2);
      BOX           *origbox = DatumGetBoxP(origentry->key);
      BOX           *newbox = DatumGetBoxP(newentry->key);
-     BOX            unionbox;

!     rt_box_union(&unionbox, origbox, newbox);
!     *result = (float) (size_box(&unionbox) - size_box(origbox));
      PG_RETURN_POINTER(result);
  }

--- 229,236 ----
      float       *result = (float *) PG_GETARG_POINTER(2);
      BOX           *origbox = DatumGetBoxP(origentry->key);
      BOX           *newbox = DatumGetBoxP(newentry->key);

!     *result = (float) box_penalty(origbox, newbox);
      PG_RETURN_POINTER(result);
  }

*************** interval_cmp_lower(const void *i1, const
*** 290,301 ****
      double        lower1 = ((const SplitInterval *) i1)->lower,
                  lower2 = ((const SplitInterval *) i2)->lower;

!     if (lower1 < lower2)
!         return -1;
!     else if (lower1 > lower2)
!         return 1;
!     else
!         return 0;
  }

  /*
--- 343,349 ----
      double        lower1 = ((const SplitInterval *) i1)->lower,
                  lower2 = ((const SplitInterval *) i2)->lower;

!     return float8_cmp_internal(lower1, lower2);
  }

  /*
*************** interval_cmp_upper(const void *i1, const
*** 307,322 ****
      double        upper1 = ((const SplitInterval *) i1)->upper,
                  upper2 = ((const SplitInterval *) i2)->upper;

!     if (upper1 < upper2)
!         return -1;
!     else if (upper1 > upper2)
!         return 1;
!     else
!         return 0;
  }

  /*
!  * Replace negative value with zero.
   */
  static inline float
  non_negative(float val)
--- 355,365 ----
      double        upper1 = ((const SplitInterval *) i1)->upper,
                  upper2 = ((const SplitInterval *) i2)->upper;

!     return float8_cmp_internal(upper1, upper2);
  }

  /*
!  * Replace negative (or NaN) value with zero.
   */
  static inline float
  non_negative(float val)
*************** g_box_consider_split(ConsiderSplitContex
*** 436,459 ****
  }

  /*
-  * Return increase of original BOX area by new BOX area insertion.
-  */
- static double
- box_penalty(BOX *original, BOX *new)
- {
-     double        union_width,
-                 union_height;
-
-     union_width = Max(original->high.x, new->high.x) -
-         Min(original->low.x, new->low.x);
-     union_height = Max(original->high.y, new->high.y) -
-         Min(original->low.y, new->low.y);
-     return union_width * union_height - (original->high.x - original->low.x) *
-         (original->high.y - original->low.y);
- }
-
- /*
   * Compare common entries by their deltas.
   */
  static int
  common_entry_cmp(const void *i1, const void *i2)
--- 479,486 ----
  }

  /*
   * Compare common entries by their deltas.
+  * (We assume the deltas can't be NaN.)
   */
  static int
  common_entry_cmp(const void *i1, const void *i2)
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 615,623 ****
              /*
               * Find next lower bound of right group.
               */
!             while (i1 < nentries && rightLower == intervalsLower[i1].lower)
              {
!                 leftUpper = Max(leftUpper, intervalsLower[i1].upper);
                  i1++;
              }
              if (i1 >= nentries)
--- 642,652 ----
              /*
               * Find next lower bound of right group.
               */
!             while (i1 < nentries &&
!                    FLOAT8_EQ(rightLower, intervalsLower[i1].lower))
              {
!                 if (FLOAT8_LT(leftUpper, intervalsLower[i1].upper))
!                     leftUpper = intervalsLower[i1].upper;
                  i1++;
              }
              if (i1 >= nentries)
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 628,634 ****
               * Find count of intervals which anyway should be placed to the
               * left group.
               */
!             while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
                  i2++;

              /*
--- 657,664 ----
               * Find count of intervals which anyway should be placed to the
               * left group.
               */
!             while (i2 < nentries &&
!                    FLOAT8_LE(intervalsUpper[i2].upper, leftUpper))
                  i2++;

              /*
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 650,658 ****
              /*
               * Find next upper bound of left group.
               */
!             while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
              {
!                 rightLower = Min(rightLower, intervalsUpper[i2].lower);
                  i2--;
              }
              if (i2 < 0)
--- 680,689 ----
              /*
               * Find next upper bound of left group.
               */
!             while (i2 >= 0 && FLOAT8_EQ(leftUpper, intervalsUpper[i2].upper))
              {
!                 if (FLOAT8_GT(rightLower, intervalsUpper[i2].lower))
!                     rightLower = intervalsUpper[i2].lower;
                  i2--;
              }
              if (i2 < 0)
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 663,669 ****
               * Find count of intervals which anyway should be placed to the
               * right group.
               */
!             while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
                  i1--;

              /*
--- 694,700 ----
               * Find count of intervals which anyway should be placed to the
               * right group.
               */
!             while (i1 >= 0 && FLOAT8_GE(intervalsLower[i1].lower, rightLower))
                  i1--;

              /*
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 751,760 ****
              upper = box->high.y;
          }

!         if (upper <= context.leftUpper)
          {
              /* Fits to the left group */
!             if (lower >= context.rightLower)
              {
                  /* Fits also to the right group, so "common entry" */
                  commonEntries[commonEntriesCount++].index = i;
--- 782,791 ----
              upper = box->high.y;
          }

!         if (FLOAT8_LE(upper, context.leftUpper))
          {
              /* Fits to the left group */
!             if (FLOAT8_GE(lower, context.rightLower))
              {
                  /* Fits also to the right group, so "common entry" */
                  commonEntries[commonEntriesCount++].index = i;
*************** gist_box_picksplit(PG_FUNCTION_ARGS)
*** 772,778 ****
               * entry didn't fit on the left group, it better fit in the right
               * group.
               */
!             Assert(lower >= context.rightLower);

              /* Doesn't fit to the left group, so join to the right group */
              PLACE_RIGHT(box, i);
--- 803,809 ----
               * entry didn't fit on the left group, it better fit in the right
               * group.
               */
!             Assert(FLOAT8_GE(lower, context.rightLower));

              /* Doesn't fit to the left group, so join to the right group */
              PLACE_RIGHT(box, i);
*************** gist_box_same(PG_FUNCTION_ARGS)
*** 856,863 ****
      bool       *result = (bool *) PG_GETARG_POINTER(2);

      if (b1 && b2)
!         *result = (b1->low.x == b2->low.x && b1->low.y == b2->low.y &&
!                    b1->high.x == b2->high.x && b1->high.y == b2->high.y);
      else
          *result = (b1 == NULL && b2 == NULL);
      PG_RETURN_POINTER(result);
--- 887,896 ----
      bool       *result = (bool *) PG_GETARG_POINTER(2);

      if (b1 && b2)
!         *result = (FLOAT8_EQ(b1->low.x, b2->low.x) &&
!                    FLOAT8_EQ(b1->low.y, b2->low.y) &&
!                    FLOAT8_EQ(b1->high.x, b2->high.x) &&
!                    FLOAT8_EQ(b1->high.y, b2->high.y));
      else
          *result = (b1 == NULL && b2 == NULL);
      PG_RETURN_POINTER(result);
*************** gist_box_leaf_consistent(BOX *key, BOX *
*** 943,956 ****
      return retval;
  }

- static double
- size_box(BOX *box)
- {
-     if (box->high.x <= box->low.x || box->high.y <= box->low.y)
-         return 0.0;
-     return (box->high.x - box->low.x) * (box->high.y - box->low.y);
- }
-
  /*****************************************
   * Common rtree functions (for boxes, polygons, and circles)
   *****************************************/
--- 976,981 ----
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index cdc5d52..8aa17e1 100644
*** a/src/backend/utils/adt/float.c
--- b/src/backend/utils/adt/float.c
*************** float8        degree_c_one_half = 0.5;
*** 90,97 ****
  float8        degree_c_one = 1.0;

  /* Local function prototypes */
- static int    float4_cmp_internal(float4 a, float4 b);
- static int    float8_cmp_internal(float8 a, float8 b);
  static double sind_q1(double x);
  static double cosd_q1(double x);
  static void init_degree_constants(void);
--- 90,95 ----
*************** float8div(PG_FUNCTION_ARGS)
*** 936,942 ****
  /*
   *        float4{eq,ne,lt,le,gt,ge}        - float4/float4 comparison operations
   */
! static int
  float4_cmp_internal(float4 a, float4 b)
  {
      /*
--- 934,940 ----
  /*
   *        float4{eq,ne,lt,le,gt,ge}        - float4/float4 comparison operations
   */
! int
  float4_cmp_internal(float4 a, float4 b)
  {
      /*
*************** btfloat4sortsupport(PG_FUNCTION_ARGS)
*** 1050,1056 ****
  /*
   *        float8{eq,ne,lt,le,gt,ge}        - float8/float8 comparison operations
   */
! static int
  float8_cmp_internal(float8 a, float8 b)
  {
      /*
--- 1048,1054 ----
  /*
   *        float8{eq,ne,lt,le,gt,ge}        - float8/float8 comparison operations
   */
! int
  float8_cmp_internal(float8 a, float8 b)
  {
      /*
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 01976a1..8cebc86 100644
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern int    is_infinite(double val);
*** 346,351 ****
--- 346,353 ----
  extern double float8in_internal(char *num, char **endptr_p,
                    const char *type_name, const char *orig_string);
  extern char *float8out_internal(double num);
+ extern int    float4_cmp_internal(float4 a, float4 b);
+ extern int    float8_cmp_internal(float8 a, float8 b);

  extern Datum float4in(PG_FUNCTION_ARGS);
  extern Datum float4out(PG_FUNCTION_ARGS);

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Kenneth Marshall
Дата:
Сообщение: Re: pg_basebackup wish list
Следующее
От: Merlin Moncure
Дата:
Сообщение: DO with a large amount of statements get stuck with high memory consumption