Re: Index-only scan for btree_gist turns bpchar to char

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Index-only scan for btree_gist turns bpchar to char
Дата
Msg-id 1162796.1641496875@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Index-only scan for btree_gist turns bpchar to char  (Japin Li <japinli@hotmail.com>)
Ответы Re: Index-only scan for btree_gist turns bpchar to char  (Japin Li <japinli@hotmail.com>)
Список pgsql-hackers
Japin Li <japinli@hotmail.com> writes:
> On Thu, 06 Jan 2022 at 00:34, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> The minimum-effort fix would be to apply rtrim1 to both strings
>> in gbt_bpchar_consistent, but I wonder if we can improve on that
>> by pushing the ignore-trailing-spaces behavior further down.
>> I didn't look yet at whether gbt_var_consistent can support
>> any type-specific behavior.

> Adding type-specific for gbt_var_consistent looks like more generally.
> For example, for bpchar type, we should call bpchareq rather than texteq.

I looked at this and it does seem like it might work, as per attached
patch.  The one thing that is troubling me is that the opclass is set
up to apply gbt_text_same, which is formally the Wrong Thing for bpchar,
because the equality semantics shouldn't be quite the same.  But we
could not fix that without a module version bump, which is annoying.
I think that it might not be necessary to change it, because

(1) There's no such thing as unique GIST indexes, so it should not
matter if the "same" function is a bit stricter than the datatype's
nominal notion of equality.  It's certainly okay for that to vary
from the semantics applied by the consistent function --- GIST has
no idea that the consistent function is allegedly testing equality.

(2) If all the input values for a column have been coerced to the same
typmod, then it doesn't matter because two values that are equal after
space-stripping would be equal without space-stripping, too.

However, (2) doesn't hold for an existing index that the user has failed
to REINDEX, because then the index would contain some space-stripped
values that same() will not say are equal to incoming new values.
Again, I think this doesn't matter much, but maybe I'm missing
something.  I've not really dug into what GIST uses the same()
function for.

In any case, if we do need same() to implement the identical
behavior to bpchareq(), then the other solution isn't sufficient
either.

So in short, it seems like we ought to do some compatibility testing
and see if this code misbehaves at all with an index created by the
old code.  I don't particularly want to do that ... any volunteers?

            regards, tom lane

diff --git a/contrib/btree_gist/btree_text.c b/contrib/btree_gist/btree_text.c
index 8019d11281..be0eac7975 100644
--- a/contrib/btree_gist/btree_text.c
+++ b/contrib/btree_gist/btree_text.c
@@ -90,6 +90,76 @@ static gbtree_vinfo tinfo =
     NULL
 };

+/* bpchar needs its own comparison rules */
+
+static bool
+gbt_bpchargt(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetBool(DirectFunctionCall2Coll(bpchargt,
+                                                collation,
+                                                PointerGetDatum(a),
+                                                PointerGetDatum(b)));
+}
+
+static bool
+gbt_bpcharge(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetBool(DirectFunctionCall2Coll(bpcharge,
+                                                collation,
+                                                PointerGetDatum(a),
+                                                PointerGetDatum(b)));
+}
+
+static bool
+gbt_bpchareq(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetBool(DirectFunctionCall2Coll(bpchareq,
+                                                collation,
+                                                PointerGetDatum(a),
+                                                PointerGetDatum(b)));
+}
+
+static bool
+gbt_bpcharle(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetBool(DirectFunctionCall2Coll(bpcharle,
+                                                collation,
+                                                PointerGetDatum(a),
+                                                PointerGetDatum(b)));
+}
+
+static bool
+gbt_bpcharlt(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetBool(DirectFunctionCall2Coll(bpcharlt,
+                                                collation,
+                                                PointerGetDatum(a),
+                                                PointerGetDatum(b)));
+}
+
+static int32
+gbt_bpcharcmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
+{
+    return DatumGetInt32(DirectFunctionCall2Coll(bpcharcmp,
+                                                 collation,
+                                                 PointerGetDatum(a),
+                                                 PointerGetDatum(b)));
+}
+
+static gbtree_vinfo bptinfo =
+{
+    gbt_t_bpchar,
+    0,
+    false,
+    gbt_bpchargt,
+    gbt_bpcharge,
+    gbt_bpchareq,
+    gbt_bpcharle,
+    gbt_bpcharlt,
+    gbt_bpcharcmp,
+    NULL
+};
+

 /**************************************************
  * Text ops
@@ -112,29 +182,8 @@ gbt_text_compress(PG_FUNCTION_ARGS)
 Datum
 gbt_bpchar_compress(PG_FUNCTION_ARGS)
 {
-    GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
-    GISTENTRY  *retval;
-
-    if (tinfo.eml == 0)
-    {
-        tinfo.eml = pg_database_encoding_max_length();
-    }
-
-    if (entry->leafkey)
-    {
-
-        Datum        d = DirectFunctionCall1(rtrim1, entry->key);
-        GISTENTRY    trim;
-
-        gistentryinit(trim, d,
-                      entry->rel, entry->page,
-                      entry->offset, true);
-        retval = gbt_var_compress(&trim, &tinfo);
-    }
-    else
-        retval = entry;
-
-    PG_RETURN_POINTER(retval);
+    /* This should never have been distinct from gbt_text_compress */
+    return gbt_text_compress(fcinfo);
 }


@@ -179,18 +228,17 @@ gbt_bpchar_consistent(PG_FUNCTION_ARGS)
     bool        retval;
     GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(entry->key);
     GBT_VARKEY_R r = gbt_var_key_readable(key);
-    void       *trim = (void *) DatumGetPointer(DirectFunctionCall1(rtrim1, PointerGetDatum(query)));

     /* All cases served by this function are exact */
     *recheck = false;

-    if (tinfo.eml == 0)
+    if (bptinfo.eml == 0)
     {
-        tinfo.eml = pg_database_encoding_max_length();
+        bptinfo.eml = pg_database_encoding_max_length();
     }

-    retval = gbt_var_consistent(&r, trim, strategy, PG_GET_COLLATION(),
-                                GIST_LEAF(entry), &tinfo, fcinfo->flinfo);
+    retval = gbt_var_consistent(&r, query, strategy, PG_GET_COLLATION(),
+                                GIST_LEAF(entry), &bptinfo, fcinfo->flinfo);
     PG_RETURN_BOOL(retval);
 }

diff --git a/contrib/btree_gist/expected/char.out b/contrib/btree_gist/expected/char.out
index d715c045cc..45cf9f38d6 100644
--- a/contrib/btree_gist/expected/char.out
+++ b/contrib/btree_gist/expected/char.out
@@ -75,8 +75,8 @@ SELECT * FROM chartmp WHERE a BETWEEN '31a' AND '31c';
 (2 rows)

 SELECT * FROM chartmp WHERE a BETWEEN '31a' AND '31c';
-  a
-------
- 31b0
+                a
+----------------------------------
+ 31b0
 (1 row)

diff --git a/contrib/btree_gist/expected/char_1.out b/contrib/btree_gist/expected/char_1.out
index 867318002b..7b7824b717 100644
--- a/contrib/btree_gist/expected/char_1.out
+++ b/contrib/btree_gist/expected/char_1.out
@@ -75,8 +75,8 @@ SELECT * FROM chartmp WHERE a BETWEEN '31a' AND '31c';
 (2 rows)

 SELECT * FROM chartmp WHERE a BETWEEN '31a' AND '31c';
-  a
-------
- 31b0
+                a
+----------------------------------
+ 31b0
 (1 row)


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tomas Vondra
Дата:
Сообщение: Re: Collecting statistics about contents of JSONB columns
Следующее
От: Tomas Vondra
Дата:
Сообщение: Re: Collecting statistics about contents of JSONB columns