Re: BUG #16865: Regression: GIN Negated prefix search returns results that contain the search term

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: BUG #16865: Regression: GIN Negated prefix search returns results that contain the search term
Дата
Msg-id 2317851.1613435266@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: BUG #16865: Regression: GIN Negated prefix search returns results that contain the search term  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: BUG #16865: Regression: GIN Negated prefix search returns results that contain the search term  (Dimitri Nüscheler <dimitri.nuescheler@gmail.com>)
Список pgsql-bugs
I wrote:
> Anyway, this seems to put the final nail in the coffin of the idea
> that it's sufficient for TS_execute to return a boolean.  At least
> the tsginidx.c callers really need to see the 3-way result.  Hence
> I propose the attached patch.  It fixes the given test case, but
> I wonder if you can try it and see if you see any remaining problems.

After further reflection, it seems like now that we've done that,
we should go all the way and replace the out-of-band recheck result
from checkcondition_gin with a MAYBE result, as attached.

AFAICT the previous patch didn't leave any user-visible bug,
but there is an inefficiency in using the out-of-band signaling.
Consider a query like

    (a & b:B) | c

and suppose that some index entry has "b" and "c" but not "a".
With the code as it stands, when checkcondition_gin checks for a
match to "b:B" it will find one, and then because of the weight
restriction it will return TS_MAYBE, and also set need_recheck.
Then TS_execute_recurse will combine the TS_MAYBE with the TS_NO
for "a" to produce TS_NO, and finally combine that with TS_YES
for "c" to get TS_YES.  This is a correct result: the row
unquestionably matches the query.  But since we set the
need_recheck flag, we'll force a heap visit and recheck anyway.
That's bogus, and we don't need to accept it anymore now that
the data return path is fully ternary-clean.  Returning TS_MAYBE
is sufficient to do all the right things, so I propose the
attached v2.

            regards, tom lane

diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 6c913baaba..78de531eb8 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -175,7 +175,6 @@ typedef struct
     QueryItem  *first_item;
     GinTernaryValue *check;
     int           *map_item_operand;
-    bool       *need_recheck;
 } GinChkVal;

 /*
@@ -184,27 +183,24 @@ typedef struct
 static TSTernaryValue
 checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
+    GinTernaryValue result;
     GinChkVal  *gcv = (GinChkVal *) checkval;
     int            j;

-    /*
-     * if any val requiring a weight is used or caller needs position
-     * information then set recheck flag
-     */
-    if (val->weight != 0 || data != NULL)
-        *(gcv->need_recheck) = true;
-
     /* convert item's number to corresponding entry's (operand's) number */
     j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];

+    /* determine presence of current entry in indexed value */
+    result = gcv->check[j];
+
     /*
-     * return presence of current entry in indexed value; but TRUE becomes
-     * MAYBE in the presence of a query requiring recheck
+     * if any val requiring a weight is used or caller needs position
+     * information then we must recheck, so replace TRUE with MAYBE.
      */
-    if (gcv->check[j] == GIN_TRUE)
+    if (result == GIN_TRUE)
     {
         if (val->weight != 0 || data != NULL)
-            return TS_MAYBE;
+            result = GIN_MAYBE;
     }

     /*
@@ -212,7 +208,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
      * assignments.  We could use a switch statement to map the values if that
      * ever stops being true, but it seems unlikely to happen.
      */
-    return (TSTernaryValue) gcv->check[j];
+    return (TSTernaryValue) result;
 }

 Datum
@@ -244,12 +240,23 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
                          "sizes of GinTernaryValue and bool are not equal");
         gcv.check = (GinTernaryValue *) check;
         gcv.map_item_operand = (int *) (extra_data[0]);
-        gcv.need_recheck = recheck;

-        res = TS_execute(GETQUERY(query),
-                         &gcv,
-                         TS_EXEC_PHRASE_NO_POS,
-                         checkcondition_gin);
+        switch (TS_execute_ternary(GETQUERY(query),
+                                   &gcv,
+                                   TS_EXEC_PHRASE_NO_POS,
+                                   checkcondition_gin))
+        {
+            case TS_NO:
+                res = false;
+                break;
+            case TS_YES:
+                res = true;
+                break;
+            case TS_MAYBE:
+                res = true;
+                *recheck = true;
+                break;
+        }
     }

     PG_RETURN_BOOL(res);
@@ -266,10 +273,6 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
     /* int32    nkeys = PG_GETARG_INT32(3); */
     Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
     GinTernaryValue res = GIN_FALSE;
-    bool        recheck;
-
-    /* Initially assume query doesn't require recheck */
-    recheck = false;

     if (query->size > 0)
     {
@@ -282,13 +285,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
         gcv.first_item = GETQUERY(query);
         gcv.check = check;
         gcv.map_item_operand = (int *) (extra_data[0]);
-        gcv.need_recheck = &recheck;

-        if (TS_execute(GETQUERY(query),
-                       &gcv,
-                       TS_EXEC_PHRASE_NO_POS,
-                       checkcondition_gin))
-            res = recheck ? GIN_MAYBE : GIN_TRUE;
+        res = TS_execute_ternary(GETQUERY(query),
+                                 &gcv,
+                                 TS_EXEC_PHRASE_NO_POS,
+                                 checkcondition_gin);
     }

     PG_RETURN_GIN_TERNARY_VALUE(res);
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 2939fb5c21..9236ebcc8f 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1854,6 +1854,18 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags,
     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
 }

+/*
+ * Evaluate tsquery boolean expression.
+ *
+ * This is the same as TS_execute except that TS_MAYBE is returned as-is.
+ */
+TSTernaryValue
+TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
+                   TSExecuteCallback chkcond)
+{
+    return TS_execute_recurse(curitem, arg, flags, chkcond);
+}
+
 /*
  * TS_execute recursion for operators above any phrase operator.  Here we do
  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 69a9ba8524..4266560151 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -199,6 +199,9 @@ typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,

 extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
                        TSExecuteCallback chkcond);
+extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
+                                         uint32 flags,
+                                         TSExecuteCallback chkcond);
 extern bool tsquery_requires_match(QueryItem *curitem);

 /*

В списке pgsql-bugs по дате отправления:

Предыдущее
От: PG Bug reporting form
Дата:
Сообщение: BUG #16868: Cannot find sqlstat error codes.
Следующее
От: Michael Paquier
Дата:
Сообщение: Re: BUG #16866: pg_basebackup Windows Server 2016