Re: Some regular-expression performance hacking

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Some regular-expression performance hacking
Дата
Msg-id 1813969.1613250697@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Some regular-expression performance hacking  ("Joel Jacobson" <joel@compiler.org>)
Ответы Re: Some regular-expression performance hacking  ("Joel Jacobson" <joel@compiler.org>)
Список pgsql-hackers
"Joel Jacobson" <joel@compiler.org> writes:
> No is_match differences were detected, good!
> However, there were 23 cases where what got captured differed:

These all stem from the same oversight: checkmatchall() was being
too cavalier by ignoring "pseudocolor" arcs, which are arcs that
match start-of-string or end-of-string markers.  I'd supposed that
pseudocolor arcs necessarily match parallel RAINBOW arcs, because
they start out that way (cf. newnfa).  But it turns out that
some edge-of-string constraints can be optimized in such a way that
they only appear in the final NFA in the guise of missing or extra
pseudocolor arcs.  We have to actually check that the pseudocolor arcs
match the RAINBOW arcs, otherwise our "matchall" NFA isn't one because
it acts differently at the start or end of the string than it does
elsewhere.

So here's a revised pair of patches (0001 is actually the same as
before).

Thanks again for testing!

            regards, tom lane

diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 1e4f0121f3..fcf03de32d 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -282,8 +282,8 @@ typedef struct
 typedef int TrgmColor;

 /* We assume that colors returned by the regexp engine cannot be these: */
-#define COLOR_UNKNOWN    (-1)
-#define COLOR_BLANK        (-2)
+#define COLOR_UNKNOWN    (-3)
+#define COLOR_BLANK        (-4)

 typedef struct
 {
@@ -780,7 +780,8 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
         palloc0(colorsCount * sizeof(TrgmColorInfo));

     /*
-     * Loop over colors, filling TrgmColorInfo about each.
+     * Loop over colors, filling TrgmColorInfo about each.  Note we include
+     * WHITE (0) even though we know it'll be reported as non-expandable.
      */
     for (i = 0; i < colorsCount; i++)
     {
@@ -1098,9 +1099,9 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
             /* Add enter key to this state */
             addKeyToQueue(trgmNFA, &destKey);
         }
-        else
+        else if (arc->co >= 0)
         {
-            /* Regular color */
+            /* Regular color (including WHITE) */
             TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];

             if (colorInfo->expandable)
@@ -1156,6 +1157,14 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
                 addKeyToQueue(trgmNFA, &destKey);
             }
         }
+        else
+        {
+            /* RAINBOW: treat as unexpandable color */
+            destKey.prefix.colors[0] = COLOR_UNKNOWN;
+            destKey.prefix.colors[1] = COLOR_UNKNOWN;
+            destKey.nstate = arc->to;
+            addKeyToQueue(trgmNFA, &destKey);
+        }
     }

     pfree(arcs);
@@ -1216,10 +1225,10 @@ addArcs(TrgmNFA *trgmNFA, TrgmState *state)
             /*
              * Ignore non-expandable colors; addKey already handled the case.
              *
-             * We need no special check for begin/end pseudocolors here.  We
-             * don't need to do any processing for them, and they will be
-             * marked non-expandable since the regex engine will have reported
-             * them that way.
+             * We need no special check for WHITE or begin/end pseudocolors
+             * here.  We don't need to do any processing for them, and they
+             * will be marked non-expandable since the regex engine will have
+             * reported them that way.
              */
             if (!colorInfo->expandable)
                 continue;
diff --git a/src/backend/regex/README b/src/backend/regex/README
index f08aab69e3..cc1834b89c 100644
--- a/src/backend/regex/README
+++ b/src/backend/regex/README
@@ -261,6 +261,18 @@ and the NFA has these arcs:
     states 4 -> 5 on color 2 ("x" only)
 which can be seen to be a correct representation of the regex.

+There is one more complexity, which is how to handle ".", that is a
+match-anything atom.  We used to do that by generating a "rainbow"
+of arcs of all live colors between the two NFA states before and after
+the dot.  That's expensive in itself when there are lots of colors,
+and it also typically adds lots of follow-on arc-splitting work for the
+color splitting logic.  Now we handle this case by generating a single arc
+labeled with the special color RAINBOW, meaning all colors.  Such arcs
+never need to be split, so they help keep NFAs small in this common case.
+(Note: this optimization doesn't help in REG_NLSTOP mode, where "." is
+not supposed to match newline.  In that case we still handle "." by
+generating an almost-rainbow of all colors except newline's color.)
+
 Given this summary, we can see we need the following operations for
 colors:

@@ -349,6 +361,8 @@ The possible arc types are:

     PLAIN arcs, which specify matching of any character of a given "color"
     (see above).  These are dumped as "[color_number]->to_state".
+    In addition there can be "rainbow" PLAIN arcs, which are dumped as
+    "[*]->to_state".

     EMPTY arcs, which specify a no-op transition to another state.  These
     are dumped as "->to_state".
@@ -356,11 +370,11 @@ The possible arc types are:
     AHEAD constraints, which represent a "next character must be of this
     color" constraint.  AHEAD differs from a PLAIN arc in that the input
     character is not consumed when crossing the arc.  These are dumped as
-    ">color_number>->to_state".
+    ">color_number>->to_state", or possibly ">*>->to_state".

     BEHIND constraints, which represent a "previous character must be of
     this color" constraint, which likewise consumes no input.  These are
-    dumped as "<color_number<->to_state".
+    dumped as "<color_number<->to_state", or possibly "<*<->to_state".

     '^' arcs, which specify a beginning-of-input constraint.  These are
     dumped as "^0->to_state" or "^1->to_state" for beginning-of-string and
diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c
index f5a4151757..0864011cce 100644
--- a/src/backend/regex/regc_color.c
+++ b/src/backend/regex/regc_color.c
@@ -977,6 +977,7 @@ colorchain(struct colormap *cm,
 {
     struct colordesc *cd = &cm->cd[a->co];

+    assert(a->co >= 0);
     if (cd->arcs != NULL)
         cd->arcs->colorchainRev = a;
     a->colorchain = cd->arcs;
@@ -994,6 +995,7 @@ uncolorchain(struct colormap *cm,
     struct colordesc *cd = &cm->cd[a->co];
     struct arc *aa = a->colorchainRev;

+    assert(a->co >= 0);
     if (aa == NULL)
     {
         assert(cd->arcs == a);
@@ -1012,6 +1014,9 @@ uncolorchain(struct colormap *cm,

 /*
  * rainbow - add arcs of all full colors (but one) between specified states
+ *
+ * If there isn't an exception color, we now generate just a single arc
+ * labeled RAINBOW, saving lots of arc-munging later on.
  */
 static void
 rainbow(struct nfa *nfa,
@@ -1025,6 +1030,13 @@ rainbow(struct nfa *nfa,
     struct colordesc *end = CDEND(cm);
     color        co;

+    if (but == COLORLESS)
+    {
+        newarc(nfa, type, RAINBOW, from, to);
+        return;
+    }
+
+    /* Gotta do it the hard way.  Skip subcolors, pseudocolors, and "but" */
     for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
         if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
             !(cd->flags & PSEUDO))
@@ -1034,13 +1046,16 @@ rainbow(struct nfa *nfa,
 /*
  * colorcomplement - add arcs of complementary colors
  *
+ * We add arcs of all colors that are not pseudocolors and do not match
+ * any of the "of" state's PLAIN outarcs.
+ *
  * The calling sequence ought to be reconciled with cloneouts().
  */
 static void
 colorcomplement(struct nfa *nfa,
                 struct colormap *cm,
                 int type,
-                struct state *of,    /* complements of this guy's PLAIN outarcs */
+                struct state *of,
                 struct state *from,
                 struct state *to)
 {
@@ -1049,6 +1064,11 @@ colorcomplement(struct nfa *nfa,
     color        co;

     assert(of != from);
+
+    /* A RAINBOW arc matches all colors, making the complement empty */
+    if (findarc(of, PLAIN, RAINBOW) != NULL)
+        return;
+
     for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
         if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
             if (findarc(of, PLAIN, co) == NULL)
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 92c9c4d795..1ac030570d 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -271,6 +271,11 @@ destroystate(struct nfa *nfa,
  *
  * This function checks to make sure that no duplicate arcs are created.
  * In general we never want duplicates.
+ *
+ * However: in principle, a RAINBOW arc is redundant with any plain arc
+ * (unless that arc is for a pseudocolor).  But we don't try to recognize
+ * that redundancy, either here or in allied operations such as moveins().
+ * The pseudocolor consideration makes that more costly than it seems worth.
  */
 static void
 newarc(struct nfa *nfa,
@@ -1170,6 +1175,9 @@ copyouts(struct nfa *nfa,

 /*
  * cloneouts - copy out arcs of a state to another state pair, modifying type
+ *
+ * This is only used to convert PLAIN arcs to AHEAD/BEHIND arcs, which share
+ * the same interpretation of "co".  It wouldn't be sensible with LACONs.
  */
 static void
 cloneouts(struct nfa *nfa,
@@ -1181,9 +1189,13 @@ cloneouts(struct nfa *nfa,
     struct arc *a;

     assert(old != from);
+    assert(type == AHEAD || type == BEHIND);

     for (a = old->outs; a != NULL; a = a->outchain)
+    {
+        assert(a->type == PLAIN);
         newarc(nfa, type, a->co, from, to);
+    }
 }

 /*
@@ -1597,7 +1609,7 @@ pull(struct nfa *nfa,
     for (a = from->ins; a != NULL && !NISERR(); a = nexta)
     {
         nexta = a->inchain;
-        switch (combine(con, a))
+        switch (combine(nfa, con, a))
         {
             case INCOMPATIBLE:    /* destroy the arc */
                 freearc(nfa, a);
@@ -1624,6 +1636,10 @@ pull(struct nfa *nfa,
                 cparc(nfa, a, s, to);
                 freearc(nfa, a);
                 break;
+            case REPLACEARC:    /* replace arc's color */
+                newarc(nfa, a->type, con->co, a->from, to);
+                freearc(nfa, a);
+                break;
             default:
                 assert(NOTREACHED);
                 break;
@@ -1764,7 +1780,7 @@ push(struct nfa *nfa,
     for (a = to->outs; a != NULL && !NISERR(); a = nexta)
     {
         nexta = a->outchain;
-        switch (combine(con, a))
+        switch (combine(nfa, con, a))
         {
             case INCOMPATIBLE:    /* destroy the arc */
                 freearc(nfa, a);
@@ -1791,6 +1807,10 @@ push(struct nfa *nfa,
                 cparc(nfa, a, from, s);
                 freearc(nfa, a);
                 break;
+            case REPLACEARC:    /* replace arc's color */
+                newarc(nfa, a->type, con->co, from, a->to);
+                freearc(nfa, a);
+                break;
             default:
                 assert(NOTREACHED);
                 break;
@@ -1810,9 +1830,11 @@ push(struct nfa *nfa,
  * #def INCOMPATIBLE    1    // destroys arc
  * #def SATISFIED        2    // constraint satisfied
  * #def COMPATIBLE        3    // compatible but not satisfied yet
+ * #def REPLACEARC        4    // replace arc's color with constraint color
  */
 static int
-combine(struct arc *con,
+combine(struct nfa *nfa,
+        struct arc *con,
         struct arc *a)
 {
 #define  CA(ct,at)     (((ct)<<CHAR_BIT) | (at))
@@ -1827,14 +1849,46 @@ combine(struct arc *con,
         case CA(BEHIND, PLAIN):
             if (con->co == a->co)
                 return SATISFIED;
+            if (con->co == RAINBOW)
+            {
+                /* con is satisfied unless arc's color is a pseudocolor */
+                if (!(nfa->cm->cd[a->co].flags & PSEUDO))
+                    return SATISFIED;
+            }
+            else if (a->co == RAINBOW)
+            {
+                /* con is incompatible if it's for a pseudocolor */
+                if (nfa->cm->cd[con->co].flags & PSEUDO)
+                    return INCOMPATIBLE;
+                /* otherwise, constraint constrains arc to be only its color */
+                return REPLACEARC;
+            }
             return INCOMPATIBLE;
             break;
         case CA('^', '^'):        /* collision, similar constraints */
         case CA('$', '$'):
-        case CA(AHEAD, AHEAD):
+            if (con->co == a->co)    /* true duplication */
+                return SATISFIED;
+            return INCOMPATIBLE;
+            break;
+        case CA(AHEAD, AHEAD):    /* collision, similar constraints */
         case CA(BEHIND, BEHIND):
             if (con->co == a->co)    /* true duplication */
                 return SATISFIED;
+            if (con->co == RAINBOW)
+            {
+                /* con is satisfied unless arc's color is a pseudocolor */
+                if (!(nfa->cm->cd[a->co].flags & PSEUDO))
+                    return SATISFIED;
+            }
+            else if (a->co == RAINBOW)
+            {
+                /* con is incompatible if it's for a pseudocolor */
+                if (nfa->cm->cd[con->co].flags & PSEUDO)
+                    return INCOMPATIBLE;
+                /* otherwise, constraint constrains arc to be only its color */
+                return REPLACEARC;
+            }
             return INCOMPATIBLE;
             break;
         case CA('^', BEHIND):    /* collision, dissimilar constraints */
@@ -2895,6 +2949,7 @@ compact(struct nfa *nfa,
                     break;
                 case LACON:
                     assert(s->no != cnfa->pre);
+                    assert(a->co >= 0);
                     ca->co = (color) (cnfa->ncolors + a->co);
                     ca->to = a->to->no;
                     ca++;
@@ -2902,7 +2957,7 @@ compact(struct nfa *nfa,
                     break;
                 default:
                     NERR(REG_ASSERT);
-                    break;
+                    return;
             }
         carcsort(first, ca - first);
         ca->co = COLORLESS;
@@ -3068,13 +3123,22 @@ dumparc(struct arc *a,
     switch (a->type)
     {
         case PLAIN:
-            fprintf(f, "[%ld]", (long) a->co);
+            if (a->co == RAINBOW)
+                fprintf(f, "[*]");
+            else
+                fprintf(f, "[%ld]", (long) a->co);
             break;
         case AHEAD:
-            fprintf(f, ">%ld>", (long) a->co);
+            if (a->co == RAINBOW)
+                fprintf(f, ">*>");
+            else
+                fprintf(f, ">%ld>", (long) a->co);
             break;
         case BEHIND:
-            fprintf(f, "<%ld<", (long) a->co);
+            if (a->co == RAINBOW)
+                fprintf(f, "<*<");
+            else
+                fprintf(f, "<%ld<", (long) a->co);
             break;
         case LACON:
             fprintf(f, ":%ld:", (long) a->co);
@@ -3161,7 +3225,9 @@ dumpcstate(int st,
     pos = 1;
     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
     {
-        if (ca->co < cnfa->ncolors)
+        if (ca->co == RAINBOW)
+            fprintf(f, "\t[*]->%d", ca->to);
+        else if (ca->co < cnfa->ncolors)
             fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
         else
             fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 91078dcd80..5956b86026 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -158,7 +158,8 @@ static int    push(struct nfa *, struct arc *, struct state **);
 #define INCOMPATIBLE    1        /* destroys arc */
 #define SATISFIED    2            /* constraint satisfied */
 #define COMPATIBLE    3            /* compatible but not satisfied yet */
-static int    combine(struct arc *, struct arc *);
+#define REPLACEARC    4            /* replace arc's color with constraint color */
+static int    combine(struct nfa *nfa, struct arc *con, struct arc *a);
 static void fixempties(struct nfa *, FILE *);
 static struct state *emptyreachable(struct nfa *, struct state *,
                                     struct state *, struct arc **);
@@ -289,9 +290,11 @@ struct vars
 #define SBEGIN    'A'                /* beginning of string (even if not BOL) */
 #define SEND    'Z'                /* end of string (even if not EOL) */

-/* is an arc colored, and hence on a color chain? */
+/* is an arc colored, and hence should belong to a color chain? */
+/* the test on "co" eliminates RAINBOW arcs, which we don't bother to chain */
 #define COLORED(a) \
-    ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
+    ((a)->co >= 0 && \
+     ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND))


 /* static function list */
@@ -1390,7 +1393,8 @@ bracket(struct vars *v,
  * cbracket - handle complemented bracket expression
  * We do it by calling bracket() with dummy endpoints, and then complementing
  * the result.  The alternative would be to invoke rainbow(), and then delete
- * arcs as the b.e. is seen... but that gets messy.
+ * arcs as the b.e. is seen... but that gets messy, and is really quite
+ * infeasible now that rainbow() just puts out one RAINBOW arc.
  */
 static void
 cbracket(struct vars *v,
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index 5695e158a5..32be2592c5 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -612,6 +612,7 @@ miss(struct vars *v,
     unsigned    h;
     struct carc *ca;
     struct sset *p;
+    int            ispseudocolor;
     int            ispost;
     int            noprogress;
     int            gotstate;
@@ -643,13 +644,15 @@ miss(struct vars *v,
      */
     for (i = 0; i < d->wordsper; i++)
         d->work[i] = 0;            /* build new stateset bitmap in d->work */
+    ispseudocolor = d->cm->cd[co].flags & PSEUDO;
     ispost = 0;
     noprogress = 1;
     gotstate = 0;
     for (i = 0; i < d->nstates; i++)
         if (ISBSET(css->states, i))
             for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
-                if (ca->co == co)
+                if (ca->co == co ||
+                    (ca->co == RAINBOW && !ispseudocolor))
                 {
                     BSET(d->work, ca->to);
                     gotstate = 1;
diff --git a/src/backend/regex/regexport.c b/src/backend/regex/regexport.c
index d4f940b8c3..a493dbe88c 100644
--- a/src/backend/regex/regexport.c
+++ b/src/backend/regex/regexport.c
@@ -222,7 +222,8 @@ pg_reg_colorisend(const regex_t *regex, int co)
  * Get number of member chrs of color number "co".
  *
  * Note: we return -1 if the color number is invalid, or if it is a special
- * color (WHITE or a pseudocolor), or if the number of members is uncertain.
+ * color (WHITE, RAINBOW, or a pseudocolor), or if the number of members is
+ * uncertain.
  * Callers should not try to extract the members if -1 is returned.
  */
 int
@@ -233,7 +234,7 @@ pg_reg_getnumcharacters(const regex_t *regex, int co)
     assert(regex != NULL && regex->re_magic == REMAGIC);
     cm = &((struct guts *) regex->re_guts)->cmap;

-    if (co <= 0 || co > cm->max)    /* we reject 0 which is WHITE */
+    if (co <= 0 || co > cm->max)    /* <= 0 rejects WHITE and RAINBOW */
         return -1;
     if (cm->cd[co].flags & PSEUDO)    /* also pseudocolors (BOS etc) */
         return -1;
@@ -257,7 +258,7 @@ pg_reg_getnumcharacters(const regex_t *regex, int co)
  * whose length chars_len must be at least as long as indicated by
  * pg_reg_getnumcharacters(), else not all chars will be returned.
  *
- * Fetching the members of WHITE or a pseudocolor is not supported.
+ * Fetching the members of WHITE, RAINBOW, or a pseudocolor is not supported.
  *
  * Caution: this is a relatively expensive operation.
  */
diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c
index 1d4593ac94..e2fbad7a8a 100644
--- a/src/backend/regex/regprefix.c
+++ b/src/backend/regex/regprefix.c
@@ -165,9 +165,13 @@ findprefix(struct cnfa *cnfa,
             /* We can ignore BOS/BOL arcs */
             if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
                 continue;
-            /* ... but EOS/EOL arcs terminate the search, as do LACONs */
+
+            /*
+             * ... but EOS/EOL arcs terminate the search, as do RAINBOW arcs
+             * and LACONs
+             */
             if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1] ||
-                ca->co >= cnfa->ncolors)
+                ca->co == RAINBOW || ca->co >= cnfa->ncolors)
             {
                 thiscolor = COLORLESS;
                 break;
diff --git a/src/include/regex/regexport.h b/src/include/regex/regexport.h
index e6209463f7..99c4fb854e 100644
--- a/src/include/regex/regexport.h
+++ b/src/include/regex/regexport.h
@@ -30,6 +30,10 @@

 #include "regex/regex.h"

+/* These macros must match corresponding ones in regguts.h: */
+#define COLOR_WHITE        0        /* color for chars not appearing in regex */
+#define COLOR_RAINBOW    (-2)    /* represents all colors except pseudocolors */
+
 /* information about one arc of a regex's NFA */
 typedef struct
 {
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 5d0e7a961c..5bcd669d59 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -130,11 +130,16 @@
 /*
  * As soon as possible, we map chrs into equivalence classes -- "colors" --
  * which are of much more manageable number.
+ *
+ * To further reduce the number of arcs in NFAs and DFAs, we also have a
+ * special RAINBOW "color" that can be assigned to an arc.  This is not a
+ * real color, in that it has no entry in color maps.
  */
 typedef short color;            /* colors of characters */

 #define MAX_COLOR    32767        /* max color (must fit in 'color' datatype) */
 #define COLORLESS    (-1)        /* impossible color */
+#define RAINBOW        (-2)        /* represents all colors except pseudocolors */
 #define WHITE        0            /* default color, parent of all others */
 /* Note: various places in the code know that WHITE is zero */

@@ -276,7 +281,7 @@ struct state;
 struct arc
 {
     int            type;            /* 0 if free, else an NFA arc type code */
-    color        co;
+    color        co;                /* color the arc matches (possibly RAINBOW) */
     struct state *from;            /* where it's from (and contained within) */
     struct state *to;            /* where it's to */
     struct arc *outchain;        /* link in *from's outs chain or free chain */
@@ -284,6 +289,7 @@ struct arc
 #define  freechain    outchain    /* we do not maintain "freechainRev" */
     struct arc *inchain;        /* link in *to's ins chain */
     struct arc *inchainRev;        /* back-link in *to's ins chain */
+    /* these fields are not used when co == RAINBOW: */
     struct arc *colorchain;        /* link in color's arc chain */
     struct arc *colorchainRev;    /* back-link in color's arc chain */
 };
@@ -344,6 +350,9 @@ struct nfa
  * Plain arcs just store the transition color number as "co".  LACON arcs
  * store the lookaround constraint number plus cnfa.ncolors as "co".  LACON
  * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
+ *
+ * Note that in a plain arc, "co" can be RAINBOW; since that's negative,
+ * it doesn't break the rule about how to recognize LACON arcs.
  */
 struct carc
 {
diff --git a/src/backend/regex/README b/src/backend/regex/README
index cc1834b89c..a83ab5074d 100644
--- a/src/backend/regex/README
+++ b/src/backend/regex/README
@@ -410,14 +410,20 @@ substring, or an imaginary following EOS character if the substring is at
 the end of the input.
 3. If the NFA is (or can be) in the goal state at this point, it matches.

+This definition is necessary to support regexes that begin or end with
+constraints such as \m and \M, which imply requirements on the adjacent
+character if any.  The executor implements that by checking if the
+adjacent character (or BOS/BOL/EOS/EOL pseudo-character) is of the
+right color, and it does that in the same loop that checks characters
+within the match.
+
 So one can mentally execute an untransformed NFA by taking ^ and $ as
 ordinary constraints that match at start and end of input; but plain
 arcs out of the start state should be taken as matches for the character
 before the target substring, and similarly, plain arcs leading to the
 post state are matches for the character after the target substring.
-This definition is necessary to support regexes that begin or end with
-constraints such as \m and \M, which imply requirements on the adjacent
-character if any.  NFAs for simple unanchored patterns will usually have
-pre-state outarcs for all possible character colors as well as BOS and
-BOL, and post-state inarcs for all possible character colors as well as
-EOS and EOL, so that the executor's behavior will work.
+After the optimize() transformation, there are explicit arcs mentioning
+BOS/BOL/EOS/EOL adjacent to the pre-state and post-state.  So a finished
+NFA for a pattern without anchors or adjacent-character constraints will
+have pre-state outarcs for RAINBOW (all possible character colors) as well
+as BOS and BOL, and likewise post-state inarcs for RAINBOW, EOS, and EOL.
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 1ac030570d..e5c5c65d84 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -65,6 +65,8 @@ newnfa(struct vars *v,
     nfa->v = v;
     nfa->bos[0] = nfa->bos[1] = COLORLESS;
     nfa->eos[0] = nfa->eos[1] = COLORLESS;
+    nfa->flags = 0;
+    nfa->minmatchall = nfa->maxmatchall = -1;
     nfa->parent = parent;        /* Precedes newfstate so parent is valid. */
     nfa->post = newfstate(nfa, '@');    /* number 0 */
     nfa->pre = newfstate(nfa, '>'); /* number 1 */
@@ -2875,8 +2877,14 @@ analyze(struct nfa *nfa)
     if (NISERR())
         return 0;

+    /* Detect whether NFA can't match anything */
     if (nfa->pre->outs == NULL)
         return REG_UIMPOSSIBLE;
+
+    /* Detect whether NFA matches all strings (possibly with length bounds) */
+    checkmatchall(nfa);
+
+    /* Detect whether NFA can possibly match a zero-length string */
     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
         for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
             if (aa->to == nfa->post)
@@ -2884,6 +2892,279 @@ analyze(struct nfa *nfa)
     return 0;
 }

+/*
+ * checkmatchall - does the NFA represent no more than a string length test?
+ *
+ * If so, set nfa->minmatchall and nfa->maxmatchall correctly (they are -1
+ * to begin with) and set the MATCHALL bit in nfa->flags.
+ *
+ * To succeed, we require all arcs to be PLAIN RAINBOW arcs, except for those
+ * for pseudocolors (i.e., BOS/BOL/EOS/EOL).  We must be able to reach the
+ * post state via RAINBOW arcs, and if there are any loops in the graph, they
+ * must be loop-to-self arcs, ensuring that each loop iteration consumes
+ * exactly one character.  (Longer loops are problematic because they create
+ * non-consecutive possible match lengths; we have no good way to represent
+ * that situation for lengths beyond the DUPINF limit.)
+ *
+ * Pseudocolor arcs complicate things a little.  We know that they can only
+ * appear as pre-state outarcs (for BOS/BOL) or post-state inarcs (for
+ * EOS/EOL).  There, they must exactly replicate the parallel RAINBOW arcs,
+ * e.g. if the pre state has one RAINBOW outarc to state 2, it must have BOS
+ * and BOL outarcs to state 2, and no others.  Missing or extra pseudocolor
+ * arcs can occur, meaning that the NFA involves some constraint on the
+ * adjacent characters, which makes it not a matchall NFA.
+ */
+static void
+checkmatchall(struct nfa *nfa)
+{
+    bool        hasmatch[DUPINF + 1];
+    int            minmatch,
+                maxmatch,
+                morematch;
+
+    /*
+     * hasmatch[i] will be set true if a match of length i is feasible, for i
+     * from 0 to DUPINF-1.  hasmatch[DUPINF] will be set true if every match
+     * length of DUPINF or more is feasible.
+     */
+    memset(hasmatch, 0, sizeof(hasmatch));
+
+    /*
+     * Recursively search the graph for all-RAINBOW paths to the "post" state,
+     * starting at the "pre" state.  The -1 initial depth accounts for the
+     * fact that transitions out of the "pre" state are not part of the
+     * matched string.  We likewise don't count the final transition to the
+     * "post" state as part of the match length.  (But we still insist that
+     * those transitions have RAINBOW arcs, otherwise there are lookbehind or
+     * lookahead constraints at the start/end of the pattern.)
+     */
+    if (!checkmatchall_recurse(nfa, nfa->pre, false, -1, hasmatch))
+        return;
+
+    /*
+     * We found some all-RAINBOW paths, and not anything that we couldn't
+     * handle.  Now verify that pseudocolor arcs adjacent to the pre and post
+     * states match the RAINBOW arcs there.  (We could do this while
+     * recursing, but it's expensive and unlikely to fail, so do it last.)
+     */
+    if (!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[0]) ||
+        !check_out_colors_match(nfa->pre, nfa->bos[0], RAINBOW) ||
+        !check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[1]) ||
+        !check_out_colors_match(nfa->pre, nfa->bos[1], RAINBOW))
+        return;
+    if (!check_in_colors_match(nfa->post, RAINBOW, nfa->eos[0]) ||
+        !check_in_colors_match(nfa->post, nfa->eos[0], RAINBOW) ||
+        !check_in_colors_match(nfa->post, RAINBOW, nfa->eos[1]) ||
+        !check_in_colors_match(nfa->post, nfa->eos[1], RAINBOW))
+        return;
+
+    /*
+     * hasmatch[] now represents the set of possible match lengths; but we
+     * want to reduce that to a min and max value, because it doesn't seem
+     * worth complicating regexec.c to deal with nonconsecutive possible match
+     * lengths.  Find min and max of first run of lengths, then verify there
+     * are no nonconsecutive lengths.
+     */
+    for (minmatch = 0; minmatch <= DUPINF; minmatch++)
+    {
+        if (hasmatch[minmatch])
+            break;
+    }
+    assert(minmatch <= DUPINF); /* else checkmatchall_recurse lied */
+    for (maxmatch = minmatch; maxmatch < DUPINF; maxmatch++)
+    {
+        if (!hasmatch[maxmatch + 1])
+            break;
+    }
+    for (morematch = maxmatch + 1; morematch <= DUPINF; morematch++)
+    {
+        if (hasmatch[morematch])
+            return;                /* fail, there are nonconsecutive lengths */
+    }
+
+    /* Success, so record the info */
+    nfa->minmatchall = minmatch;
+    nfa->maxmatchall = maxmatch;
+    nfa->flags |= MATCHALL;
+}
+
+/*
+ * checkmatchall_recurse - recursive search for checkmatchall
+ *
+ * s is the current state
+ * foundloop is true if any predecessor state has a loop-to-self
+ * depth is the current recursion depth (starting at -1)
+ * hasmatch[] is the output area for recording feasible match lengths
+ *
+ * We return true if there is at least one all-RAINBOW path to the "post"
+ * state and no non-matchall paths; otherwise false.  Note we assume that
+ * any dead-end paths have already been removed, else we might return
+ * false unnecessarily.
+ */
+static bool
+checkmatchall_recurse(struct nfa *nfa, struct state *s,
+                      bool foundloop, int depth,
+                      bool *hasmatch)
+{
+    bool        result = false;
+    struct arc *a;
+
+    /*
+     * Since this is recursive, it could be driven to stack overflow.  But we
+     * need not treat that as a hard failure; just deem the NFA non-matchall.
+     */
+    if (STACK_TOO_DEEP(nfa->v->re))
+        return false;
+
+    /*
+     * Likewise, if we get to a depth too large to represent correctly in
+     * maxmatchall, fail quietly.
+     */
+    if (depth >= DUPINF)
+        return false;
+
+    /*
+     * Scan the outarcs to detect cases we can't handle, and to see if there
+     * is a loop-to-self here.  We need to know about any such loop before we
+     * recurse, so it's hard to avoid making two passes over the outarcs.  In
+     * any case, checking for showstoppers before we recurse is probably best.
+     */
+    for (a = s->outs; a != NULL; a = a->outchain)
+    {
+        if (a->type != PLAIN)
+            return false;        /* any LACONs make it non-matchall */
+        if (a->co != RAINBOW)
+        {
+            if (nfa->cm->cd[a->co].flags & PSEUDO)
+            {
+                /*
+                 * Pseudocolor arc: verify it's in a valid place (this seems
+                 * quite unlikely to fail, but let's be sure).
+                 */
+                if (s == nfa->pre &&
+                    (a->co == nfa->bos[0] || a->co == nfa->bos[1]))
+                     /* okay BOS/BOL arc */ ;
+                else if (a->to == nfa->post &&
+                         (a->co == nfa->eos[0] || a->co == nfa->eos[1]))
+                     /* okay EOS/EOL arc */ ;
+                else
+                    return false;    /* unexpected pseudocolor arc */
+                /* We'll finish checking these arcs after the recursion */
+                continue;
+            }
+            return false;        /* any other color makes it non-matchall */
+        }
+        if (a->to == s)
+        {
+            /*
+             * We found a cycle of length 1, so remember that to pass down to
+             * successor states.  (It doesn't matter if there was also such a
+             * loop at a predecessor state.)
+             */
+            foundloop = true;
+        }
+        else if (a->to->tmp)
+        {
+            /* We found a cycle of length > 1, so fail. */
+            return false;
+        }
+    }
+
+    /* We need to recurse, so mark state as under consideration */
+    assert(s->tmp == NULL);
+    s->tmp = s;
+
+    for (a = s->outs; a != NULL; a = a->outchain)
+    {
+        if (a->co != RAINBOW)
+            continue;            /* ignore pseudocolor arcs */
+        if (a->to == nfa->post)
+        {
+            /* We found an all-RAINBOW path to the post state */
+            result = true;
+            /* Record potential match lengths */
+            assert(depth >= 0);
+            hasmatch[depth] = true;
+            if (foundloop)
+            {
+                /* A predecessor loop makes all larger lengths match, too */
+                int            i;
+
+                for (i = depth + 1; i <= DUPINF; i++)
+                    hasmatch[i] = true;
+            }
+        }
+        else if (a->to != s)
+        {
+            /* This is a new path forward; recurse to investigate */
+            result = checkmatchall_recurse(nfa, a->to,
+                                           foundloop, depth + 1,
+                                           hasmatch);
+            /* Fail if any recursive path fails */
+            if (!result)
+                break;
+        }
+    }
+
+    s->tmp = NULL;
+    return result;
+}
+
+/*
+ * check_out_colors_match - subroutine for checkmatchall
+ *
+ * Check if every s outarc of color co1 has a matching outarc of color co2.
+ * (checkmatchall_recurse already verified that all of the outarcs are PLAIN,
+ * so we need not examine arc types here.)
+ */
+static bool
+check_out_colors_match(struct state *s, color co1, color co2)
+{
+    struct arc *a1;
+    struct arc *a2;
+
+    for (a1 = s->outs; a1 != NULL; a1 = a1->outchain)
+    {
+        if (a1->co != co1)
+            continue;
+        for (a2 = s->outs; a2 != NULL; a2 = a2->outchain)
+        {
+            if (a2->co == co2 && a2->to == a1->to)
+                break;
+        }
+        if (a2 == NULL)
+            return false;
+    }
+    return true;
+}
+
+/*
+ * check_in_colors_match - subroutine for checkmatchall
+ *
+ * Check if every s inarc of color co1 has a matching inarc of color co2.
+ * (For paranoia's sake, ignore any non-PLAIN arcs here.)
+ */
+static bool
+check_in_colors_match(struct state *s, color co1, color co2)
+{
+    struct arc *a1;
+    struct arc *a2;
+
+    for (a1 = s->ins; a1 != NULL; a1 = a1->inchain)
+    {
+        if (a1->type != PLAIN || a1->co != co1)
+            continue;
+        for (a2 = s->ins; a2 != NULL; a2 = a2->inchain)
+        {
+            if (a2->type == PLAIN && a2->co == co2 && a2->from == a1->from)
+                break;
+        }
+        if (a2 == NULL)
+            return false;
+    }
+    return true;
+}
+
 /*
  * compact - construct the compact representation of an NFA
  */
@@ -2930,7 +3211,9 @@ compact(struct nfa *nfa,
     cnfa->eos[0] = nfa->eos[0];
     cnfa->eos[1] = nfa->eos[1];
     cnfa->ncolors = maxcolor(nfa->cm) + 1;
-    cnfa->flags = 0;
+    cnfa->flags = nfa->flags;
+    cnfa->minmatchall = nfa->minmatchall;
+    cnfa->maxmatchall = nfa->maxmatchall;

     ca = cnfa->arcs;
     for (s = nfa->states; s != NULL; s = s->next)
@@ -3034,6 +3317,11 @@ dumpnfa(struct nfa *nfa,
         fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
     if (nfa->eos[1] != COLORLESS)
         fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
+    if (nfa->flags & HASLACONS)
+        fprintf(f, ", haslacons");
+    if (nfa->flags & MATCHALL)
+        fprintf(f, ", minmatchall %d, maxmatchall %d",
+                nfa->minmatchall, nfa->maxmatchall);
     fprintf(f, "\n");
     for (s = nfa->states; s != NULL; s = s->next)
     {
@@ -3201,6 +3489,9 @@ dumpcnfa(struct cnfa *cnfa,
         fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
     if (cnfa->flags & HASLACONS)
         fprintf(f, ", haslacons");
+    if (cnfa->flags & MATCHALL)
+        fprintf(f, ", minmatchall %d, maxmatchall %d",
+                cnfa->minmatchall, cnfa->maxmatchall);
     fprintf(f, "\n");
     for (st = 0; st < cnfa->nstates; st++)
         dumpcstate(st, cnfa, f);
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 5956b86026..17e79ad0fb 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -175,6 +175,11 @@ static void cleanup(struct nfa *);
 static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
 static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
 static long analyze(struct nfa *);
+static void checkmatchall(struct nfa *);
+static bool checkmatchall_recurse(struct nfa *, struct state *,
+                                  bool, int, bool *);
+static bool check_out_colors_match(struct state *, color, color);
+static bool check_in_colors_match(struct state *, color, color);
 static void compact(struct nfa *, struct cnfa *);
 static void carcsort(struct carc *, size_t);
 static int    carc_cmp(const void *, const void *);
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index 32be2592c5..20ec463204 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -58,6 +58,29 @@ longest(struct vars *v,
     if (hitstopp != NULL)
         *hitstopp = 0;

+    /* fast path for matchall NFAs */
+    if (d->cnfa->flags & MATCHALL)
+    {
+        size_t        nchr = stop - start;
+        size_t        maxmatchall = d->cnfa->maxmatchall;
+
+        if (nchr < d->cnfa->minmatchall)
+            return NULL;
+        if (maxmatchall == DUPINF)
+        {
+            if (stop == v->stop && hitstopp != NULL)
+                *hitstopp = 1;
+        }
+        else
+        {
+            if (stop == v->stop && nchr <= maxmatchall + 1 && hitstopp != NULL)
+                *hitstopp = 1;
+            if (nchr > maxmatchall)
+                return start + maxmatchall;
+        }
+        return stop;
+    }
+
     /* initialize */
     css = initialize(v, d, start);
     if (css == NULL)
@@ -187,6 +210,24 @@ shortest(struct vars *v,
     if (hitstopp != NULL)
         *hitstopp = 0;

+    /* fast path for matchall NFAs */
+    if (d->cnfa->flags & MATCHALL)
+    {
+        size_t        nchr = min - start;
+
+        if (d->cnfa->maxmatchall != DUPINF &&
+            nchr > d->cnfa->maxmatchall)
+            return NULL;
+        if ((max - start) < d->cnfa->minmatchall)
+            return NULL;
+        if (nchr < d->cnfa->minmatchall)
+            min = start + d->cnfa->minmatchall;
+        if (coldp != NULL)
+            *coldp = start;
+        /* there is no case where we should set *hitstopp */
+        return min;
+    }
+
     /* initialize */
     css = initialize(v, d, start);
     if (css == NULL)
@@ -312,6 +353,22 @@ matchuntil(struct vars *v,
     struct sset *ss;
     struct colormap *cm = d->cm;

+    /* fast path for matchall NFAs */
+    if (d->cnfa->flags & MATCHALL)
+    {
+        size_t        nchr = probe - v->start;
+
+        /*
+         * It might seem that we should check maxmatchall too, but the
+         * implicit .* at the front of the pattern absorbs any extra
+         * characters.  Thus, we should always match as long as there are at
+         * least minmatchall characters.
+         */
+        if (nchr < d->cnfa->minmatchall)
+            return 0;
+        return 1;
+    }
+
     /* initialize and startup, or restart, if necessary */
     if (cp == NULL || cp > probe)
     {
diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c
index e2fbad7a8a..ec435b6f5f 100644
--- a/src/backend/regex/regprefix.c
+++ b/src/backend/regex/regprefix.c
@@ -77,6 +77,10 @@ pg_regprefix(regex_t *re,
     assert(g->tree != NULL);
     cnfa = &g->tree->cnfa;

+    /* matchall NFAs never have a fixed prefix */
+    if (cnfa->flags & MATCHALL)
+        return REG_NOMATCH;
+
     /*
      * Since a correct NFA should never contain any exit-free loops, it should
      * not be possible for our traversal to return to a previously visited NFA
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 5bcd669d59..956b37b72d 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -331,6 +331,9 @@ struct nfa
     struct colormap *cm;        /* the color map */
     color        bos[2];            /* colors, if any, assigned to BOS and BOL */
     color        eos[2];            /* colors, if any, assigned to EOS and EOL */
+    int            flags;            /* flags to pass forward to cNFA */
+    int            minmatchall;    /* min number of chrs to match, if matchall */
+    int            maxmatchall;    /* max number of chrs to match, or DUPINF */
     struct vars *v;                /* simplifies compile error reporting */
     struct nfa *parent;            /* parent NFA, if any */
 };
@@ -353,6 +356,14 @@ struct nfa
  *
  * Note that in a plain arc, "co" can be RAINBOW; since that's negative,
  * it doesn't break the rule about how to recognize LACON arcs.
+ *
+ * We have special markings for "trivial" NFAs that can match any string
+ * (possibly with limits on the number of characters therein).  In such a
+ * case, flags & MATCHALL is set (and HASLACONS can't be set).  Then the
+ * fields minmatchall and maxmatchall give the minimum and maximum numbers
+ * of characters to match.  For example, ".*" produces minmatchall = 0
+ * and maxmatchall = DUPINF, while ".+" produces minmatchall = 1 and
+ * maxmatchall = DUPINF.
  */
 struct carc
 {
@@ -366,6 +377,7 @@ struct cnfa
     int            ncolors;        /* number of colors (max color in use + 1) */
     int            flags;
 #define  HASLACONS    01            /* uses lookaround constraints */
+#define  MATCHALL    02            /* matches all strings of a range of lengths */
     int            pre;            /* setup state number */
     int            post;            /* teardown state number */
     color        bos[2];            /* colors, if any, assigned to BOS and BOL */
@@ -375,6 +387,9 @@ struct cnfa
     struct carc **states;        /* vector of pointers to outarc lists */
     /* states[n] are pointers into a single malloc'd array of arcs */
     struct carc *arcs;            /* the area for the lists */
+    /* these fields are used only in a MATCHALL NFA (else they're -1): */
+    int            minmatchall;    /* min number of chrs to match */
+    int            maxmatchall;    /* max number of chrs to match, or DUPINF */
 };

 #define ZAPCNFA(cnfa)    ((cnfa).nstates = 0)
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 0dc2265d8b..f01ca071d9 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -3315,6 +3315,21 @@ select * from test_regex('(?=b)b', 'a', 'HP');
  {0,REG_ULOOKAROUND,REG_UNONPOSIX}
 (1 row)

+-- expectMatch    23.9 HP        ...(?!.)    abcde    cde
+select * from test_regex('...(?!.)', 'abcde', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {cde}
+(2 rows)
+
+-- expectNomatch    23.10 HP    ...(?=.)    abc
+select * from test_regex('...(?=.)', 'abc', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+(1 row)
+
 -- Postgres addition: lookbehind constraints
 -- expectMatch    23.11 HPN        (?<=a)b*    ab    b
 select * from test_regex('(?<=a)b*', 'ab', 'HPN');
@@ -3376,6 +3391,39 @@ select * from test_regex('(?<=b)b', 'b', 'HP');
  {0,REG_ULOOKAROUND,REG_UNONPOSIX}
 (1 row)

+-- expectMatch    23.19 HP        (?<=.)..    abcde    bc
+select * from test_regex('(?<=.)..', 'abcde', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {bc}
+(2 rows)
+
+-- expectMatch    23.20 HP        (?<=..)a*    aaabb    a
+select * from test_regex('(?<=..)a*', 'aaabb', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {a}
+(2 rows)
+
+-- expectMatch    23.21 HP        (?<=..)b*    aaabb    {}
+-- Note: empty match here is correct, it matches after the first 2 characters
+select * from test_regex('(?<=..)b*', 'aaabb', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {""}
+(2 rows)
+
+-- expectMatch    23.22 HP        (?<=..)b+    aaabb    bb
+select * from test_regex('(?<=..)b+', 'aaabb', 'HP');
+            test_regex
+-----------------------------------
+ {0,REG_ULOOKAROUND,REG_UNONPOSIX}
+ {bb}
+(2 rows)
+
 -- doing 24 "non-greedy quantifiers"
 -- expectMatch    24.1  PT    ab+?        abb    ab
 select * from test_regex('ab+?', 'abb', 'PT');
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 1a2bfa6235..ae7d6b43e4 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -1049,6 +1049,10 @@ select * from test_regex('a(?!b)b*', 'a', 'HP');
 select * from test_regex('(?=b)b', 'b', 'HP');
 -- expectNomatch    23.8 HP        (?=b)b        a
 select * from test_regex('(?=b)b', 'a', 'HP');
+-- expectMatch    23.9 HP        ...(?!.)    abcde    cde
+select * from test_regex('...(?!.)', 'abcde', 'HP');
+-- expectNomatch    23.10 HP    ...(?=.)    abc
+select * from test_regex('...(?=.)', 'abc', 'HP');

 -- Postgres addition: lookbehind constraints

@@ -1068,6 +1072,15 @@ select * from test_regex('a(?<!b)b*', 'a', 'HP');
 select * from test_regex('(?<=b)b', 'bb', 'HP');
 -- expectNomatch    23.18 HP        (?<=b)b        b
 select * from test_regex('(?<=b)b', 'b', 'HP');
+-- expectMatch    23.19 HP        (?<=.)..    abcde    bc
+select * from test_regex('(?<=.)..', 'abcde', 'HP');
+-- expectMatch    23.20 HP        (?<=..)a*    aaabb    a
+select * from test_regex('(?<=..)a*', 'aaabb', 'HP');
+-- expectMatch    23.21 HP        (?<=..)b*    aaabb    {}
+-- Note: empty match here is correct, it matches after the first 2 characters
+select * from test_regex('(?<=..)b*', 'aaabb', 'HP');
+-- expectMatch    23.22 HP        (?<=..)b+    aaabb    bb
+select * from test_regex('(?<=..)b+', 'aaabb', 'HP');

 -- doing 24 "non-greedy quantifiers"


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Ranier Vilela
Дата:
Сообщение: Re: Possible dereference null return (src/backend/replication/logical/reorderbuffer.c)
Следующее
От: Zhihong Yu
Дата:
Сообщение: Re: Possible dereference null return (src/backend/replication/logical/reorderbuffer.c)