Re: Lift line-length limit for pg_service.conf

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Lift line-length limit for pg_service.conf
Дата
Msg-id 1504903.1600889631@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Lift line-length limit for pg_service.conf  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: Lift line-length limit for pg_service.conf
Список pgsql-hackers
I wrote:
> So the attached adds a pstrdup/pfree to ensure that "curline"
> has its own storage, putting us right back at two palloc/pfree
> cycles per line.  I don't think there's a lot of choice though;
> in fact, I'm leaning to the idea that we need to back-patch
> that part of this.  The odds of trouble in a production build
> probably aren't high, but still...

So I did that, but while looking at the main patch I realized that
a few things were still being left on the table:

1. We can save a palloc/pfree cycle in the case where no encoding
conversion need be performed, by allowing "curline" to point at
the StringInfo buffer instead of necessarily being a separate
palloc'd string.  (This seems safe since no other code should
manipulate the StringInfo before the next tsearch_readline call;
so we can't get confused about whether "curline" has its own storage.)

2. In the case where encoding conversion is performed, we still have
to pstrdup the result to have a safe copy for "curline".  But I
realized that we're making a poor choice of which copy to return to
the caller.  The output of encoding conversion is likely to be a good
bit bigger than necessary, cf. pg_do_encoding_conversion.  So if the
caller is one that saves the output string directly into a long-lived
dictionary structure, this wastes space.  We should return the pstrdup
result instead, and keep the conversion result as "curline", where
we'll free it next time through.

3. AFAICT, it's completely useless for tsearch_readline to call
pg_verify_mbstr, because pg_any_to_server will either do that exact
thing itself, or verify the input encoding as part of conversion.
Some quick testing says that you don't even get different error
messages.  So we should just drop that step.  (It's likely that the
separate call was actually needed when this code was written; I think
we tightened up the expectations for verification within encoding
conversion somewhere along the line.  But we demonstrably don't need
it now.)

So those considerations lead me to the attached.

            regards, tom lane

diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index cb0835982d..64c979086d 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
                      errmsg("unexpected end of line")));

-        /*
-         * Note: currently, tsearch_readline can't return lines exceeding 4KB,
-         * so overflow of the word counts is impossible.  But that may not
-         * always be true, so let's check.
-         */
         if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
             ereport(ERROR,
                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 9b199d0ac1..deaafe57e6 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -14,6 +14,7 @@
 #include "postgres.h"

 #include "catalog/pg_collation.h"
+#include "common/string.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
         return false;
     stp->filename = filename;
     stp->lineno = 0;
+    initStringInfo(&stp->buf);
     stp->curline = NULL;
     /* Setup error traceback support for ereport() */
     stp->cb.callback = tsearch_readline_callback;
@@ -145,7 +147,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
 char *
 tsearch_readline(tsearch_readline_state *stp)
 {
-    char       *result;
+    char       *recoded;

     /* Advance line number to use in error reports */
     stp->lineno++;
@@ -153,23 +155,31 @@ tsearch_readline(tsearch_readline_state *stp)
     /* Clear curline, it's no longer relevant */
     if (stp->curline)
     {
-        pfree(stp->curline);
+        if (stp->curline != stp->buf.data)
+            pfree(stp->curline);
         stp->curline = NULL;
     }

     /* Collect next line, if there is one */
-    result = t_readline(stp->fp);
-    if (!result)
+    if (!pg_get_line_buf(stp->fp, &stp->buf))
         return NULL;

+    /* Validate the input as UTF-8, then convert to DB encoding if needed */
+    recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
+
+    /* Save the correctly-encoded string for possible error reports */
+    stp->curline = recoded;        /* might be equal to buf.data */
+
     /*
-     * Save a copy of the line for possible use in error reports.  (We cannot
-     * just save "result", since it's likely to get pfree'd at some point by
-     * the caller; an error after that would try to access freed data.)
+     * We always return a freshly pstrdup'd string.  This is clearly necessary
+     * if pg_any_to_server() returned buf.data, and it is desirable even if
+     * conversion occurred, because the palloc'd result of conversion is
+     * likely to be much bigger than minimally necessary.  Since some callers
+     * save the result string directly into a long-lived dictionary structure,
+     * we don't want it to be a larger palloc chunk than necessary.  We'll
+     * reclaim the conversion result on the next call.
      */
-    stp->curline = pstrdup(result);
-
-    return result;
+    return pstrdup(recoded);
 }

 /*
@@ -181,11 +191,13 @@ tsearch_readline_end(tsearch_readline_state *stp)
     /* Suppress use of curline in any error reported below */
     if (stp->curline)
     {
-        pfree(stp->curline);
+        if (stp->curline != stp->buf.data)
+            pfree(stp->curline);
         stp->curline = NULL;
     }

     /* Release other resources */
+    pfree(stp->buf.data);
     FreeFile(stp->fp);

     /* Pop the error context stack */
@@ -203,8 +215,7 @@ tsearch_readline_callback(void *arg)

     /*
      * We can't include the text of the config line for errors that occur
-     * during t_readline() itself.  This is only partly a consequence of our
-     * arms-length use of that routine: the major cause of such errors is
+     * during tsearch_readline() itself.  The major cause of such errors is
      * encoding violations, and we daren't try to print error messages
      * containing badly-encoded data.
      */
@@ -220,43 +231,6 @@ tsearch_readline_callback(void *arg)
 }


-/*
- * Read the next line from a tsearch data file (expected to be in UTF-8), and
- * convert it to database encoding if needed. The returned string is palloc'd.
- * NULL return means EOF.
- *
- * Note: direct use of this function is now deprecated.  Go through
- * tsearch_readline() to provide better error reporting.
- */
-char *
-t_readline(FILE *fp)
-{
-    int            len;
-    char       *recoded;
-    char        buf[4096];        /* lines must not be longer than this */
-
-    if (fgets(buf, sizeof(buf), fp) == NULL)
-        return NULL;
-
-    len = strlen(buf);
-
-    /* Make sure the input is valid UTF-8 */
-    (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
-
-    /* And convert */
-    recoded = pg_any_to_server(buf, len, PG_UTF8);
-    if (recoded == buf)
-    {
-        /*
-         * conversion didn't pstrdup, so we must. We can use the length of the
-         * original string, because no conversion was done.
-         */
-        recoded = pnstrdup(recoded, len);
-    }
-
-    return recoded;
-}
-
 /*
  * lowerstr --- fold null-terminated string to lower case
  *
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index cc4bd9ab20..f1669fda21 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -15,6 +15,7 @@
 #include <ctype.h>
 #include <limits.h>

+#include "lib/stringinfo.h"
 #include "mb/pg_wchar.h"
 #include "utils/pg_locale.h"

@@ -33,7 +34,9 @@ typedef struct
     FILE       *fp;
     const char *filename;
     int            lineno;
-    char       *curline;
+    StringInfoData buf;            /* current input line, in UTF-8 */
+    char       *curline;        /* current input line, in DB's encoding */
+    /* curline may be NULL, or equal to buf.data, or a palloc'd string */
     ErrorContextCallback cb;
 } tsearch_readline_state;

@@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
 extern char *tsearch_readline(tsearch_readline_state *stp);
 extern void tsearch_readline_end(tsearch_readline_state *stp);

-extern char *t_readline(FILE *fp);
-
 #endif                            /* __TSLOCALE_H__ */

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andres Freund
Дата:
Сообщение: Re: Batching page logging during B-tree build
Следующее
От: Fabrízio de Royes Mello
Дата:
Сообщение: Re: Missing TOAST table for pg_class