Re: XML binary I/O (was Re: tsearch refactorings)

Поиск
Список
Период
Сортировка
От Heikki Linnakangas
Тема Re: XML binary I/O (was Re: tsearch refactorings)
Дата
Msg-id 46EA76A2.8090303@enterprisedb.com
обсуждение исходный текст
Ответ на XML binary I/O (was Re: tsearch refactorings)  ("Heikki Linnakangas" <heikki@enterprisedb.com>)
Ответы Re: XML binary I/O (was Re: tsearch refactorings)  (Alvaro Herrera <alvherre@commandprompt.com>)
Re: XML binary I/O (was Re: tsearch refactorings)  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-patches
Heikki Linnakangas wrote:
> Heikki Linnakangas wrote:
>> BTW, the encoding of the XML datatype looks pretty funky. xml_recv first
>> reads the xml string with pq_getmsgtext, which applies a client->server
>> conversion. Then the xml declaration is parsed, extracting the encoding
>> attribute. Then the string is converted again from that encoding (or
>> UTF-8 if none was specified) to server encoding. I don't understand how
>> it's supposed to work, but ISTM there's one conversion too much,
>
> And it's got an unfortunate typo in it as well: it calls "free(result)"
> instead of pfree. I think we need regression tests for the more complex
> send/recv functions...

According to the docs, xml_send is supposed to output the XML document
in client encoding, with that encoding specified in the xml declaration,
eg. ?<xml encoding="LATIN1"?>. xml_recv is supposed to ignore client
encoding, and parse the document according to the encoding specified in
the declaration.

Here's a patch that fixes the send/recv functions to work like the
manual says. It fixes the free/pfree typo as well.

Included is a new regression test for the send/recv functions as well
(unpatched CVS HEAD fails it BTW). It can be merged with the existing
xml test, but I didn't want to do it in the patch because it would've
involved moving the current xml test from sql/ to input/, and made the
patch longer to review.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.46
diff -c -r1.46 xml.c
*** src/backend/utils/adt/xml.c    13 Jul 2007 03:43:23 -0000    1.46
--- src/backend/utils/adt/xml.c    14 Sep 2007 11:23:48 -0000
***************
*** 232,238 ****
      xmlDocPtr    doc;
      xmlChar       *encoding = NULL;

!     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);

      result = palloc(nbytes + VARHDRSZ);
      SET_VARSIZE(result, nbytes + VARHDRSZ);
--- 232,244 ----
      xmlDocPtr    doc;
      xmlChar       *encoding = NULL;

!     /*
!      * Read the data in raw format. We don't know yet what the encoding
!      * is, that information is embedded in the xml declaration, so we
!      * have to parse that before converting to server encoding.
!      */
!     nbytes = buf->len - buf->cursor;
!     str = (char *) pq_getmsgbytes(buf, nbytes);

      result = palloc(nbytes + VARHDRSZ);
      SET_VARSIZE(result, nbytes + VARHDRSZ);
***************
*** 247,262 ****
      doc = xml_parse(result, xmloption, true, encoding);
      xmlFreeDoc(doc);

      newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
                                                  nbytes,
                                                  encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
                                                  GetDatabaseEncoding());

-     pfree(str);
-
      if (newstr != str)
      {
!         free(result);

          nbytes = strlen(newstr);

--- 253,267 ----
      doc = xml_parse(result, xmloption, true, encoding);
      xmlFreeDoc(doc);

+     /* Now that we know what we're dealing with, convert to server encoding */
      newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
                                                  nbytes,
                                                  encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
                                                  GetDatabaseEncoding());

      if (newstr != str)
      {
!         pfree(result);

          nbytes = strlen(newstr);

***************
*** 277,287 ****
  xml_send(PG_FUNCTION_ARGS)
  {
      xmltype       *x = PG_GETARG_XML_P(0);
!     char       *outval = xml_out_internal(x, pg_get_client_encoding());
      StringInfoData buf;

      pq_begintypsend(&buf);
!     pq_sendstring(&buf, outval);
      PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }

--- 282,299 ----
  xml_send(PG_FUNCTION_ARGS)
  {
      xmltype       *x = PG_GETARG_XML_P(0);
!     char       *outval;
      StringInfoData buf;
+
+     /*
+      * xml_out_internal doesn't convert the encoding, it just prints
+      * the right declaration. pq_sendtext will do the conversion.
+      */
+     outval = xml_out_internal(x, pg_get_client_encoding());

      pq_begintypsend(&buf);
!     pq_sendtext(&buf, outval, strlen(outval));
!     pfree(outval);
      PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }

Index: src/test/regress/parallel_schedule
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/parallel_schedule,v
retrieving revision 1.44
diff -c -r1.44 parallel_schedule
*** src/test/regress/parallel_schedule    11 Sep 2007 11:54:42 -0000    1.44
--- src/test/regress/parallel_schedule    14 Sep 2007 11:29:10 -0000
***************
*** 85,90 ****
--- 85,93 ----
  # "plpgsql" cannot run concurrently with "rules", nor can "plancache"
  test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table
sequencepolymorphism rowtypes returning largeobject xml 

+ # "xmlio" needs to run after xml
+ test: xmlio
+
  # run stats by itself because its delay may be insufficient under heavy load
  test: stats

Index: src/test/regress/serial_schedule
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/serial_schedule,v
retrieving revision 1.41
diff -c -r1.41 serial_schedule
*** src/test/regress/serial_schedule    11 Sep 2007 11:54:42 -0000    1.41
--- src/test/regress/serial_schedule    14 Sep 2007 11:28:19 -0000
***************
*** 111,115 ****
--- 111,116 ----
  test: returning
  test: largeobject
  test: xml
+ test: xmlio
  test: stats
  test: tablespace
Index: src/test/regress/expected/xmlio.out
===================================================================
RCS file: src/test/regress/expected/xmlio.out
diff -N src/test/regress/expected/xmlio.out
*** /dev/null    1 Jan 1970 00:00:00 -0000
--- src/test/regress/expected/xmlio.out    14 Sep 2007 11:36:47 -0000
***************
*** 0 ****
--- 1,22 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+ SELECT * FROM xmltest;
+  id |        data
+ ----+--------------------
+   1 | <value>one</value>
+   2 | <value>two</value>
+ (2 rows)
+
+ -- Basic test
+ COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;
+  id |        data
+ ----+--------------------
+   1 | <value>one</value>
+   2 | <value>two</value>
+ (2 rows)
+
Index: src/test/regress/expected/xmlio_1.out
===================================================================
RCS file: src/test/regress/expected/xmlio_1.out
diff -N src/test/regress/expected/xmlio_1.out
*** /dev/null    1 Jan 1970 00:00:00 -0000
--- src/test/regress/expected/xmlio_1.out    14 Sep 2007 11:44:13 -0000
***************
*** 0 ****
--- 1,18 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+ SELECT * FROM xmltest;
+  id | data
+ ----+------
+ (0 rows)
+
+ -- Basic test
+ COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;
+  id | data
+ ----+------
+ (0 rows)
+
Index: src/test/regress/input/xmlio.source
===================================================================
RCS file: src/test/regress/input/xmlio.source
diff -N src/test/regress/input/xmlio.source
*** /dev/null    1 Jan 1970 00:00:00 -0000
--- src/test/regress/input/xmlio.source    14 Sep 2007 11:34:37 -0000
***************
*** 0 ****
--- 1,12 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+
+ SELECT * FROM xmltest;
+
+ -- Basic test
+ COPY xmltest TO '@abs_builddir@/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '@abs_builddir@/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;

В списке pgsql-patches по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: XLogCacheByte is unused
Следующее
От: Alvaro Herrera
Дата:
Сообщение: Re: XML binary I/O (was Re: tsearch refactorings)