Re: xpath processing brain dead

Поиск
Список
Период
Сортировка
От Andrew Dunstan
Тема Re: xpath processing brain dead
Дата
Msg-id 49A85EFD.3010206@dunslane.net
обсуждение исходный текст
Ответ на Re: xpath processing brain dead  (Andrew Dunstan <andrew@dunslane.net>)
Ответы Re: xpath processing brain dead
Список pgsql-hackers

Andrew Dunstan wrote:
>
>
> Tom Lane wrote:
>> Hmm, does this proposal require adding a test of well-formed-ness to
>> a code path that doesn't currently have one?  If so, is that likely
>> to contribute any noticeable slowdown?
>>
>> I can't offhand see an objection to this other than possible performance
>> impact.
>>
>>
>>
>
> Yeah, testing the well-formedness might cost a bit. We could
> short-circuit the test by applying some comparatively fast heuristic
> tests.
>
> Or we could decide that we'll just fix the xpath prefix part for 8.3
> and keep the wrapping. I don't want to spend a huge effort on fixing
> something I regard as fundamentally broken.
>
> I'll do some tests to see what the cost of extra xml parsing might be.


The extra cost appears to be fairly negligible.

regression=# create table xpathtest3 as select xmlconcat(xmlelement(name
unique1, unique1), '\n\t',xmlelement(name unique2, unique2),
'\n\t',xmlelement(name two, two), '\n\t',xmlelement(name four,
four),'\n\t',xmlelement(name ten,ten),'\n\t',xmlelement(name
twenty,twenty),'\n\t',xmlelement(name
hundred,hundred),'\n\t',xmlelement(name
thousand,thousand),'\n\t',xmlelement(name
twothusand,twothousand),'\n\t',xmlelement(name
fivethous,fivethous),'\n\t',xmlelement(name
tenthous,tenthous),'\n\t',xmlelement(name
odd,odd),'\n\t',xmlelement(name even,even),'\n\t',xmlelement(name
stringu1,stringu1),'\n\t',xmlelement(name
stringu2,stringu2),'\n\t',xmlelement(name string4,string4),'\n') from tenk1;

regression=# select count(*) from (select
xpath('//two[text()="0"]/text()',xmlconcat) as elems from xpathtest3,
generate_series(1,10) ) x ;
 count
--------
 100000
(1 row)

Time: 27.722 ms


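Proposed patch for 8.3 attached. (Note: it only reparses the input in the
non-document case, i.e. when the first parse fails or yields no root
element; well-formed documents are parsed once and the XPath expression is
used unmodified.)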

cheers

andrew


Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.68.2.6
diff -c -r1.68.2.6 xml.c
*** src/backend/utils/adt/xml.c    10 Nov 2008 18:02:27 -0000    1.68.2.6
--- src/backend/utils/adt/xml.c    27 Feb 2009 20:59:28 -0000
***************
*** 3320,3360 ****

      xml_init();

!     /*
!      * To handle both documents and fragments, regardless of the fact whether
!      * the XML datum has a single root (XML well-formedness), we wrap the XML
!      * datum in a dummy element (<x>...</x>) and extend the XPath expression
!      * accordingly.  To do it, throw away the XML prolog, if any.
!      */
!     if (len >= 5 &&
!         xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
!     {
!         i = 5;
!         while (i < len &&
!                !(datastr[i - 1] == '?' && datastr[i] == '>'))
!             i++;
!
!         if (i == len)
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "could not parse XML data");

!         ++i;

!         datastr += i;
!         len -= i;
!     }
!
!     string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
!     memcpy(string, "<x>", 3);
!     memcpy(string + 3, datastr, len);
!     memcpy(string + 3 + len, "</x>", 5);
!     len += 7;
!
!     xpath_expr = (xmlChar *) palloc((xpath_len + 3) * sizeof(xmlChar));
!     memcpy(xpath_expr, "/x", 2);
!     memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
!     xpath_expr[xpath_len + 2] = '\0';
!     xpath_len += 2;

      xmlInitParser();

--- 3320,3332 ----

      xml_init();

!     string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));

!     xpath_expr = (xmlChar *) palloc((xpath_len + 5) * sizeof(xmlChar));

!     memcpy (string, datastr, len);
!     string[len] = '\0';
!

      xmlInitParser();

***************
*** 3367,3375 ****
          xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
                      "could not allocate parser context");
      doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
!     if (doc == NULL)
!         xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
!                     "could not parse XML data");
      xpathctx = xmlXPathNewContext(doc);
      if (xpathctx == NULL)
          xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
--- 3339,3410 ----
          xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
                      "could not allocate parser context");
      doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
!
!     if (doc == NULL || xmlDocGetRootElement(doc) == NULL)
!     {
!
!         /*
!          * In case we have a fragment rather than a well-formed XML document,
!          * which has a single root (XML well-formedness), we try again after
!          * transforming the xml by stripping away the XML prolog, if any, and
!          * wrapping the remainder in a dummy element (<x>...</x>),
!          * and later extending the XPath expression accordingly.
!          */
!         if (len >= 5 &&
!             xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
!         {
!             i = 5;
!             while (i < len &&
!                    !(datastr[i - 1] == '?' && datastr[i] == '>'))
!                 i++;
!
!             if (i == len)
!                 xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                             "could not parse XML data");
!
!             ++i;
!
!             datastr += i;
!             len -= i;
!         }
!
!         memcpy(string, "<x>", 3);
!         memcpy(string + 3, datastr, len);
!         memcpy(string + 3 + len, "</x>", 5);
!         len += 7;
!
!         doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
!
!          if (doc == NULL)
!             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
!                         "could not parse XML data");
!
!         if (*VARDATA(xpath_expr_text) == '/')
!         {
!             memcpy(xpath_expr, "/x", 2);
!             memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
!             xpath_expr[xpath_len + 2] = '\0';
!             xpath_len += 2;
!         }
!         else
!         {
!             memcpy(xpath_expr, "/x//", 4);
!             memcpy(xpath_expr + 4, VARDATA(xpath_expr_text), xpath_len);
!             xpath_expr[xpath_len + 4] = '\0';
!             xpath_len += 4;
!         }
!
!     }
!     else
!     {
!         /*
!          * if we didn't need to mangle the XML, we don't need to mangle the
!          * xpath either.
!          */
!         memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
!         xpath_expr[xpath_len] = '\0';
!     }
!
      xpathctx = xmlXPathNewContext(doc);
      if (xpathctx == NULL)
          xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andrew Dunstan
Дата:
Сообщение: Re: xpath processing brain dead
Следующее
От: Jaime Casanova
Дата:
Сообщение: Re: Updates of SE-PostgreSQL 8.4devel patches (r1530)