Re: [PATCH] Re: Adding xpath_exists function

Поиск
Список
Период
Сортировка
От Mike Fowler
Тема Re: [PATCH] Re: Adding xpath_exists function
Дата
Msg-id 4C29CCE8.7070405@mlfowler.com
обсуждение исходный текст
Ответ на [PATCH] Re: Adding xpath_exists function  (Mike Fowler <mike@mlfowler.com>)
Ответы Review: Re: [PATCH] Re: Adding xpath_exists function  (David Fetter <david@fetter.org>)
Re: [PATCH] Re: Adding xpath_exists function  (Alvaro Herrera <alvherre@commandprompt.com>)
Список pgsql-hackers
Mike Fowler wrote:
> Bruce Momjian wrote:
>> I have added this to the next commit-fest:
>>
>>     https://commitfest.postgresql.org/action/commitfest_view?id=6
> Thanks Bruce. Attached is a revised patch which changes the code
> slightly such that it uses an older version of the libxml library.
> I've added comments to the code so that we remember why we didn't use
> the latest function.

After seeing some other posts in the last couple of days, I realised I
hadn't documented the function in the SGML. I have now done so, and
added a couple of tests with XML literals. Please find the patch
attached. Now time to go correct the xmlexists patch too...

--
Mike Fowler
Registered Linux user: 379787

*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 8626,8631 **** SELECT xpath('/my:a/text()', '<my:a xmlns:my="http://example.com">test</my:a>',
--- 8626,8664 ----
  (1 row)
  ]]></screen>
     </para>
+
+    <sect3>
+     <title>xpath_exists</title>
+
+     <indexterm>
+      <primary>xpath_exists</primary>
+     </indexterm>
+
+     <synopsis>
+      <function>xpath_exists</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>, <replaceable>nsarray</replaceable></optional>)
+     </synopsis>
+
+     <para>
+      The function <function>xpath_exists</function> is a specialised form
+      of the <function>xpath</function> function. Though the functions are
+      syntactically the same, the XPath expressions are evaluated in differing
+      contexts. Instead of returning the XML values that satisfy the XPath, this
+      function returns a boolean indicating whether the query was satisfied or not.
+     </para>
+
+     <para>
+      Example:
+      <screen><![CDATA[
+ SELECT xpath_exists('/my:a/text()', '<my:a xmlns:my="http://example.com">test</my:a>',
+                     ARRAY[ARRAY['my', 'http://example.com']]);
+
+  xpath_exists
+ --------------
+  t
+ (1 row)
+ ]]></screen>
+     </para>
+    </sect3>
    </sect2>

    <sect2 id="functions-xml-mapping">
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 3495,3497 **** xpath(PG_FUNCTION_ARGS)
--- 3495,3681 ----
      return 0;
  #endif
  }
+
+ /*
+  * xpath_exists(xpath text, data xml, namespaces text[]) returns bool
+  *
+  * Evaluates the XPath expression against the supplied XML document and
+  * returns true when the resulting node set holds at least one node.  A
+  * result that carries no node set (nodesetval == NULL) yields false.
+  *
+  * It is up to the user to ensure that the XML passed is in fact
+  * an XML document - XPath doesn't work easily on fragments without
+  * a context node being known.
+  *
+  * Raises an error for a malformed namespace array, an empty XPath
+  * expression, unparsable XML, or an XPath that fails to compile.
+  */
+ Datum
+ xpath_exists(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
+     xmltype    *data = PG_GETARG_XML_P(1);
+     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+
+     /*
+      * The libxml handles are assigned inside PG_TRY and read in PG_CATCH,
+      * so they are volatile to keep their current values across the longjmp.
+      */
+     volatile xmlParserCtxtPtr ctxt = NULL;
+     volatile xmlDocPtr doc = NULL;
+     volatile xmlXPathContextPtr xpathctx = NULL;
+     volatile xmlXPathCompExprPtr xpathcomp = NULL;
+     volatile xmlXPathObjectPtr xpathobj = NULL;
+     char       *datastr;
+     int32        len;
+     int32        xpath_len;
+     xmlChar    *string;
+     xmlChar    *xpath_expr;
+     int            i;
+     int            ndim;
+     Datum       *ns_names_uris = NULL;
+     bool       *ns_names_uris_nulls = NULL;
+     int            ns_count = 0;    /* stays 0 when no mappings are passed */
+     bool        result = false;
+
+     /*
+      * Namespace mappings are passed as text[].  If an empty array is passed
+      * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
+      * Else, a 2-dimensional array with length of the second axis being equal
+      * to 2 should be passed, i.e., every subarray contains 2 elements, the
+      * first element defining the name, the second one the URI.  Example:
+      * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
+      * 'http://example2.com']].
+      */
+     ndim = ARR_NDIM(namespaces);
+     if (ndim != 0)
+     {
+         int           *dims;
+
+         dims = ARR_DIMS(namespaces);
+
+         if (ndim != 2 || dims[1] != 2)
+             ereport(ERROR,
+                     (errcode(ERRCODE_DATA_EXCEPTION),
+                      errmsg("invalid array for XML namespace mapping"),
+                      errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
+
+         Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+         deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
+                           &ns_names_uris, &ns_names_uris_nulls,
+                           &ns_count);
+
+         Assert((ns_count % 2) == 0);    /* checked above */
+         ns_count /= 2;            /* count pairs only */
+     }
+
+     datastr = VARDATA(data);
+     len = VARSIZE(data) - VARHDRSZ;
+     xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+     if (xpath_len == 0)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                  errmsg("empty XPath expression")));
+
+     string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+     memcpy(string, datastr, len);
+     string[len] = '\0';
+
+     xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
+     memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+     xpath_expr[xpath_len] = '\0';
+
+     pg_xml_init();
+     xmlInitParser();
+
+     PG_TRY();
+     {
+         /*
+          * redundant XML parsing (two parsings for the same value during one
+          * command execution are possible)
+          */
+         ctxt = xmlNewParserCtxt();
+         if (ctxt == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate parser context");
+         doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+         if (doc == NULL)
+             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+                         "could not parse XML document");
+         xpathctx = xmlXPathNewContext(doc);
+         if (xpathctx == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate XPath context");
+         xpathctx->node = xmlDocGetRootElement(doc);
+         if (xpathctx->node == NULL)
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not find root XML element");
+
+         /* register namespaces, if any (loop is a no-op when ns_count is 0) */
+         for (i = 0; i < ns_count; i++)
+         {
+             char       *ns_name;
+             char       *ns_uri;
+
+             if (ns_names_uris_nulls[i * 2] ||
+                 ns_names_uris_nulls[i * 2 + 1])
+                 ereport(ERROR,
+                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                          errmsg("neither namespace name nor URI may be null")));
+             ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
+             ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
+             if (xmlXPathRegisterNs(xpathctx,
+                                    (xmlChar *) ns_name,
+                                    (xmlChar *) ns_uri) != 0)
+                 ereport(ERROR,        /* is this an internal error??? */
+                         (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
+                                 ns_name, ns_uri)));
+         }
+
+         xpathcomp = xmlXPathCompile(xpath_expr);
+         if (xpathcomp == NULL)    /* TODO: show proper XPath error details */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "invalid XPath expression");
+
+         /* Version 2.6.27 introduces a function named xmlXPathCompiledEvalToBoolean
+          * however we can derive the existence by whether any nodes are returned
+          * thereby preventing a library version upgrade */
+         xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+         if (xpathobj == NULL)    /* TODO: reason? */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not create XPath object");
+
+         result = (xpathobj->nodesetval != NULL &&
+                   xpathobj->nodesetval->nodeNr > 0);
+     }
+     PG_CATCH();
+     {
+         if (xpathobj)
+             xmlXPathFreeObject(xpathobj);
+         if (xpathcomp)
+             xmlXPathFreeCompExpr(xpathcomp);
+         if (xpathctx)
+             xmlXPathFreeContext(xpathctx);
+         if (doc)
+             xmlFreeDoc(doc);
+         if (ctxt)
+             xmlFreeParserCtxt(ctxt);
+         PG_RE_THROW();
+     }
+     PG_END_TRY();
+
+     /* success path: the result object must be released too, or it leaks */
+     xmlXPathFreeObject(xpathobj);
+     xmlXPathFreeCompExpr(xpathcomp);
+     xmlXPathFreeContext(xpathctx);
+     xmlFreeDoc(doc);
+     xmlFreeParserCtxt(ctxt);
+
+     PG_RETURN_BOOL(result);
+ #else
+     NO_XML_SUPPORT();
+     return 0;
+ #endif
+ }
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 4385,4390 **** DESCR("evaluate XPath expression, with namespaces support");
--- 4385,4395 ----
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
  DESCR("evaluate XPath expression");

+ DATA(insert OID = 3037 (  xpath_exists     PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 "25 142 1009" _null_ _null_ _null_ _null_ xpath_exists _null_ _null_ _null_ ));
+ DESCR("evaluate XPath expression in a boolean context, with namespaces support");
+ DATA(insert OID = 3038 (  xpath_exists     PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_ "select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ ));
+ DESCR("evaluate XPath expression in a boolean context");
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
  DESCR("I/O");
*** a/src/include/utils/xml.h
--- b/src/include/utils/xml.h
***************
*** 37,42 **** extern Datum texttoxml(PG_FUNCTION_ARGS);
--- 37,43 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xpath_exists(PG_FUNCTION_ARGS);

  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 502,504 **** SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
--- 502,557 ----
   {<b>two</b>,<b>etc</b>}
  (1 row)

+ -- Test xpath_exists evaluation
+ SELECT xpath_exists('//town[text() = ''Toronto'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml);
+  xpath_exists
+ --------------
+  f
+ (1 row)
+
+ SELECT xpath_exists('//town[text() = ''Cwmbran'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml);
+  xpath_exists
+ --------------
+  t
+ (1 row)
+
+ INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data);
+  count
+ -------
+      2
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data);
+  count
+ -------
+      1
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      2
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      1
+ (1 row)
+
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 163,165 **** SELECT xpath('', '<!-- error -->');
--- 163,181 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+
+ -- Test xpath_exists evaluation
+ SELECT xpath_exists('//town[text() = ''Toronto'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml);
+ SELECT xpath_exists('//town[text() = ''Cwmbran'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml);
+
+ INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Thom Brown
Дата:
Сообщение: Look-behind regular expressions
Следующее
От: Robert Haas
Дата:
Сообщение: Re: warning message in standby