xref: /trueos/contrib/subversion/subversion/libsvn_subr/xml.c (revision 1e5107043085964bba002e6a91aa04e864f74d02)
1 /*
2  * xml.c:  xml helper code shared among the Subversion libraries.
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 
26 #include <string.h>
27 #include <assert.h>
28 
29 #include "svn_private_config.h"         /* for SVN_HAVE_OLD_EXPAT */
30 #include "svn_hash.h"
31 #include "svn_pools.h"
32 #include "svn_xml.h"
33 #include "svn_error.h"
34 #include "svn_ctype.h"
35 
36 #include "private/svn_utf_private.h"
37 
38 #ifdef SVN_HAVE_OLD_EXPAT
39 #include <xmlparse.h>
40 #else
41 #include <expat.h>
42 #endif
43 
44 #ifdef XML_UNICODE
45 #error Expat is unusable -- it has been compiled for wide characters
46 #endif
47 
48 /* The private internals for a parser object. */
49 struct svn_xml_parser_t
50 {
51   /** the expat parser */
52   XML_Parser parser;
53 
54   /** the SVN callbacks to call from the Expat callbacks */
55   svn_xml_start_elem start_handler;
56   svn_xml_end_elem end_handler;
57   svn_xml_char_data data_handler;
58 
59   /** the user's baton for private data */
60   void *baton;
61 
62   /** if non-@c NULL, an error happened while parsing */
63   svn_error_t *error;
64 
65   /** where this object is allocated, so we can free it easily */
66   apr_pool_t *pool;
67 
68 };
69 
70 
71 /*** XML character validation ***/
72 
73 svn_boolean_t
svn_xml_is_xml_safe(const char * data,apr_size_t len)74 svn_xml_is_xml_safe(const char *data, apr_size_t len)
75 {
76   const char *end = data + len;
77   const char *p;
78 
79   if (! svn_utf__is_valid(data, len))
80     return FALSE;
81 
82   for (p = data; p < end; p++)
83     {
84       unsigned char c = *p;
85 
86       if (svn_ctype_iscntrl(c))
87         {
88           if ((c != SVN_CTYPE_ASCII_TAB)
89               && (c != SVN_CTYPE_ASCII_LINEFEED)
90               && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
91               && (c != SVN_CTYPE_ASCII_DELETE))
92             return FALSE;
93         }
94     }
95   return TRUE;
96 }
97 
98 
99 
100 
101 
102 /*** XML escaping. ***/
103 
104 /* ### ...?
105  *
106  * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
107  * in POOL, else append to the existing stringbuf there.
108  */
109 static void
xml_escape_cdata(svn_stringbuf_t ** outstr,const char * data,apr_size_t len,apr_pool_t * pool)110 xml_escape_cdata(svn_stringbuf_t **outstr,
111                  const char *data,
112                  apr_size_t len,
113                  apr_pool_t *pool)
114 {
115   const char *end = data + len;
116   const char *p = data, *q;
117 
118   if (*outstr == NULL)
119     *outstr = svn_stringbuf_create_empty(pool);
120 
121   while (1)
122     {
123       /* Find a character which needs to be quoted and append bytes up
124          to that point.  Strictly speaking, '>' only needs to be
125          quoted if it follows "]]", but it's easier to quote it all
126          the time.
127 
128          So, why are we escaping '\r' here?  Well, according to the
129          XML spec, '\r\n' gets converted to '\n' during XML parsing.
130          Also, any '\r' not followed by '\n' is converted to '\n'.  By
131          golly, if we say we want to escape a '\r', we want to make
132          sure it remains a '\r'!  */
133       q = p;
134       while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
135         q++;
136       svn_stringbuf_appendbytes(*outstr, p, q - p);
137 
138       /* We may already be a winner.  */
139       if (q == end)
140         break;
141 
142       /* Append the entity reference for the character.  */
143       if (*q == '&')
144         svn_stringbuf_appendcstr(*outstr, "&amp;");
145       else if (*q == '<')
146         svn_stringbuf_appendcstr(*outstr, "&lt;");
147       else if (*q == '>')
148         svn_stringbuf_appendcstr(*outstr, "&gt;");
149       else if (*q == '\r')
150         svn_stringbuf_appendcstr(*outstr, "&#13;");
151 
152       p = q + 1;
153     }
154 }
155 
156 /* Essentially the same as xml_escape_cdata, with the addition of
157    whitespace and quote characters. */
158 static void
xml_escape_attr(svn_stringbuf_t ** outstr,const char * data,apr_size_t len,apr_pool_t * pool)159 xml_escape_attr(svn_stringbuf_t **outstr,
160                 const char *data,
161                 apr_size_t len,
162                 apr_pool_t *pool)
163 {
164   const char *end = data + len;
165   const char *p = data, *q;
166 
167   if (*outstr == NULL)
168     *outstr = svn_stringbuf_create_ensure(len, pool);
169 
170   while (1)
171     {
172       /* Find a character which needs to be quoted and append bytes up
173          to that point. */
174       q = p;
175       while (q < end && *q != '&' && *q != '<' && *q != '>'
176              && *q != '"' && *q != '\'' && *q != '\r'
177              && *q != '\n' && *q != '\t')
178         q++;
179       svn_stringbuf_appendbytes(*outstr, p, q - p);
180 
181       /* We may already be a winner.  */
182       if (q == end)
183         break;
184 
185       /* Append the entity reference for the character.  */
186       if (*q == '&')
187         svn_stringbuf_appendcstr(*outstr, "&amp;");
188       else if (*q == '<')
189         svn_stringbuf_appendcstr(*outstr, "&lt;");
190       else if (*q == '>')
191         svn_stringbuf_appendcstr(*outstr, "&gt;");
192       else if (*q == '"')
193         svn_stringbuf_appendcstr(*outstr, "&quot;");
194       else if (*q == '\'')
195         svn_stringbuf_appendcstr(*outstr, "&apos;");
196       else if (*q == '\r')
197         svn_stringbuf_appendcstr(*outstr, "&#13;");
198       else if (*q == '\n')
199         svn_stringbuf_appendcstr(*outstr, "&#10;");
200       else if (*q == '\t')
201         svn_stringbuf_appendcstr(*outstr, "&#9;");
202 
203       p = q + 1;
204     }
205 }
206 
207 
208 void
svn_xml_escape_cdata_stringbuf(svn_stringbuf_t ** outstr,const svn_stringbuf_t * string,apr_pool_t * pool)209 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
210                                const svn_stringbuf_t *string,
211                                apr_pool_t *pool)
212 {
213   xml_escape_cdata(outstr, string->data, string->len, pool);
214 }
215 
216 
217 void
svn_xml_escape_cdata_string(svn_stringbuf_t ** outstr,const svn_string_t * string,apr_pool_t * pool)218 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
219                             const svn_string_t *string,
220                             apr_pool_t *pool)
221 {
222   xml_escape_cdata(outstr, string->data, string->len, pool);
223 }
224 
225 
226 void
svn_xml_escape_cdata_cstring(svn_stringbuf_t ** outstr,const char * string,apr_pool_t * pool)227 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
228                              const char *string,
229                              apr_pool_t *pool)
230 {
231   xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
232 }
233 
234 
235 void
svn_xml_escape_attr_stringbuf(svn_stringbuf_t ** outstr,const svn_stringbuf_t * string,apr_pool_t * pool)236 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
237                               const svn_stringbuf_t *string,
238                               apr_pool_t *pool)
239 {
240   xml_escape_attr(outstr, string->data, string->len, pool);
241 }
242 
243 
244 void
svn_xml_escape_attr_string(svn_stringbuf_t ** outstr,const svn_string_t * string,apr_pool_t * pool)245 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
246                            const svn_string_t *string,
247                            apr_pool_t *pool)
248 {
249   xml_escape_attr(outstr, string->data, string->len, pool);
250 }
251 
252 
253 void
svn_xml_escape_attr_cstring(svn_stringbuf_t ** outstr,const char * string,apr_pool_t * pool)254 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
255                             const char *string,
256                             apr_pool_t *pool)
257 {
258   xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
259 }
260 
261 
262 const char *
svn_xml_fuzzy_escape(const char * string,apr_pool_t * pool)263 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
264 {
265   const char *end = string + strlen(string);
266   const char *p = string, *q;
267   svn_stringbuf_t *outstr;
268   char escaped_char[6];   /* ? \ u u u \0 */
269 
270   for (q = p; q < end; q++)
271     {
272       if (svn_ctype_iscntrl(*q)
273           && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
274         break;
275     }
276 
277   /* Return original string if no unsafe characters found. */
278   if (q == end)
279     return string;
280 
281   outstr = svn_stringbuf_create_empty(pool);
282   while (1)
283     {
284       q = p;
285 
286       /* Traverse till either unsafe character or eos. */
287       while ((q < end)
288              && ((! svn_ctype_iscntrl(*q))
289                  || (*q == '\n') || (*q == '\r') || (*q == '\t')))
290         q++;
291 
292       /* copy chunk before marker */
293       svn_stringbuf_appendbytes(outstr, p, q - p);
294 
295       if (q == end)
296         break;
297 
298       /* Append an escaped version of the unsafe character.
299 
300          ### This format was chosen for consistency with
301          ### svn_utf__cstring_from_utf8_fuzzy().  The two functions
302          ### should probably share code, even though they escape
303          ### different characters.
304       */
305       apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
306                    (unsigned char) *q);
307       svn_stringbuf_appendcstr(outstr, escaped_char);
308 
309       p = q + 1;
310     }
311 
312   return outstr->data;
313 }
314 
315 
316 /*** Map from the Expat callback types to the SVN XML types. ***/
317 
expat_start_handler(void * userData,const XML_Char * name,const XML_Char ** atts)318 static void expat_start_handler(void *userData,
319                                 const XML_Char *name,
320                                 const XML_Char **atts)
321 {
322   svn_xml_parser_t *svn_parser = userData;
323 
324   (*svn_parser->start_handler)(svn_parser->baton, name, atts);
325 }
326 
expat_end_handler(void * userData,const XML_Char * name)327 static void expat_end_handler(void *userData, const XML_Char *name)
328 {
329   svn_xml_parser_t *svn_parser = userData;
330 
331   (*svn_parser->end_handler)(svn_parser->baton, name);
332 }
333 
expat_data_handler(void * userData,const XML_Char * s,int len)334 static void expat_data_handler(void *userData, const XML_Char *s, int len)
335 {
336   svn_xml_parser_t *svn_parser = userData;
337 
338   (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
339 }
340 
341 
342 /*** Making a parser. ***/
343 
344 svn_xml_parser_t *
svn_xml_make_parser(void * baton,svn_xml_start_elem start_handler,svn_xml_end_elem end_handler,svn_xml_char_data data_handler,apr_pool_t * pool)345 svn_xml_make_parser(void *baton,
346                     svn_xml_start_elem start_handler,
347                     svn_xml_end_elem end_handler,
348                     svn_xml_char_data data_handler,
349                     apr_pool_t *pool)
350 {
351   svn_xml_parser_t *svn_parser;
352   apr_pool_t *subpool;
353 
354   XML_Parser parser = XML_ParserCreate(NULL);
355 
356   XML_SetElementHandler(parser,
357                         start_handler ? expat_start_handler : NULL,
358                         end_handler ? expat_end_handler : NULL);
359   XML_SetCharacterDataHandler(parser,
360                               data_handler ? expat_data_handler : NULL);
361 
362   /* ### we probably don't want this pool; or at least we should pass it
363      ### to the callbacks and clear it periodically.  */
364   subpool = svn_pool_create(pool);
365 
366   svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
367 
368   svn_parser->parser = parser;
369   svn_parser->start_handler = start_handler;
370   svn_parser->end_handler = end_handler;
371   svn_parser->data_handler = data_handler;
372   svn_parser->baton = baton;
373   svn_parser->pool = subpool;
374 
375   /* store our parser info as the UserData in the Expat parser */
376   XML_SetUserData(parser, svn_parser);
377 
378   return svn_parser;
379 }
380 
381 
382 /* Free a parser */
383 void
svn_xml_free_parser(svn_xml_parser_t * svn_parser)384 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
385 {
386   /* Free the expat parser */
387   XML_ParserFree(svn_parser->parser);
388 
389   /* Free the subversion parser */
390   svn_pool_destroy(svn_parser->pool);
391 }
392 
393 
394 
395 
396 svn_error_t *
svn_xml_parse(svn_xml_parser_t * svn_parser,const char * buf,apr_size_t len,svn_boolean_t is_final)397 svn_xml_parse(svn_xml_parser_t *svn_parser,
398               const char *buf,
399               apr_size_t len,
400               svn_boolean_t is_final)
401 {
402   svn_error_t *err;
403   int success;
404 
405   /* Parse some xml data */
406   success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
407 
408   /* If expat choked internally, return its error. */
409   if (! success)
410     {
411       /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
412       long line = XML_GetCurrentLineNumber(svn_parser->parser);
413 
414       err = svn_error_createf
415         (SVN_ERR_XML_MALFORMED, NULL,
416          _("Malformed XML: %s at line %ld"),
417          XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
418 
419       /* Kill all parsers and return the expat error */
420       svn_xml_free_parser(svn_parser);
421       return err;
422     }
423 
424   /* Did an error occur somewhere *inside* the expat callbacks? */
425   if (svn_parser->error)
426     {
427       err = svn_parser->error;
428       svn_xml_free_parser(svn_parser);
429       return err;
430     }
431 
432   return SVN_NO_ERROR;
433 }
434 
435 
436 
svn_xml_signal_bailout(svn_error_t * error,svn_xml_parser_t * svn_parser)437 void svn_xml_signal_bailout(svn_error_t *error,
438                             svn_xml_parser_t *svn_parser)
439 {
440   /* This will cause the current XML_Parse() call to finish quickly! */
441   XML_SetElementHandler(svn_parser->parser, NULL, NULL);
442   XML_SetCharacterDataHandler(svn_parser->parser, NULL);
443 
444   /* Once outside of XML_Parse(), the existence of this field will
445      cause svn_delta_parse()'s main read-loop to return error. */
446   svn_parser->error = error;
447 }
448 
449 
450 
451 
452 
453 
454 
455 
456 /*** Attribute walking. ***/
457 
/* Look NAME up in ATTS, a NULL-terminated array of alternating
   attribute names and values.  Return the value paired with the first
   matching name, or NULL if ATTS is NULL or holds no such name.  */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  if (atts == NULL)
    return NULL;

  /* Step through the array one (name, value) pair at a time.  */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  /* No such attribute name seen.  */
  return NULL;
}
472 
473 
474 
475 /*** Printing XML ***/
476 
477 void
svn_xml_make_header2(svn_stringbuf_t ** str,const char * encoding,apr_pool_t * pool)478 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
479                      apr_pool_t *pool)
480 {
481 
482   if (*str == NULL)
483     *str = svn_stringbuf_create_empty(pool);
484   svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
485   if (encoding)
486     {
487       encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
488       svn_stringbuf_appendcstr(*str, encoding);
489     }
490   svn_stringbuf_appendcstr(*str, "?>\n");
491 }
492 
493 
494 
495 /*** Creating attribute hashes. ***/
496 
497 /* Combine an existing attribute list ATTS with a HASH that itself
498    represents an attribute list.  Iff PRESERVE is true, then no value
499    already in HASH will be changed, else values from ATTS will
500    override previous values in HASH. */
501 static void
amalgamate(const char ** atts,apr_hash_t * ht,svn_boolean_t preserve,apr_pool_t * pool)502 amalgamate(const char **atts,
503            apr_hash_t *ht,
504            svn_boolean_t preserve,
505            apr_pool_t *pool)
506 {
507   const char *key;
508 
509   if (atts)
510     for (key = *atts; key; key = *(++atts))
511       {
512         const char *val = *(++atts);
513         size_t keylen;
514         assert(key != NULL);
515         /* kff todo: should we also insist that val be non-null here?
516            Probably. */
517 
518         keylen = strlen(key);
519         if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
520           continue;
521         else
522           apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
523                        val ? apr_pstrdup(pool, val) : NULL);
524       }
525 }
526 
527 
528 apr_hash_t *
svn_xml_ap_to_hash(va_list ap,apr_pool_t * pool)529 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
530 {
531   apr_hash_t *ht = apr_hash_make(pool);
532   const char *key;
533 
534   while ((key = va_arg(ap, char *)) != NULL)
535     {
536       const char *val = va_arg(ap, const char *);
537       svn_hash_sets(ht, key, val);
538     }
539 
540   return ht;
541 }
542 
543 
544 apr_hash_t *
svn_xml_make_att_hash(const char ** atts,apr_pool_t * pool)545 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
546 {
547   apr_hash_t *ht = apr_hash_make(pool);
548   amalgamate(atts, ht, 0, pool);  /* third arg irrelevant in this case */
549   return ht;
550 }
551 
552 
553 void
svn_xml_hash_atts_overlaying(const char ** atts,apr_hash_t * ht,apr_pool_t * pool)554 svn_xml_hash_atts_overlaying(const char **atts,
555                              apr_hash_t *ht,
556                              apr_pool_t *pool)
557 {
558   amalgamate(atts, ht, 0, pool);
559 }
560 
561 
562 void
svn_xml_hash_atts_preserving(const char ** atts,apr_hash_t * ht,apr_pool_t * pool)563 svn_xml_hash_atts_preserving(const char **atts,
564                              apr_hash_t *ht,
565                              apr_pool_t *pool)
566 {
567   amalgamate(atts, ht, 1, pool);
568 }
569 
570 
571 
572 /*** Making XML tags. ***/
573 
574 
575 void
svn_xml_make_open_tag_hash(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,apr_hash_t * attributes)576 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
577                            apr_pool_t *pool,
578                            enum svn_xml_open_tag_style style,
579                            const char *tagname,
580                            apr_hash_t *attributes)
581 {
582   apr_hash_index_t *hi;
583   apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
584 
585   if (*str == NULL)
586     *str = svn_stringbuf_create_ensure(est_size, pool);
587 
588   svn_stringbuf_appendcstr(*str, "<");
589   svn_stringbuf_appendcstr(*str, tagname);
590 
591   for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
592     {
593       const void *key;
594       void *val;
595 
596       apr_hash_this(hi, &key, NULL, &val);
597       assert(val != NULL);
598 
599       svn_stringbuf_appendcstr(*str, "\n   ");
600       svn_stringbuf_appendcstr(*str, key);
601       svn_stringbuf_appendcstr(*str, "=\"");
602       svn_xml_escape_attr_cstring(str, val, pool);
603       svn_stringbuf_appendcstr(*str, "\"");
604     }
605 
606   if (style == svn_xml_self_closing)
607     svn_stringbuf_appendcstr(*str, "/");
608   svn_stringbuf_appendcstr(*str, ">");
609   if (style != svn_xml_protect_pcdata)
610     svn_stringbuf_appendcstr(*str, "\n");
611 }
612 
613 
614 void
svn_xml_make_open_tag_v(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,va_list ap)615 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
616                         apr_pool_t *pool,
617                         enum svn_xml_open_tag_style style,
618                         const char *tagname,
619                         va_list ap)
620 {
621   apr_pool_t *subpool = svn_pool_create(pool);
622   apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
623 
624   svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
625   svn_pool_destroy(subpool);
626 }
627 
628 
629 
630 void
svn_xml_make_open_tag(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,...)631 svn_xml_make_open_tag(svn_stringbuf_t **str,
632                       apr_pool_t *pool,
633                       enum svn_xml_open_tag_style style,
634                       const char *tagname,
635                       ...)
636 {
637   va_list ap;
638 
639   va_start(ap, tagname);
640   svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
641   va_end(ap);
642 }
643 
644 
/* Append the closing tag "</TAGNAME>" and a newline to *STR, creating
   *STR in POOL if it is NULL.  */
void
svn_xml_make_close_tag(svn_stringbuf_t **str,
                       apr_pool_t *pool,
                       const char *tagname)
{
  svn_stringbuf_t *out = *str;

  if (out == NULL)
    {
      out = svn_stringbuf_create_empty(pool);
      *str = out;
    }

  svn_stringbuf_appendcstr(out, "</");
  svn_stringbuf_appendcstr(out, tagname);
  svn_stringbuf_appendcstr(out, ">\n");
}
656