1 /*
2 * xml.c: xml helper code shared among the Subversion libraries.
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25
#include <assert.h>
#include <limits.h>
#include <string.h>
28
29 #include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */
30 #include "svn_hash.h"
31 #include "svn_pools.h"
32 #include "svn_xml.h"
33 #include "svn_error.h"
34 #include "svn_ctype.h"
35
36 #include "private/svn_utf_private.h"
37
38 #ifdef SVN_HAVE_OLD_EXPAT
39 #include <xmlparse.h>
40 #else
41 #include <expat.h>
42 #endif
43
44 #ifdef XML_UNICODE
45 #error Expat is unusable -- it has been compiled for wide characters
46 #endif
47
48 /* The private internals for a parser object. */
49 struct svn_xml_parser_t
50 {
51 /** the expat parser */
52 XML_Parser parser;
53
54 /** the SVN callbacks to call from the Expat callbacks */
55 svn_xml_start_elem start_handler;
56 svn_xml_end_elem end_handler;
57 svn_xml_char_data data_handler;
58
59 /** the user's baton for private data */
60 void *baton;
61
62 /** if non-@c NULL, an error happened while parsing */
63 svn_error_t *error;
64
65 /** where this object is allocated, so we can free it easily */
66 apr_pool_t *pool;
67
68 };
69
70
71 /*** XML character validation ***/
72
73 svn_boolean_t
svn_xml_is_xml_safe(const char * data,apr_size_t len)74 svn_xml_is_xml_safe(const char *data, apr_size_t len)
75 {
76 const char *end = data + len;
77 const char *p;
78
79 if (! svn_utf__is_valid(data, len))
80 return FALSE;
81
82 for (p = data; p < end; p++)
83 {
84 unsigned char c = *p;
85
86 if (svn_ctype_iscntrl(c))
87 {
88 if ((c != SVN_CTYPE_ASCII_TAB)
89 && (c != SVN_CTYPE_ASCII_LINEFEED)
90 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
91 && (c != SVN_CTYPE_ASCII_DELETE))
92 return FALSE;
93 }
94 }
95 return TRUE;
96 }
97
98
99
100
101
102 /*** XML escaping. ***/
103
104 /* ### ...?
105 *
106 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
107 * in POOL, else append to the existing stringbuf there.
108 */
109 static void
xml_escape_cdata(svn_stringbuf_t ** outstr,const char * data,apr_size_t len,apr_pool_t * pool)110 xml_escape_cdata(svn_stringbuf_t **outstr,
111 const char *data,
112 apr_size_t len,
113 apr_pool_t *pool)
114 {
115 const char *end = data + len;
116 const char *p = data, *q;
117
118 if (*outstr == NULL)
119 *outstr = svn_stringbuf_create_empty(pool);
120
121 while (1)
122 {
123 /* Find a character which needs to be quoted and append bytes up
124 to that point. Strictly speaking, '>' only needs to be
125 quoted if it follows "]]", but it's easier to quote it all
126 the time.
127
128 So, why are we escaping '\r' here? Well, according to the
129 XML spec, '\r\n' gets converted to '\n' during XML parsing.
130 Also, any '\r' not followed by '\n' is converted to '\n'. By
131 golly, if we say we want to escape a '\r', we want to make
132 sure it remains a '\r'! */
133 q = p;
134 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
135 q++;
136 svn_stringbuf_appendbytes(*outstr, p, q - p);
137
138 /* We may already be a winner. */
139 if (q == end)
140 break;
141
142 /* Append the entity reference for the character. */
143 if (*q == '&')
144 svn_stringbuf_appendcstr(*outstr, "&");
145 else if (*q == '<')
146 svn_stringbuf_appendcstr(*outstr, "<");
147 else if (*q == '>')
148 svn_stringbuf_appendcstr(*outstr, ">");
149 else if (*q == '\r')
150 svn_stringbuf_appendcstr(*outstr, " ");
151
152 p = q + 1;
153 }
154 }
155
156 /* Essentially the same as xml_escape_cdata, with the addition of
157 whitespace and quote characters. */
158 static void
xml_escape_attr(svn_stringbuf_t ** outstr,const char * data,apr_size_t len,apr_pool_t * pool)159 xml_escape_attr(svn_stringbuf_t **outstr,
160 const char *data,
161 apr_size_t len,
162 apr_pool_t *pool)
163 {
164 const char *end = data + len;
165 const char *p = data, *q;
166
167 if (*outstr == NULL)
168 *outstr = svn_stringbuf_create_ensure(len, pool);
169
170 while (1)
171 {
172 /* Find a character which needs to be quoted and append bytes up
173 to that point. */
174 q = p;
175 while (q < end && *q != '&' && *q != '<' && *q != '>'
176 && *q != '"' && *q != '\'' && *q != '\r'
177 && *q != '\n' && *q != '\t')
178 q++;
179 svn_stringbuf_appendbytes(*outstr, p, q - p);
180
181 /* We may already be a winner. */
182 if (q == end)
183 break;
184
185 /* Append the entity reference for the character. */
186 if (*q == '&')
187 svn_stringbuf_appendcstr(*outstr, "&");
188 else if (*q == '<')
189 svn_stringbuf_appendcstr(*outstr, "<");
190 else if (*q == '>')
191 svn_stringbuf_appendcstr(*outstr, ">");
192 else if (*q == '"')
193 svn_stringbuf_appendcstr(*outstr, """);
194 else if (*q == '\'')
195 svn_stringbuf_appendcstr(*outstr, "'");
196 else if (*q == '\r')
197 svn_stringbuf_appendcstr(*outstr, " ");
198 else if (*q == '\n')
199 svn_stringbuf_appendcstr(*outstr, " ");
200 else if (*q == '\t')
201 svn_stringbuf_appendcstr(*outstr, "	");
202
203 p = q + 1;
204 }
205 }
206
207
208 void
svn_xml_escape_cdata_stringbuf(svn_stringbuf_t ** outstr,const svn_stringbuf_t * string,apr_pool_t * pool)209 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
210 const svn_stringbuf_t *string,
211 apr_pool_t *pool)
212 {
213 xml_escape_cdata(outstr, string->data, string->len, pool);
214 }
215
216
217 void
svn_xml_escape_cdata_string(svn_stringbuf_t ** outstr,const svn_string_t * string,apr_pool_t * pool)218 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
219 const svn_string_t *string,
220 apr_pool_t *pool)
221 {
222 xml_escape_cdata(outstr, string->data, string->len, pool);
223 }
224
225
226 void
svn_xml_escape_cdata_cstring(svn_stringbuf_t ** outstr,const char * string,apr_pool_t * pool)227 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
228 const char *string,
229 apr_pool_t *pool)
230 {
231 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
232 }
233
234
235 void
svn_xml_escape_attr_stringbuf(svn_stringbuf_t ** outstr,const svn_stringbuf_t * string,apr_pool_t * pool)236 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
237 const svn_stringbuf_t *string,
238 apr_pool_t *pool)
239 {
240 xml_escape_attr(outstr, string->data, string->len, pool);
241 }
242
243
244 void
svn_xml_escape_attr_string(svn_stringbuf_t ** outstr,const svn_string_t * string,apr_pool_t * pool)245 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
246 const svn_string_t *string,
247 apr_pool_t *pool)
248 {
249 xml_escape_attr(outstr, string->data, string->len, pool);
250 }
251
252
253 void
svn_xml_escape_attr_cstring(svn_stringbuf_t ** outstr,const char * string,apr_pool_t * pool)254 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
255 const char *string,
256 apr_pool_t *pool)
257 {
258 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
259 }
260
261
262 const char *
svn_xml_fuzzy_escape(const char * string,apr_pool_t * pool)263 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
264 {
265 const char *end = string + strlen(string);
266 const char *p = string, *q;
267 svn_stringbuf_t *outstr;
268 char escaped_char[6]; /* ? \ u u u \0 */
269
270 for (q = p; q < end; q++)
271 {
272 if (svn_ctype_iscntrl(*q)
273 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
274 break;
275 }
276
277 /* Return original string if no unsafe characters found. */
278 if (q == end)
279 return string;
280
281 outstr = svn_stringbuf_create_empty(pool);
282 while (1)
283 {
284 q = p;
285
286 /* Traverse till either unsafe character or eos. */
287 while ((q < end)
288 && ((! svn_ctype_iscntrl(*q))
289 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
290 q++;
291
292 /* copy chunk before marker */
293 svn_stringbuf_appendbytes(outstr, p, q - p);
294
295 if (q == end)
296 break;
297
298 /* Append an escaped version of the unsafe character.
299
300 ### This format was chosen for consistency with
301 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions
302 ### should probably share code, even though they escape
303 ### different characters.
304 */
305 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
306 (unsigned char) *q);
307 svn_stringbuf_appendcstr(outstr, escaped_char);
308
309 p = q + 1;
310 }
311
312 return outstr->data;
313 }
314
315
316 /*** Map from the Expat callback types to the SVN XML types. ***/
317
expat_start_handler(void * userData,const XML_Char * name,const XML_Char ** atts)318 static void expat_start_handler(void *userData,
319 const XML_Char *name,
320 const XML_Char **atts)
321 {
322 svn_xml_parser_t *svn_parser = userData;
323
324 (*svn_parser->start_handler)(svn_parser->baton, name, atts);
325 }
326
expat_end_handler(void * userData,const XML_Char * name)327 static void expat_end_handler(void *userData, const XML_Char *name)
328 {
329 svn_xml_parser_t *svn_parser = userData;
330
331 (*svn_parser->end_handler)(svn_parser->baton, name);
332 }
333
expat_data_handler(void * userData,const XML_Char * s,int len)334 static void expat_data_handler(void *userData, const XML_Char *s, int len)
335 {
336 svn_xml_parser_t *svn_parser = userData;
337
338 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
339 }
340
341
342 /*** Making a parser. ***/
343
344 svn_xml_parser_t *
svn_xml_make_parser(void * baton,svn_xml_start_elem start_handler,svn_xml_end_elem end_handler,svn_xml_char_data data_handler,apr_pool_t * pool)345 svn_xml_make_parser(void *baton,
346 svn_xml_start_elem start_handler,
347 svn_xml_end_elem end_handler,
348 svn_xml_char_data data_handler,
349 apr_pool_t *pool)
350 {
351 svn_xml_parser_t *svn_parser;
352 apr_pool_t *subpool;
353
354 XML_Parser parser = XML_ParserCreate(NULL);
355
356 XML_SetElementHandler(parser,
357 start_handler ? expat_start_handler : NULL,
358 end_handler ? expat_end_handler : NULL);
359 XML_SetCharacterDataHandler(parser,
360 data_handler ? expat_data_handler : NULL);
361
362 /* ### we probably don't want this pool; or at least we should pass it
363 ### to the callbacks and clear it periodically. */
364 subpool = svn_pool_create(pool);
365
366 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
367
368 svn_parser->parser = parser;
369 svn_parser->start_handler = start_handler;
370 svn_parser->end_handler = end_handler;
371 svn_parser->data_handler = data_handler;
372 svn_parser->baton = baton;
373 svn_parser->pool = subpool;
374
375 /* store our parser info as the UserData in the Expat parser */
376 XML_SetUserData(parser, svn_parser);
377
378 return svn_parser;
379 }
380
381
382 /* Free a parser */
383 void
svn_xml_free_parser(svn_xml_parser_t * svn_parser)384 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
385 {
386 /* Free the expat parser */
387 XML_ParserFree(svn_parser->parser);
388
389 /* Free the subversion parser */
390 svn_pool_destroy(svn_parser->pool);
391 }
392
393
394
395
396 svn_error_t *
svn_xml_parse(svn_xml_parser_t * svn_parser,const char * buf,apr_size_t len,svn_boolean_t is_final)397 svn_xml_parse(svn_xml_parser_t *svn_parser,
398 const char *buf,
399 apr_size_t len,
400 svn_boolean_t is_final)
401 {
402 svn_error_t *err;
403 int success;
404
405 /* Parse some xml data */
406 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
407
408 /* If expat choked internally, return its error. */
409 if (! success)
410 {
411 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
412 long line = XML_GetCurrentLineNumber(svn_parser->parser);
413
414 err = svn_error_createf
415 (SVN_ERR_XML_MALFORMED, NULL,
416 _("Malformed XML: %s at line %ld"),
417 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
418
419 /* Kill all parsers and return the expat error */
420 svn_xml_free_parser(svn_parser);
421 return err;
422 }
423
424 /* Did an error occur somewhere *inside* the expat callbacks? */
425 if (svn_parser->error)
426 {
427 err = svn_parser->error;
428 svn_xml_free_parser(svn_parser);
429 return err;
430 }
431
432 return SVN_NO_ERROR;
433 }
434
435
436
svn_xml_signal_bailout(svn_error_t * error,svn_xml_parser_t * svn_parser)437 void svn_xml_signal_bailout(svn_error_t *error,
438 svn_xml_parser_t *svn_parser)
439 {
440 /* This will cause the current XML_Parse() call to finish quickly! */
441 XML_SetElementHandler(svn_parser->parser, NULL, NULL);
442 XML_SetCharacterDataHandler(svn_parser->parser, NULL);
443
444 /* Once outside of XML_Parse(), the existence of this field will
445 cause svn_delta_parse()'s main read-loop to return error. */
446 svn_parser->error = error;
447 }
448
449
450
451
452
453
454
455
456 /*** Attribute walking. ***/
457
/* Look up NAME in ATTS, a NULL-terminated array of alternating attribute
   names and values.  Return the value paired with the first matching
   name, or NULL if ATTS is NULL or contains no such name. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  if (atts == NULL)
    return NULL;

  /* Step through the array one (name, value) pair at a time. */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  /* No such attribute name seen. */
  return NULL;
}
472
473
474
475 /*** Printing XML ***/
476
477 void
svn_xml_make_header2(svn_stringbuf_t ** str,const char * encoding,apr_pool_t * pool)478 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
479 apr_pool_t *pool)
480 {
481
482 if (*str == NULL)
483 *str = svn_stringbuf_create_empty(pool);
484 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
485 if (encoding)
486 {
487 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
488 svn_stringbuf_appendcstr(*str, encoding);
489 }
490 svn_stringbuf_appendcstr(*str, "?>\n");
491 }
492
493
494
495 /*** Creating attribute hashes. ***/
496
497 /* Combine an existing attribute list ATTS with a HASH that itself
498 represents an attribute list. Iff PRESERVE is true, then no value
499 already in HASH will be changed, else values from ATTS will
500 override previous values in HASH. */
501 static void
amalgamate(const char ** atts,apr_hash_t * ht,svn_boolean_t preserve,apr_pool_t * pool)502 amalgamate(const char **atts,
503 apr_hash_t *ht,
504 svn_boolean_t preserve,
505 apr_pool_t *pool)
506 {
507 const char *key;
508
509 if (atts)
510 for (key = *atts; key; key = *(++atts))
511 {
512 const char *val = *(++atts);
513 size_t keylen;
514 assert(key != NULL);
515 /* kff todo: should we also insist that val be non-null here?
516 Probably. */
517
518 keylen = strlen(key);
519 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
520 continue;
521 else
522 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
523 val ? apr_pstrdup(pool, val) : NULL);
524 }
525 }
526
527
528 apr_hash_t *
svn_xml_ap_to_hash(va_list ap,apr_pool_t * pool)529 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
530 {
531 apr_hash_t *ht = apr_hash_make(pool);
532 const char *key;
533
534 while ((key = va_arg(ap, char *)) != NULL)
535 {
536 const char *val = va_arg(ap, const char *);
537 svn_hash_sets(ht, key, val);
538 }
539
540 return ht;
541 }
542
543
544 apr_hash_t *
svn_xml_make_att_hash(const char ** atts,apr_pool_t * pool)545 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
546 {
547 apr_hash_t *ht = apr_hash_make(pool);
548 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */
549 return ht;
550 }
551
552
553 void
svn_xml_hash_atts_overlaying(const char ** atts,apr_hash_t * ht,apr_pool_t * pool)554 svn_xml_hash_atts_overlaying(const char **atts,
555 apr_hash_t *ht,
556 apr_pool_t *pool)
557 {
558 amalgamate(atts, ht, 0, pool);
559 }
560
561
562 void
svn_xml_hash_atts_preserving(const char ** atts,apr_hash_t * ht,apr_pool_t * pool)563 svn_xml_hash_atts_preserving(const char **atts,
564 apr_hash_t *ht,
565 apr_pool_t *pool)
566 {
567 amalgamate(atts, ht, 1, pool);
568 }
569
570
571
572 /*** Making XML tags. ***/
573
574
575 void
svn_xml_make_open_tag_hash(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,apr_hash_t * attributes)576 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
577 apr_pool_t *pool,
578 enum svn_xml_open_tag_style style,
579 const char *tagname,
580 apr_hash_t *attributes)
581 {
582 apr_hash_index_t *hi;
583 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
584
585 if (*str == NULL)
586 *str = svn_stringbuf_create_ensure(est_size, pool);
587
588 svn_stringbuf_appendcstr(*str, "<");
589 svn_stringbuf_appendcstr(*str, tagname);
590
591 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
592 {
593 const void *key;
594 void *val;
595
596 apr_hash_this(hi, &key, NULL, &val);
597 assert(val != NULL);
598
599 svn_stringbuf_appendcstr(*str, "\n ");
600 svn_stringbuf_appendcstr(*str, key);
601 svn_stringbuf_appendcstr(*str, "=\"");
602 svn_xml_escape_attr_cstring(str, val, pool);
603 svn_stringbuf_appendcstr(*str, "\"");
604 }
605
606 if (style == svn_xml_self_closing)
607 svn_stringbuf_appendcstr(*str, "/");
608 svn_stringbuf_appendcstr(*str, ">");
609 if (style != svn_xml_protect_pcdata)
610 svn_stringbuf_appendcstr(*str, "\n");
611 }
612
613
614 void
svn_xml_make_open_tag_v(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,va_list ap)615 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
616 apr_pool_t *pool,
617 enum svn_xml_open_tag_style style,
618 const char *tagname,
619 va_list ap)
620 {
621 apr_pool_t *subpool = svn_pool_create(pool);
622 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
623
624 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
625 svn_pool_destroy(subpool);
626 }
627
628
629
630 void
svn_xml_make_open_tag(svn_stringbuf_t ** str,apr_pool_t * pool,enum svn_xml_open_tag_style style,const char * tagname,...)631 svn_xml_make_open_tag(svn_stringbuf_t **str,
632 apr_pool_t *pool,
633 enum svn_xml_open_tag_style style,
634 const char *tagname,
635 ...)
636 {
637 va_list ap;
638
639 va_start(ap, tagname);
640 svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
641 va_end(ap);
642 }
643
644
/* Append the closing tag for TAGNAME, followed by a newline, to *STR,
   creating *STR in POOL first if it is NULL. */
void
svn_xml_make_close_tag(svn_stringbuf_t **str,
                       apr_pool_t *pool,
                       const char *tagname)
{
  svn_stringbuf_t *out = *str;

  if (out == NULL)
    {
      out = svn_stringbuf_create_empty(pool);
      *str = out;
    }

  svn_stringbuf_appendcstr(out, "</");
  svn_stringbuf_appendcstr(out, tagname);
  svn_stringbuf_appendcstr(out, ">\n");
}
656