xref: /freebsd-11-stable/contrib/apr-util/xml/apr_xml.c (revision 3c9339f7792540596bf97077a8f403e944af7f39)
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "apr.h"
18 #include "apr_strings.h"
19 
20 #define APR_WANT_STDIO          /* for sprintf() */
21 #define APR_WANT_STRFUNC
22 #include "apr_want.h"
23 
24 #include "apr_xml.h"
25 
26 #include "apu_config.h"
27 
28 #if defined(HAVE_XMLPARSE_XMLPARSE_H)
29 #include <xmlparse/xmlparse.h>
30 #elif defined(HAVE_XMLTOK_XMLPARSE_H)
31 #include <xmltok/xmlparse.h>
32 #elif defined(HAVE_XML_XMLPARSE_H)
33 #include <xml/xmlparse.h>
34 #else
35 #include <expat.h>
36 #endif
37 
/* line terminator appended by apr_xml_empty_elem() to the text it builds */
#define DEBUG_CR "\r\n"

/*
 * Keywords spelled as explicit byte values instead of character literals,
 * so the stored bytes are the same whatever character set the compiler
 * uses for literals.
 */
static const char APR_KW_xmlns[] = {
    0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0'                      /* xmlns */
};
static const char APR_KW_xmlns_lang[] = {
    0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0'    /* xml:lang */
};
static const char APR_KW_DAV[] = {
    0x44, 0x41, 0x56, 0x3A, '\0'                            /* DAV: */
};

/* error codes recorded while resolving namespaces */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)
47 
/*
 * Test for a name that begins with [Xx][Mm][Ll] (compared as ASCII byte
 * values). The argument is parenthesized so that any pointer expression,
 * e.g. "p + 2", can be passed. It is expanded several times, so it must be
 * free of side effects. The && chain stops at the first mismatch, so a name
 * shorter than three characters is never read past its terminator.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
53 
54 
55 /* the real (internal) definition of the parser context */
56 struct apr_xml_parser {
57     apr_xml_doc *doc;		/* the doc we're parsing */
58     apr_pool_t *p;		/* the pool we allocate from */
59     apr_xml_elem *cur_elem;	/* current element */
60 
61     int error;			/* an error has occurred */
62 #define APR_XML_ERROR_EXPAT             1
63 #define APR_XML_ERROR_PARSE_DONE        2
64 /* also: public APR_XML_NS_ERROR_* values (if any) */
65 
66     XML_Parser xp;              /* the actual (Expat) XML parser */
67     enum XML_Error xp_err;      /* stored Expat error code */
68 };
69 
/* one namespace declaration, chained onto the element that declared it */
typedef struct apr_xml_ns_scope apr_xml_ns_scope;

struct apr_xml_ns_scope {
    const char *prefix;         /* declared prefix ("" for the default ns) */
    int ns;                     /* index into the document namespace table */
    int emptyURI;               /* non-zero when the declared URI is "" */
    apr_xml_ns_scope *next;     /* next declaration on the same element */
};
77 
78 
79 /* return namespace table index for a given prefix */
find_prefix(apr_xml_parser * parser,const char * prefix)80 static int find_prefix(apr_xml_parser *parser, const char *prefix)
81 {
82     apr_xml_elem *elem = parser->cur_elem;
83 
84     /*
85     ** Walk up the tree, looking for a namespace scope that defines this
86     ** prefix.
87     */
88     for (; elem; elem = elem->parent) {
89 	apr_xml_ns_scope *ns_scope;
90 
91 	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
92 	    if (strcmp(prefix, ns_scope->prefix) == 0) {
93 		if (ns_scope->emptyURI) {
94 		    /*
95 		    ** It is possible to set the default namespace to an
96 		    ** empty URI string; this resets the default namespace
97 		    ** to mean "no namespace." We just found the prefix
98 		    ** refers to an empty URI, so return "no namespace."
99 		    */
100 		    return APR_XML_NS_NONE;
101 		}
102 
103 		return ns_scope->ns;
104 	    }
105 	}
106     }
107 
108     /*
109      * If the prefix is empty (""), this means that a prefix was not
110      * specified in the element/attribute. The search that was performed
111      * just above did not locate a default namespace URI (which is stored
112      * into ns_scope with an empty prefix). This means the element/attribute
113      * has "no namespace". We have a reserved value for this.
114      */
115     if (*prefix == '\0') {
116 	return APR_XML_NS_NONE;
117     }
118 
119     /* not found */
120     return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
121 }
122 
123 /* return original prefix given ns index */
find_prefix_name(const apr_xml_elem * elem,int ns,int parent)124 static const char * find_prefix_name(const apr_xml_elem *elem, int ns, int parent)
125 {
126     /*
127     ** Walk up the tree, looking for a namespace scope that defines this
128     ** prefix.
129     */
130     for (; elem; elem = parent ? elem->parent : NULL) {
131 	apr_xml_ns_scope *ns_scope = elem->ns_scope;
132 
133 	for (; ns_scope; ns_scope = ns_scope->next) {
134 	    if (ns_scope->ns == ns)
135 		return ns_scope->prefix;
136 	}
137     }
138     /* not found */
139     return "";
140 }
141 
142 
start_handler(void * userdata,const char * name,const char ** attrs)143 static void start_handler(void *userdata, const char *name, const char **attrs)
144 {
145     apr_xml_parser *parser = userdata;
146     apr_xml_elem *elem;
147     apr_xml_attr *attr;
148     apr_xml_attr *prev;
149     char *colon;
150     const char *quoted;
151     char *elem_name;
152 
153     /* punt once we find an error */
154     if (parser->error)
155 	return;
156 
157     elem = apr_pcalloc(parser->p, sizeof(*elem));
158 
159     /* prep the element */
160     elem->name = elem_name = apr_pstrdup(parser->p, name);
161 
162     /* fill in the attributes (note: ends up in reverse order) */
163     while (*attrs) {
164 	attr = apr_palloc(parser->p, sizeof(*attr));
165 	attr->name = apr_pstrdup(parser->p, *attrs++);
166 	attr->value = apr_pstrdup(parser->p, *attrs++);
167 	attr->next = elem->attr;
168 	elem->attr = attr;
169     }
170 
171     /* hook the element into the tree */
172     if (parser->cur_elem == NULL) {
173 	/* no current element; this also becomes the root */
174 	parser->cur_elem = parser->doc->root = elem;
175     }
176     else {
177 	/* this element appeared within the current elem */
178 	elem->parent = parser->cur_elem;
179 
180 	/* set up the child/sibling links */
181 	if (elem->parent->last_child == NULL) {
182 	    /* no first child either */
183 	    elem->parent->first_child = elem->parent->last_child = elem;
184 	}
185 	else {
186 	    /* hook onto the end of the parent's children */
187 	    elem->parent->last_child->next = elem;
188 	    elem->parent->last_child = elem;
189 	}
190 
191 	/* this element is now the current element */
192 	parser->cur_elem = elem;
193     }
194 
195     /* scan the attributes for namespace declarations */
196     for (prev = NULL, attr = elem->attr;
197 	 attr;
198 	 attr = attr->next) {
199 	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
200 	    const char *prefix = &attr->name[5];
201 	    apr_xml_ns_scope *ns_scope;
202 
203 	    /* test for xmlns:foo= form and xmlns= form */
204 	    if (*prefix == 0x3A) {
205                 /* a namespace prefix declaration must have a
206                    non-empty value. */
207                 if (attr->value[0] == '\0') {
208                     parser->error = APR_XML_NS_ERROR_INVALID_DECL;
209                     return;
210                 }
211 		++prefix;
212             }
213 	    else if (*prefix != '\0') {
214 		/* advance "prev" since "attr" is still present */
215 		prev = attr;
216 		continue;
217 	    }
218 
219 	    /* quote the URI before we ever start working with it */
220 	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);
221 
222 	    /* build and insert the new scope */
223 	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
224 	    ns_scope->prefix = prefix;
225 	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
226 	    ns_scope->emptyURI = *quoted == '\0';
227 	    ns_scope->next = elem->ns_scope;
228 	    elem->ns_scope = ns_scope;
229 
230 	    /* remove this attribute from the element */
231 	    if (prev == NULL)
232 		elem->attr = attr->next;
233 	    else
234 		prev->next = attr->next;
235 
236 	    /* Note: prev will not be advanced since we just removed "attr" */
237 	}
238 	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
239 	    /* save away the language (in quoted form) */
240 	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
241 
242 	    /* remove this attribute from the element */
243 	    if (prev == NULL)
244 		elem->attr = attr->next;
245 	    else
246 		prev->next = attr->next;
247 
248 	    /* Note: prev will not be advanced since we just removed "attr" */
249 	}
250 	else {
251 	    /* advance "prev" since "attr" is still present */
252 	    prev = attr;
253 	}
254     }
255 
256     /*
257     ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
258     ** language from the parent element (if present).
259     **
260     ** NOTE: elem_size() *depends* upon this pointer equality.
261     */
262     if (elem->lang == NULL && elem->parent != NULL)
263 	elem->lang = elem->parent->lang;
264 
265     /* adjust the element's namespace */
266     colon = strchr(elem_name, 0x3A);
267     if (colon == NULL) {
268 	/*
269 	 * The element is using the default namespace, which will always
270 	 * be found. Either it will be "no namespace", or a default
271 	 * namespace URI has been specified at some point.
272 	 */
273 	elem->ns = find_prefix(parser, "");
274     }
275     else if (APR_XML_NS_IS_RESERVED(elem->name)) {
276 	elem->ns = APR_XML_NS_NONE;
277     }
278     else {
279 	*colon = '\0';
280 	elem->ns = find_prefix(parser, elem->name);
281 	elem->name = colon + 1;
282 
283 	if (APR_XML_NS_IS_ERROR(elem->ns)) {
284 	    parser->error = elem->ns;
285 	    return;
286 	}
287     }
288 
289     /* adjust all remaining attributes' namespaces */
290     for (attr = elem->attr; attr; attr = attr->next) {
291         /*
292          * apr_xml_attr defines this as "const" but we dup'd it, so we
293          * know that we can change it. a bit hacky, but the existing
294          * structure def is best.
295          */
296         char *attr_name = (char *)attr->name;
297 
298 	colon = strchr(attr_name, 0x3A);
299 	if (colon == NULL) {
300 	    /*
301 	     * Attributes do NOT use the default namespace. Therefore,
302 	     * we place them into the "no namespace" category.
303 	     */
304 	    attr->ns = APR_XML_NS_NONE;
305 	}
306 	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
307 	    attr->ns = APR_XML_NS_NONE;
308 	}
309 	else {
310 	    *colon = '\0';
311 	    attr->ns = find_prefix(parser, attr->name);
312 	    attr->name = colon + 1;
313 
314 	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
315 		parser->error = attr->ns;
316 		return;
317 	    }
318 	}
319     }
320 }
321 
end_handler(void * userdata,const char * name)322 static void end_handler(void *userdata, const char *name)
323 {
324     apr_xml_parser *parser = userdata;
325 
326     /* punt once we find an error */
327     if (parser->error)
328 	return;
329 
330     /* pop up one level */
331     parser->cur_elem = parser->cur_elem->parent;
332 }
333 
cdata_handler(void * userdata,const char * data,int len)334 static void cdata_handler(void *userdata, const char *data, int len)
335 {
336     apr_xml_parser *parser = userdata;
337     apr_xml_elem *elem;
338     apr_text_header *hdr;
339     const char *s;
340 
341     /* punt once we find an error */
342     if (parser->error)
343 	return;
344 
345     elem = parser->cur_elem;
346     s = apr_pstrndup(parser->p, data, len);
347 
348     if (elem->last_child == NULL) {
349 	/* no children yet. this cdata follows the start tag */
350 	hdr = &elem->first_cdata;
351     }
352     else {
353 	/* child elements exist. this cdata follows the last child. */
354 	hdr = &elem->last_child->following_cdata;
355     }
356 
357     apr_text_append(parser->p, hdr, s);
358 }
359 
cleanup_parser(void * ctx)360 static apr_status_t cleanup_parser(void *ctx)
361 {
362     apr_xml_parser *parser = ctx;
363 
364     XML_ParserFree(parser->xp);
365     parser->xp = NULL;
366 
367     return APR_SUCCESS;
368 }
369 
370 #if XML_MAJOR_VERSION > 1
371 /* Stop the parser if an entity declaration is hit. */
entity_declaration(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)372 static void entity_declaration(void *userData, const XML_Char *entityName,
373                                int is_parameter_entity, const XML_Char *value,
374                                int value_length, const XML_Char *base,
375                                const XML_Char *systemId, const XML_Char *publicId,
376                                const XML_Char *notationName)
377 {
378     apr_xml_parser *parser = userData;
379 
380     XML_StopParser(parser->xp, XML_FALSE);
381 }
382 #else
383 /* A noop default_handler. */
default_handler(void * userData,const XML_Char * s,int len)384 static void default_handler(void *userData, const XML_Char *s, int len)
385 {
386 }
387 #endif
388 
apr_xml_parser_create(apr_pool_t * pool)389 APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
390 {
391     apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
392 
393     parser->p = pool;
394     parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
395 
396     parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
397 
398     /* ### is there a way to avoid hard-coding this? */
399     apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
400 
401     parser->xp = XML_ParserCreate(NULL);
402     if (parser->xp == NULL) {
403         (*apr_pool_abort_get(pool))(APR_ENOMEM);
404         return NULL;
405     }
406 
407     apr_pool_cleanup_register(pool, parser, cleanup_parser,
408                               apr_pool_cleanup_null);
409 
410     XML_SetUserData(parser->xp, parser);
411     XML_SetElementHandler(parser->xp, start_handler, end_handler);
412     XML_SetCharacterDataHandler(parser->xp, cdata_handler);
413 
414     /* Prevent the "billion laughs" attack against expat by disabling
415      * internal entity expansion.  With 2.x, forcibly stop the parser
416      * if an entity is declared - this is safer and a more obvious
417      * failure mode.  With older versions, installing a noop
418      * DefaultHandler means that internal entities will be expanded as
419      * the empty string, which is also sufficient to prevent the
420      * attack. */
421 #if XML_MAJOR_VERSION > 1
422     XML_SetEntityDeclHandler(parser->xp, entity_declaration);
423 #else
424     XML_SetDefaultHandler(parser->xp, default_handler);
425 #endif
426 
427     return parser;
428 }
429 
do_parse(apr_xml_parser * parser,const char * data,apr_size_t len,int is_final)430 static apr_status_t do_parse(apr_xml_parser *parser,
431                              const char *data, apr_size_t len,
432                              int is_final)
433 {
434     if (parser->xp == NULL) {
435         parser->error = APR_XML_ERROR_PARSE_DONE;
436     }
437     else {
438         int rv = XML_Parse(parser->xp, data, (int)len, is_final);
439 
440         if (rv == 0) {
441             parser->error = APR_XML_ERROR_EXPAT;
442             parser->xp_err = XML_GetErrorCode(parser->xp);
443         }
444     }
445 
446     /* ### better error code? */
447     return parser->error ? APR_EGENERAL : APR_SUCCESS;
448 }
449 
apr_xml_parser_feed(apr_xml_parser * parser,const char * data,apr_size_t len)450 APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
451                                               const char *data,
452                                               apr_size_t len)
453 {
454     return do_parse(parser, data, len, 0 /* is_final */);
455 }
456 
apr_xml_parser_done(apr_xml_parser * parser,apr_xml_doc ** pdoc)457 APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
458                                               apr_xml_doc **pdoc)
459 {
460     char end;
461     apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
462 
463     /* get rid of the parser */
464     (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
465 
466     if (status)
467         return status;
468 
469     if (pdoc != NULL)
470         *pdoc = parser->doc;
471     return APR_SUCCESS;
472 }
473 
apr_xml_parser_geterror(apr_xml_parser * parser,char * errbuf,apr_size_t errbufsize)474 APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
475                                             char *errbuf,
476                                             apr_size_t errbufsize)
477 {
478     int error = parser->error;
479     const char *msg;
480 
481     /* clear our record of an error */
482     parser->error = 0;
483 
484     switch (error) {
485     case 0:
486         msg = "No error.";
487         break;
488 
489     case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
490         msg = "An undefined namespace prefix was used.";
491         break;
492 
493     case APR_XML_NS_ERROR_INVALID_DECL:
494         msg = "A namespace prefix was defined with an empty URI.";
495         break;
496 
497     case APR_XML_ERROR_EXPAT:
498         (void) apr_snprintf(errbuf, errbufsize,
499                             "XML parser error code: %s (%d)",
500                             XML_ErrorString(parser->xp_err), parser->xp_err);
501         return errbuf;
502 
503     case APR_XML_ERROR_PARSE_DONE:
504         msg = "The parser is not active.";
505         break;
506 
507     default:
508         msg = "There was an unknown error within the XML body.";
509         break;
510     }
511 
512     (void) apr_cpystrn(errbuf, msg, errbufsize);
513     return errbuf;
514 }
515 
apr_xml_parse_file(apr_pool_t * p,apr_xml_parser ** parser,apr_xml_doc ** ppdoc,apr_file_t * xmlfd,apr_size_t buffer_length)516 APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
517                                              apr_xml_parser **parser,
518                                              apr_xml_doc **ppdoc,
519                                              apr_file_t *xmlfd,
520                                              apr_size_t buffer_length)
521 {
522     apr_status_t rv;
523     char *buffer;
524     apr_size_t length;
525 
526     *parser = apr_xml_parser_create(p);
527     if (*parser == NULL) {
528         /* FIXME: returning an error code would be nice,
529          * but we dont get one ;( */
530         return APR_EGENERAL;
531     }
532     buffer = apr_palloc(p, buffer_length);
533     length = buffer_length;
534 
535     rv = apr_file_read(xmlfd, buffer, &length);
536 
537     while (rv == APR_SUCCESS) {
538         rv = apr_xml_parser_feed(*parser, buffer, length);
539         if (rv != APR_SUCCESS) {
540             return rv;
541         }
542 
543         length = buffer_length;
544         rv = apr_file_read(xmlfd, buffer, &length);
545     }
546     if (rv != APR_EOF) {
547         return rv;
548     }
549     rv = apr_xml_parser_done(*parser, ppdoc);
550     *parser = NULL;
551     return rv;
552 }
553 
apr_text_append(apr_pool_t * p,apr_text_header * hdr,const char * text)554 APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
555                                   const char *text)
556 {
557     apr_text *t = apr_palloc(p, sizeof(*t));
558 
559     t->text = text;
560     t->next = NULL;
561 
562     if (hdr->first == NULL) {
563 	/* no text elements yet */
564 	hdr->first = hdr->last = t;
565     }
566     else {
567 	/* append to the last text element */
568 	hdr->last->next = t;
569 	hdr->last = t;
570     }
571 }
572 
573 
574 /* ---------------------------------------------------------------
575 **
576 ** XML UTILITY FUNCTIONS
577 */
578 
579 /*
580 ** apr_xml_quote_string: quote an XML string
581 **
582 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
583 ** If quotes is true, then replace '"' with '&quot;'.
584 **
585 ** quotes is typically set to true for XML strings that will occur within
586 ** double quotes -- attribute values.
587 */
apr_xml_quote_string(apr_pool_t * p,const char * s,int quotes)588 APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
589                                                int quotes)
590 {
591     const char *scan;
592     apr_size_t len = 0;
593     apr_size_t extra = 0;
594     char *qstr;
595     char *qscan;
596     char c;
597 
598     for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
599 	if (c == '<' || c == '>')
600 	    extra += 3;		/* &lt; or &gt; */
601 	else if (c == '&')
602 	    extra += 4;		/* &amp; */
603 	else if (quotes && c == '"')
604 	    extra += 5;		/* &quot; */
605     }
606 
607     /* nothing to do? */
608     if (extra == 0)
609 	return s;
610 
611     qstr = apr_palloc(p, len + extra + 1);
612     for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
613 	if (c == '<') {
614 	    *qscan++ = '&';
615 	    *qscan++ = 'l';
616 	    *qscan++ = 't';
617 	    *qscan++ = ';';
618 	}
619 	else if (c == '>') {
620 	    *qscan++ = '&';
621 	    *qscan++ = 'g';
622 	    *qscan++ = 't';
623 	    *qscan++ = ';';
624 	}
625 	else if (c == '&') {
626 	    *qscan++ = '&';
627 	    *qscan++ = 'a';
628 	    *qscan++ = 'm';
629 	    *qscan++ = 'p';
630 	    *qscan++ = ';';
631 	}
632 	else if (quotes && c == '"') {
633 	    *qscan++ = '&';
634 	    *qscan++ = 'q';
635 	    *qscan++ = 'u';
636 	    *qscan++ = 'o';
637 	    *qscan++ = 't';
638 	    *qscan++ = ';';
639 	}
640 	else {
641 	    *qscan++ = c;
642 	}
643     }
644 
645     *qscan = '\0';
646     return qstr;
647 }
648 
/*
 * Number of decimal digits needed for a non-negative int: one digit, plus
 * one more for every power of ten the value reaches. Values below ten,
 * negative ones included, give 1. The argument is expanded several times,
 * so it must be free of side effects.
 */
#define APR_XML_NS_LEN(ns) \
    (1 + ((ns) >= 10) + ((ns) >= 100) + ((ns) >= 1000) + \
         ((ns) >= 10000) + ((ns) >= 100000) + ((ns) >= 1000000) + \
         ((ns) >= 10000000) + ((ns) >= 100000000) + ((ns) >= 1000000000))
654 
text_size(const apr_text * t)655 static apr_size_t text_size(const apr_text *t)
656 {
657     apr_size_t size = 0;
658 
659     for (; t; t = t->next)
660 	size += strlen(t->text);
661     return size;
662 }
663 
elem_size(const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map)664 static apr_size_t elem_size(const apr_xml_elem *elem, int style,
665                             apr_array_header_t *namespaces, int *ns_map)
666 {
667     apr_size_t size;
668 
669     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG ||
670 	style == APR_XML_X2T_PARSED) {
671 	const apr_xml_attr *attr;
672 
673 	size = 0;
674 
675 	if (style == APR_XML_X2T_FULL_NS_LANG) {
676 	    int i;
677 
678 	    /*
679 	    ** The outer element will contain xmlns:ns%d="%s" attributes
680 	    ** and an xml:lang attribute, if applicable.
681 	    */
682 
683 	    for (i = namespaces->nelts; i--;) {
684 		/* compute size of: ' xmlns:ns%d="%s"' */
685 		size += (9 + APR_XML_NS_LEN(i) + 2 +
686 			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
687 	    }
688 
689 	    if (elem->lang != NULL) {
690 		/* compute size of: ' xml:lang="%s"' */
691 		size += 11 + strlen(elem->lang) + 1;
692 	    }
693 	}
694 	else if (style == APR_XML_X2T_PARSED) {
695 	    apr_xml_ns_scope *ns_scope = elem->ns_scope;
696 
697 	    /* compute size of: ' xmlns:%s="%s"' */
698 	    for (; ns_scope; ns_scope = ns_scope->next) {
699 		size += 10 + strlen(find_prefix_name(elem, ns_scope->ns, 0)) +
700 			     strlen(APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns));
701 	    }
702 
703 	    if (elem->lang != NULL) {
704 		/* compute size of: ' xml:lang="%s"' */
705 		size += 11 + strlen(elem->lang) + 1;
706 	    }
707 	}
708 
709 	if (elem->ns == APR_XML_NS_NONE) {
710 	    /* compute size of: <%s> */
711 	    size += 1 + strlen(elem->name) + 1;
712 	}
713 	else if (style == APR_XML_X2T_PARSED) {
714 	    /* compute size of: <%s:%s> */
715 	    size += 3 + strlen(find_prefix_name(elem, elem->ns, 1)) + strlen(elem->name);
716 	}
717 	else {
718 	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
719 
720 	    /* compute size of: <ns%d:%s> */
721 	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
722 	}
723 
724 	if (APR_XML_ELEM_IS_EMPTY(elem)) {
725 	    /* insert a closing "/" */
726 	    size += 1;
727 	}
728 	else {
729 	    /*
730 	     * two of above plus "/":
731 	     *     <ns%d:%s> ... </ns%d:%s>
732 	     * OR  <%s> ... </%s>
733 	     */
734 	    size = 2 * size + 1;
735 	}
736 
737 	for (attr = elem->attr; attr; attr = attr->next) {
738 	    if (attr->ns == APR_XML_NS_NONE) {
739 		/* compute size of: ' %s="%s"' */
740 		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
741 	    }
742 	    else if (style == APR_XML_X2T_PARSED) {
743 		/* compute size of: ' %s:%s="%s"' */
744 		size += 5 + strlen(find_prefix_name(elem, attr->ns, 1)) + strlen(attr->name) + strlen(attr->value);
745 	    }
746 	    else {
747 		/* compute size of: ' ns%d:%s="%s"' */
748                 int ns = ns_map ? ns_map[attr->ns] : attr->ns;
749                 size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
750 	    }
751 	}
752 
753 	/*
754 	** If the element has an xml:lang value that is *different* from
755 	** its parent, then add the thing in: ' xml:lang="%s"'.
756 	**
757 	** NOTE: we take advantage of the pointer equality established by
758 	** the parsing for "inheriting" the xml:lang values from parents.
759 	*/
760 	if (elem->lang != NULL &&
761 	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
762 	    size += 11 + strlen(elem->lang) + 1;
763 	}
764     }
765     else if (style == APR_XML_X2T_LANG_INNER) {
766 	/*
767 	 * This style prepends the xml:lang value plus a null terminator.
768 	 * If a lang value is not present, then we insert a null term.
769 	 */
770 	size = elem->lang ? strlen(elem->lang) + 1 : 1;
771     }
772     else
773 	size = 0;
774 
775     size += text_size(elem->first_cdata.first);
776 
777     for (elem = elem->first_child; elem; elem = elem->next) {
778 	/* the size of the child element plus the CDATA that follows it */
779 	size += (elem_size(elem, style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL, NULL, ns_map) +
780 		 text_size(elem->following_cdata.first));
781     }
782 
783     return size;
784 }
785 
write_text(char * s,const apr_text * t)786 static char *write_text(char *s, const apr_text *t)
787 {
788     for (; t; t = t->next) {
789 	apr_size_t len = strlen(t->text);
790 	memcpy(s, t->text, len);
791 	s += len;
792     }
793     return s;
794 }
795 
write_elem(char * s,const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map)796 static char *write_elem(char *s, const apr_xml_elem *elem, int style,
797 			apr_array_header_t *namespaces, int *ns_map)
798 {
799     const apr_xml_elem *child;
800     apr_size_t len;
801     int ns;
802 
803     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG ||
804 	style == APR_XML_X2T_PARSED) {
805 	int empty = APR_XML_ELEM_IS_EMPTY(elem);
806 	const apr_xml_attr *attr;
807 
808 	if (elem->ns == APR_XML_NS_NONE)
809 	    len = sprintf(s, "<%s", elem->name);
810 	else if (style == APR_XML_X2T_PARSED)
811 	    len = sprintf(s, "<%s:%s", find_prefix_name(elem, elem->ns, 1), elem->name);
812 	else {
813 	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
814 	    len = sprintf(s, "<ns%d:%s", ns, elem->name);
815 	}
816 	s += len;
817 
818 	for (attr = elem->attr; attr; attr = attr->next) {
819 	    if (attr->ns == APR_XML_NS_NONE)
820 		len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
821 	    else if (style == APR_XML_X2T_PARSED)
822 		len = sprintf(s, " %s:%s=\"%s\"",
823 			      find_prefix_name(elem, attr->ns, 1), attr->name, attr->value);
824 	    else {
825 		ns = ns_map ? ns_map[attr->ns] : attr->ns;
826 		len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value);
827 	    }
828 	    s += len;
829 	}
830 
831 	/* add the xml:lang value if necessary */
832 	if (elem->lang != NULL &&
833 	    (style == APR_XML_X2T_FULL_NS_LANG ||
834 	     elem->parent == NULL ||
835 	     elem->lang != elem->parent->lang)) {
836 	    len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
837 	    s += len;
838 	}
839 
840 	/* add namespace definitions, if required */
841 	if (style == APR_XML_X2T_FULL_NS_LANG) {
842 	    int i;
843 
844 	    for (i = namespaces->nelts; i--;) {
845 		len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
846 			      APR_XML_GET_URI_ITEM(namespaces, i));
847 		s += len;
848 	    }
849 	}
850 	else if (style == APR_XML_X2T_PARSED) {
851 	    apr_xml_ns_scope *ns_scope = elem->ns_scope;
852 
853 	    for (; ns_scope; ns_scope = ns_scope->next) {
854 		const char *prefix = find_prefix_name(elem, ns_scope->ns, 0);
855 
856 		len = sprintf(s, " xmlns%s%s=\"%s\"",
857 			      *prefix ? ":" : "", *prefix ? prefix : "",
858 			      APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns));
859 		s += len;
860 	    }
861 	}
862 
863 	/* no more to do. close it up and go. */
864 	if (empty) {
865 	    *s++ = '/';
866 	    *s++ = '>';
867 	    return s;
868 	}
869 
870 	/* just close it */
871 	*s++ = '>';
872     }
873     else if (style == APR_XML_X2T_LANG_INNER) {
874 	/* prepend the xml:lang value */
875 	if (elem->lang != NULL) {
876 	    len = strlen(elem->lang);
877 	    memcpy(s, elem->lang, len);
878 	    s += len;
879 	}
880 	*s++ = '\0';
881     }
882 
883     s = write_text(s, elem->first_cdata.first);
884 
885     for (child = elem->first_child; child; child = child->next) {
886 	s = write_elem(s, child,
887 		       style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL,
888 		       NULL, ns_map);
889 	s = write_text(s, child->following_cdata.first);
890     }
891 
892     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG || style == APR_XML_X2T_PARSED) {
893 	if (elem->ns == APR_XML_NS_NONE)
894 	    len = sprintf(s, "</%s>", elem->name);
895 	else if (style == APR_XML_X2T_PARSED)
896 	    len = sprintf(s, "</%s:%s>", find_prefix_name(elem, elem->ns, 1), elem->name);
897 	else {
898 	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
899 	    len = sprintf(s, "</ns%d:%s>", ns, elem->name);
900 	}
901 	s += len;
902     }
903 
904     return s;
905 }
906 
apr_xml_quote_elem(apr_pool_t * p,apr_xml_elem * elem)907 APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
908 {
909     apr_text *scan_txt;
910     apr_xml_attr *scan_attr;
911     apr_xml_elem *scan_elem;
912 
913     /* convert the element's text */
914     for (scan_txt = elem->first_cdata.first;
915 	 scan_txt != NULL;
916 	 scan_txt = scan_txt->next) {
917 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
918     }
919     for (scan_txt = elem->following_cdata.first;
920 	 scan_txt != NULL;
921 	 scan_txt = scan_txt->next) {
922 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
923     }
924 
925     /* convert the attribute values */
926     for (scan_attr = elem->attr;
927 	 scan_attr != NULL;
928 	 scan_attr = scan_attr->next) {
929 	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
930     }
931 
932     /* convert the child elements */
933     for (scan_elem = elem->first_child;
934 	 scan_elem != NULL;
935 	 scan_elem = scan_elem->next) {
936 	apr_xml_quote_elem(p, scan_elem);
937     }
938 }
939 
940 /* convert an element to a text string */
apr_xml_to_text(apr_pool_t * p,const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map,const char ** pbuf,apr_size_t * psize)941 APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
942                                   int style, apr_array_header_t *namespaces,
943                                   int *ns_map, const char **pbuf,
944                                   apr_size_t *psize)
945 {
946     /* get the exact size, plus a null terminator */
947     apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
948     char *s = apr_palloc(p, size);
949 
950     (void) write_elem(s, elem, style, namespaces, ns_map);
951     s[size - 1] = '\0';
952 
953     *pbuf = s;
954     if (psize)
955 	*psize = size;
956 }
957 
apr_xml_empty_elem(apr_pool_t * p,const apr_xml_elem * elem)958 APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
959                                              const apr_xml_elem *elem)
960 {
961     if (elem->ns == APR_XML_NS_NONE) {
962 	/*
963 	 * The prefix (xml...) is already within the prop name, or
964 	 * the element simply has no prefix.
965 	 */
966 	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
967     }
968 
969     return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
970 }
971 
972 /* return the URI's (existing) index, or insert it and return a new index */
apr_xml_insert_uri(apr_array_header_t * uri_array,const char * uri)973 APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
974                                     const char *uri)
975 {
976     int i;
977     const char **pelt;
978 
979     /* never insert an empty URI; this index is always APR_XML_NS_NONE */
980     if (*uri == '\0')
981         return APR_XML_NS_NONE;
982 
983     for (i = uri_array->nelts; i--;) {
984 	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
985 	    return i;
986     }
987 
988     pelt = apr_array_push(uri_array);
989     *pelt = uri;		/* assume uri is const or in a pool */
990     return uri_array->nelts - 1;
991 }
992 
993 /* convert the element to EBCDIC */
994 #if APR_CHARSET_EBCDIC
apr_xml_parser_convert_elem(apr_xml_elem * e,apr_xlate_t * convset)995 static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
996                                                 apr_xlate_t *convset)
997 {
998     apr_xml_attr *a;
999     apr_xml_elem *ec;
1000     apr_text *t;
1001     apr_size_t inbytes_left, outbytes_left;
1002     apr_status_t status;
1003 
1004     inbytes_left = outbytes_left = strlen(e->name);
1005     status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
1006     if (status) {
1007         return status;
1008     }
1009 
1010     for (t = e->first_cdata.first; t != NULL; t = t->next) {
1011         inbytes_left = outbytes_left = strlen(t->text);
1012         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
1013         if (status) {
1014             return status;
1015         }
1016     }
1017 
1018     for (t = e->following_cdata.first;  t != NULL; t = t->next) {
1019         inbytes_left = outbytes_left = strlen(t->text);
1020         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
1021         if (status) {
1022             return status;
1023         }
1024     }
1025 
1026     for (a = e->attr; a != NULL; a = a->next) {
1027         inbytes_left = outbytes_left = strlen(a->name);
1028         status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
1029         if (status) {
1030             return status;
1031         }
1032         inbytes_left = outbytes_left = strlen(a->value);
1033         status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
1034         if (status) {
1035             return status;
1036         }
1037     }
1038 
1039     for (ec = e->first_child; ec != NULL; ec = ec->next) {
1040         status = apr_xml_parser_convert_elem(ec, convset);
1041         if (status) {
1042             return status;
1043         }
1044     }
1045     return APR_SUCCESS;
1046 }
1047 
1048 /* convert the whole document to EBCDIC */
apr_xml_parser_convert_doc(apr_pool_t * pool,apr_xml_doc * pdoc,apr_xlate_t * convset)1049 APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
1050                                                      apr_xml_doc *pdoc,
1051                                                      apr_xlate_t *convset)
1052 {
1053     apr_status_t status;
1054     /* Don't convert the namespaces: they are constant! */
1055     if (pdoc->namespaces != NULL) {
1056         int i;
1057         apr_array_header_t *namespaces;
1058         namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
1059         if (namespaces == NULL)
1060             return APR_ENOMEM;
1061         for (i = 0; i < pdoc->namespaces->nelts; i++) {
1062             apr_size_t inbytes_left, outbytes_left;
1063             char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
1064             ptr = apr_pstrdup(pool, ptr);
1065             if ( ptr == NULL)
1066                 return APR_ENOMEM;
1067             inbytes_left = outbytes_left = strlen(ptr);
1068             status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
1069             if (status) {
1070                 return status;
1071             }
1072             apr_xml_insert_uri(namespaces, ptr);
1073         }
1074         pdoc->namespaces = namespaces;
1075     }
1076     return apr_xml_parser_convert_elem(pdoc->root, convset);
1077 }
1078 #endif
1079