xref: /NextBSD/contrib/apr-util/xml/apr_xml.c (revision eb1a5f8de9f7ea602c373a710f531abbf81141c4)
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "apr.h"
18 #include "apr_strings.h"
19 
20 #define APR_WANT_STDIO          /* for sprintf() */
21 #define APR_WANT_STRFUNC
22 #include "apr_want.h"
23 
24 #include "apr_xml.h"
25 
26 #include "apu_config.h"
27 
28 #if defined(HAVE_XMLPARSE_XMLPARSE_H)
29 #include <xmlparse/xmlparse.h>
30 #elif defined(HAVE_XMLTOK_XMLPARSE_H)
31 #include <xmltok/xmlparse.h>
32 #elif defined(HAVE_XML_XMLPARSE_H)
33 #include <xml/xmlparse.h>
34 #else
35 #include <expat.h>
36 #endif
37 
/* line terminator appended to the markup built by apr_xml_empty_elem() */
#define DEBUG_CR "\r\n"

/*
 * Keywords spelled as numeric ASCII codes rather than character literals
 * (presumably so they keep their ASCII values on EBCDIC builds).
 */
static const char APR_KW_xmlns[] = {
    0x78, 0x6D, 0x6C, 0x6E, 0x73,                       /* "xmlns" */
    '\0'
};
static const char APR_KW_xmlns_lang[] = {
    0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67,     /* "xml:lang" */
    '\0'
};
static const char APR_KW_DAV[] = {
    0x44, 0x41, 0x56, 0x3A,                             /* "DAV:" */
    '\0'
};

/* error codes stored in the parser when namespace processing fails */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)
47 
/*
 * Test for a name that begins with [Xx][Mm][Ll] (compared as ASCII codes).
 *
 * The argument is parenthesized so that an expression such as "p + 1" can
 * be passed safely.  The && chain short-circuits, so a string shorter than
 * three characters is never read past its terminator.  Note that the
 * argument is evaluated more than once.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
53 
54 
55 /* the real (internal) definition of the parser context */
56 struct apr_xml_parser {
57     apr_xml_doc *doc;		/* the doc we're parsing */
58     apr_pool_t *p;		/* the pool we allocate from */
59     apr_xml_elem *cur_elem;	/* current element */
60 
61     int error;			/* an error has occurred */
62 #define APR_XML_ERROR_EXPAT             1
63 #define APR_XML_ERROR_PARSE_DONE        2
64 /* also: public APR_XML_NS_ERROR_* values (if any) */
65 
66     XML_Parser xp;              /* the actual (Expat) XML parser */
67     enum XML_Error xp_err;      /* stored Expat error code */
68 };
69 
/*
 * One namespace declaration that is in scope for an element.  The
 * declarations of an element form a singly linked list through "next".
 */
struct apr_xml_ns_scope {
    const char *prefix;                 /* declared prefix ("" for the default ns) */
    int ns;                             /* index into the namespace table */
    int emptyURI;                       /* non-zero when the URI is the empty string */
    struct apr_xml_ns_scope *next;      /* next declaration on the same element */
};
typedef struct apr_xml_ns_scope apr_xml_ns_scope;
77 
78 
79 /* return namespace table index for a given prefix */
find_prefix(apr_xml_parser * parser,const char * prefix)80 static int find_prefix(apr_xml_parser *parser, const char *prefix)
81 {
82     apr_xml_elem *elem = parser->cur_elem;
83 
84     /*
85     ** Walk up the tree, looking for a namespace scope that defines this
86     ** prefix.
87     */
88     for (; elem; elem = elem->parent) {
89 	apr_xml_ns_scope *ns_scope = elem->ns_scope;
90 
91 	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
92 	    if (strcmp(prefix, ns_scope->prefix) == 0) {
93 		if (ns_scope->emptyURI) {
94 		    /*
95 		    ** It is possible to set the default namespace to an
96 		    ** empty URI string; this resets the default namespace
97 		    ** to mean "no namespace." We just found the prefix
98 		    ** refers to an empty URI, so return "no namespace."
99 		    */
100 		    return APR_XML_NS_NONE;
101 		}
102 
103 		return ns_scope->ns;
104 	    }
105 	}
106     }
107 
108     /*
109      * If the prefix is empty (""), this means that a prefix was not
110      * specified in the element/attribute. The search that was performed
111      * just above did not locate a default namespace URI (which is stored
112      * into ns_scope with an empty prefix). This means the element/attribute
113      * has "no namespace". We have a reserved value for this.
114      */
115     if (*prefix == '\0') {
116 	return APR_XML_NS_NONE;
117     }
118 
119     /* not found */
120     return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
121 }
122 
start_handler(void * userdata,const char * name,const char ** attrs)123 static void start_handler(void *userdata, const char *name, const char **attrs)
124 {
125     apr_xml_parser *parser = userdata;
126     apr_xml_elem *elem;
127     apr_xml_attr *attr;
128     apr_xml_attr *prev;
129     char *colon;
130     const char *quoted;
131     char *elem_name;
132 
133     /* punt once we find an error */
134     if (parser->error)
135 	return;
136 
137     elem = apr_pcalloc(parser->p, sizeof(*elem));
138 
139     /* prep the element */
140     elem->name = elem_name = apr_pstrdup(parser->p, name);
141 
142     /* fill in the attributes (note: ends up in reverse order) */
143     while (*attrs) {
144 	attr = apr_palloc(parser->p, sizeof(*attr));
145 	attr->name = apr_pstrdup(parser->p, *attrs++);
146 	attr->value = apr_pstrdup(parser->p, *attrs++);
147 	attr->next = elem->attr;
148 	elem->attr = attr;
149     }
150 
151     /* hook the element into the tree */
152     if (parser->cur_elem == NULL) {
153 	/* no current element; this also becomes the root */
154 	parser->cur_elem = parser->doc->root = elem;
155     }
156     else {
157 	/* this element appeared within the current elem */
158 	elem->parent = parser->cur_elem;
159 
160 	/* set up the child/sibling links */
161 	if (elem->parent->last_child == NULL) {
162 	    /* no first child either */
163 	    elem->parent->first_child = elem->parent->last_child = elem;
164 	}
165 	else {
166 	    /* hook onto the end of the parent's children */
167 	    elem->parent->last_child->next = elem;
168 	    elem->parent->last_child = elem;
169 	}
170 
171 	/* this element is now the current element */
172 	parser->cur_elem = elem;
173     }
174 
175     /* scan the attributes for namespace declarations */
176     for (prev = NULL, attr = elem->attr;
177 	 attr;
178 	 attr = attr->next) {
179 	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
180 	    const char *prefix = &attr->name[5];
181 	    apr_xml_ns_scope *ns_scope;
182 
183 	    /* test for xmlns:foo= form and xmlns= form */
184 	    if (*prefix == 0x3A) {
185                 /* a namespace prefix declaration must have a
186                    non-empty value. */
187                 if (attr->value[0] == '\0') {
188                     parser->error = APR_XML_NS_ERROR_INVALID_DECL;
189                     return;
190                 }
191 		++prefix;
192             }
193 	    else if (*prefix != '\0') {
194 		/* advance "prev" since "attr" is still present */
195 		prev = attr;
196 		continue;
197 	    }
198 
199 	    /* quote the URI before we ever start working with it */
200 	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);
201 
202 	    /* build and insert the new scope */
203 	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
204 	    ns_scope->prefix = prefix;
205 	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
206 	    ns_scope->emptyURI = *quoted == '\0';
207 	    ns_scope->next = elem->ns_scope;
208 	    elem->ns_scope = ns_scope;
209 
210 	    /* remove this attribute from the element */
211 	    if (prev == NULL)
212 		elem->attr = attr->next;
213 	    else
214 		prev->next = attr->next;
215 
216 	    /* Note: prev will not be advanced since we just removed "attr" */
217 	}
218 	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
219 	    /* save away the language (in quoted form) */
220 	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
221 
222 	    /* remove this attribute from the element */
223 	    if (prev == NULL)
224 		elem->attr = attr->next;
225 	    else
226 		prev->next = attr->next;
227 
228 	    /* Note: prev will not be advanced since we just removed "attr" */
229 	}
230 	else {
231 	    /* advance "prev" since "attr" is still present */
232 	    prev = attr;
233 	}
234     }
235 
236     /*
237     ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
238     ** language from the parent element (if present).
239     **
240     ** NOTE: elem_size() *depends* upon this pointer equality.
241     */
242     if (elem->lang == NULL && elem->parent != NULL)
243 	elem->lang = elem->parent->lang;
244 
245     /* adjust the element's namespace */
246     colon = strchr(elem_name, 0x3A);
247     if (colon == NULL) {
248 	/*
249 	 * The element is using the default namespace, which will always
250 	 * be found. Either it will be "no namespace", or a default
251 	 * namespace URI has been specified at some point.
252 	 */
253 	elem->ns = find_prefix(parser, "");
254     }
255     else if (APR_XML_NS_IS_RESERVED(elem->name)) {
256 	elem->ns = APR_XML_NS_NONE;
257     }
258     else {
259 	*colon = '\0';
260 	elem->ns = find_prefix(parser, elem->name);
261 	elem->name = colon + 1;
262 
263 	if (APR_XML_NS_IS_ERROR(elem->ns)) {
264 	    parser->error = elem->ns;
265 	    return;
266 	}
267     }
268 
269     /* adjust all remaining attributes' namespaces */
270     for (attr = elem->attr; attr; attr = attr->next) {
271         /*
272          * apr_xml_attr defines this as "const" but we dup'd it, so we
273          * know that we can change it. a bit hacky, but the existing
274          * structure def is best.
275          */
276         char *attr_name = (char *)attr->name;
277 
278 	colon = strchr(attr_name, 0x3A);
279 	if (colon == NULL) {
280 	    /*
281 	     * Attributes do NOT use the default namespace. Therefore,
282 	     * we place them into the "no namespace" category.
283 	     */
284 	    attr->ns = APR_XML_NS_NONE;
285 	}
286 	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
287 	    attr->ns = APR_XML_NS_NONE;
288 	}
289 	else {
290 	    *colon = '\0';
291 	    attr->ns = find_prefix(parser, attr->name);
292 	    attr->name = colon + 1;
293 
294 	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
295 		parser->error = attr->ns;
296 		return;
297 	    }
298 	}
299     }
300 }
301 
end_handler(void * userdata,const char * name)302 static void end_handler(void *userdata, const char *name)
303 {
304     apr_xml_parser *parser = userdata;
305 
306     /* punt once we find an error */
307     if (parser->error)
308 	return;
309 
310     /* pop up one level */
311     parser->cur_elem = parser->cur_elem->parent;
312 }
313 
cdata_handler(void * userdata,const char * data,int len)314 static void cdata_handler(void *userdata, const char *data, int len)
315 {
316     apr_xml_parser *parser = userdata;
317     apr_xml_elem *elem;
318     apr_text_header *hdr;
319     const char *s;
320 
321     /* punt once we find an error */
322     if (parser->error)
323 	return;
324 
325     elem = parser->cur_elem;
326     s = apr_pstrndup(parser->p, data, len);
327 
328     if (elem->last_child == NULL) {
329 	/* no children yet. this cdata follows the start tag */
330 	hdr = &elem->first_cdata;
331     }
332     else {
333 	/* child elements exist. this cdata follows the last child. */
334 	hdr = &elem->last_child->following_cdata;
335     }
336 
337     apr_text_append(parser->p, hdr, s);
338 }
339 
cleanup_parser(void * ctx)340 static apr_status_t cleanup_parser(void *ctx)
341 {
342     apr_xml_parser *parser = ctx;
343 
344     XML_ParserFree(parser->xp);
345     parser->xp = NULL;
346 
347     return APR_SUCCESS;
348 }
349 
350 #if XML_MAJOR_VERSION > 1
351 /* Stop the parser if an entity declaration is hit. */
entity_declaration(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)352 static void entity_declaration(void *userData, const XML_Char *entityName,
353                                int is_parameter_entity, const XML_Char *value,
354                                int value_length, const XML_Char *base,
355                                const XML_Char *systemId, const XML_Char *publicId,
356                                const XML_Char *notationName)
357 {
358     apr_xml_parser *parser = userData;
359 
360     XML_StopParser(parser->xp, XML_FALSE);
361 }
362 #else
363 /* A noop default_handler. */
default_handler(void * userData,const XML_Char * s,int len)364 static void default_handler(void *userData, const XML_Char *s, int len)
365 {
366 }
367 #endif
368 
apr_xml_parser_create(apr_pool_t * pool)369 APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
370 {
371     apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
372 
373     parser->p = pool;
374     parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
375 
376     parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
377 
378     /* ### is there a way to avoid hard-coding this? */
379     apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
380 
381     parser->xp = XML_ParserCreate(NULL);
382     if (parser->xp == NULL) {
383         (*apr_pool_abort_get(pool))(APR_ENOMEM);
384         return NULL;
385     }
386 
387     apr_pool_cleanup_register(pool, parser, cleanup_parser,
388                               apr_pool_cleanup_null);
389 
390     XML_SetUserData(parser->xp, parser);
391     XML_SetElementHandler(parser->xp, start_handler, end_handler);
392     XML_SetCharacterDataHandler(parser->xp, cdata_handler);
393 
394     /* Prevent the "billion laughs" attack against expat by disabling
395      * internal entity expansion.  With 2.x, forcibly stop the parser
396      * if an entity is declared - this is safer and a more obvious
397      * failure mode.  With older versions, installing a noop
398      * DefaultHandler means that internal entities will be expanded as
399      * the empty string, which is also sufficient to prevent the
400      * attack. */
401 #if XML_MAJOR_VERSION > 1
402     XML_SetEntityDeclHandler(parser->xp, entity_declaration);
403 #else
404     XML_SetDefaultHandler(parser->xp, default_handler);
405 #endif
406 
407     return parser;
408 }
409 
do_parse(apr_xml_parser * parser,const char * data,apr_size_t len,int is_final)410 static apr_status_t do_parse(apr_xml_parser *parser,
411                              const char *data, apr_size_t len,
412                              int is_final)
413 {
414     if (parser->xp == NULL) {
415         parser->error = APR_XML_ERROR_PARSE_DONE;
416     }
417     else {
418         int rv = XML_Parse(parser->xp, data, (int)len, is_final);
419 
420         if (rv == 0) {
421             parser->error = APR_XML_ERROR_EXPAT;
422             parser->xp_err = XML_GetErrorCode(parser->xp);
423         }
424     }
425 
426     /* ### better error code? */
427     return parser->error ? APR_EGENERAL : APR_SUCCESS;
428 }
429 
apr_xml_parser_feed(apr_xml_parser * parser,const char * data,apr_size_t len)430 APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
431                                               const char *data,
432                                               apr_size_t len)
433 {
434     return do_parse(parser, data, len, 0 /* is_final */);
435 }
436 
apr_xml_parser_done(apr_xml_parser * parser,apr_xml_doc ** pdoc)437 APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
438                                               apr_xml_doc **pdoc)
439 {
440     char end;
441     apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
442 
443     /* get rid of the parser */
444     (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
445 
446     if (status)
447         return status;
448 
449     if (pdoc != NULL)
450         *pdoc = parser->doc;
451     return APR_SUCCESS;
452 }
453 
apr_xml_parser_geterror(apr_xml_parser * parser,char * errbuf,apr_size_t errbufsize)454 APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
455                                             char *errbuf,
456                                             apr_size_t errbufsize)
457 {
458     int error = parser->error;
459     const char *msg;
460 
461     /* clear our record of an error */
462     parser->error = 0;
463 
464     switch (error) {
465     case 0:
466         msg = "No error.";
467         break;
468 
469     case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
470         msg = "An undefined namespace prefix was used.";
471         break;
472 
473     case APR_XML_NS_ERROR_INVALID_DECL:
474         msg = "A namespace prefix was defined with an empty URI.";
475         break;
476 
477     case APR_XML_ERROR_EXPAT:
478         (void) apr_snprintf(errbuf, errbufsize,
479                             "XML parser error code: %s (%d)",
480                             XML_ErrorString(parser->xp_err), parser->xp_err);
481         return errbuf;
482 
483     case APR_XML_ERROR_PARSE_DONE:
484         msg = "The parser is not active.";
485         break;
486 
487     default:
488         msg = "There was an unknown error within the XML body.";
489         break;
490     }
491 
492     (void) apr_cpystrn(errbuf, msg, errbufsize);
493     return errbuf;
494 }
495 
apr_xml_parse_file(apr_pool_t * p,apr_xml_parser ** parser,apr_xml_doc ** ppdoc,apr_file_t * xmlfd,apr_size_t buffer_length)496 APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
497                                              apr_xml_parser **parser,
498                                              apr_xml_doc **ppdoc,
499                                              apr_file_t *xmlfd,
500                                              apr_size_t buffer_length)
501 {
502     apr_status_t rv;
503     char *buffer;
504     apr_size_t length;
505 
506     *parser = apr_xml_parser_create(p);
507     if (*parser == NULL) {
508         /* FIXME: returning an error code would be nice,
509          * but we dont get one ;( */
510         return APR_EGENERAL;
511     }
512     buffer = apr_palloc(p, buffer_length);
513     length = buffer_length;
514 
515     rv = apr_file_read(xmlfd, buffer, &length);
516 
517     while (rv == APR_SUCCESS) {
518         rv = apr_xml_parser_feed(*parser, buffer, length);
519         if (rv != APR_SUCCESS) {
520             return rv;
521         }
522 
523         length = buffer_length;
524         rv = apr_file_read(xmlfd, buffer, &length);
525     }
526     if (rv != APR_EOF) {
527         return rv;
528     }
529     rv = apr_xml_parser_done(*parser, ppdoc);
530     *parser = NULL;
531     return rv;
532 }
533 
apr_text_append(apr_pool_t * p,apr_text_header * hdr,const char * text)534 APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
535                                   const char *text)
536 {
537     apr_text *t = apr_palloc(p, sizeof(*t));
538 
539     t->text = text;
540     t->next = NULL;
541 
542     if (hdr->first == NULL) {
543 	/* no text elements yet */
544 	hdr->first = hdr->last = t;
545     }
546     else {
547 	/* append to the last text element */
548 	hdr->last->next = t;
549 	hdr->last = t;
550     }
551 }
552 
553 
554 /* ---------------------------------------------------------------
555 **
556 ** XML UTILITY FUNCTIONS
557 */
558 
559 /*
560 ** apr_xml_quote_string: quote an XML string
561 **
562 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
563 ** If quotes is true, then replace '"' with '&quot;'.
564 **
565 ** quotes is typically set to true for XML strings that will occur within
566 ** double quotes -- attribute values.
567 */
apr_xml_quote_string(apr_pool_t * p,const char * s,int quotes)568 APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
569                                                int quotes)
570 {
571     const char *scan;
572     apr_size_t len = 0;
573     apr_size_t extra = 0;
574     char *qstr;
575     char *qscan;
576     char c;
577 
578     for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
579 	if (c == '<' || c == '>')
580 	    extra += 3;		/* &lt; or &gt; */
581 	else if (c == '&')
582 	    extra += 4;		/* &amp; */
583 	else if (quotes && c == '"')
584 	    extra += 5;		/* &quot; */
585     }
586 
587     /* nothing to do? */
588     if (extra == 0)
589 	return s;
590 
591     qstr = apr_palloc(p, len + extra + 1);
592     for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
593 	if (c == '<') {
594 	    *qscan++ = '&';
595 	    *qscan++ = 'l';
596 	    *qscan++ = 't';
597 	    *qscan++ = ';';
598 	}
599 	else if (c == '>') {
600 	    *qscan++ = '&';
601 	    *qscan++ = 'g';
602 	    *qscan++ = 't';
603 	    *qscan++ = ';';
604 	}
605 	else if (c == '&') {
606 	    *qscan++ = '&';
607 	    *qscan++ = 'a';
608 	    *qscan++ = 'm';
609 	    *qscan++ = 'p';
610 	    *qscan++ = ';';
611 	}
612 	else if (quotes && c == '"') {
613 	    *qscan++ = '&';
614 	    *qscan++ = 'q';
615 	    *qscan++ = 'u';
616 	    *qscan++ = 'o';
617 	    *qscan++ = 't';
618 	    *qscan++ = ';';
619 	}
620 	else {
621 	    *qscan++ = c;
622 	}
623     }
624 
625     *qscan = '\0';
626     return qstr;
627 }
628 
/*
 * Number of decimal digits needed to print the namespace index "ns".
 * Any value below 10 (including negative ones) yields 1.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) < 10         ? 1 : \
     (ns) < 100        ? 2 : \
     (ns) < 1000       ? 3 : \
     (ns) < 10000      ? 4 : \
     (ns) < 100000     ? 5 : \
     (ns) < 1000000    ? 6 : \
     (ns) < 10000000   ? 7 : \
     (ns) < 100000000  ? 8 : \
     (ns) < 1000000000 ? 9 : 10)
634 
text_size(const apr_text * t)635 static apr_size_t text_size(const apr_text *t)
636 {
637     apr_size_t size = 0;
638 
639     for (; t; t = t->next)
640 	size += strlen(t->text);
641     return size;
642 }
643 
elem_size(const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map)644 static apr_size_t elem_size(const apr_xml_elem *elem, int style,
645                             apr_array_header_t *namespaces, int *ns_map)
646 {
647     apr_size_t size;
648 
649     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
650 	const apr_xml_attr *attr;
651 
652 	size = 0;
653 
654 	if (style == APR_XML_X2T_FULL_NS_LANG) {
655 	    int i;
656 
657 	    /*
658 	    ** The outer element will contain xmlns:ns%d="%s" attributes
659 	    ** and an xml:lang attribute, if applicable.
660 	    */
661 
662 	    for (i = namespaces->nelts; i--;) {
663 		/* compute size of: ' xmlns:ns%d="%s"' */
664 		size += (9 + APR_XML_NS_LEN(i) + 2 +
665 			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
666 	    }
667 
668 	    if (elem->lang != NULL) {
669 		/* compute size of: ' xml:lang="%s"' */
670 		size += 11 + strlen(elem->lang) + 1;
671 	    }
672 	}
673 
674 	if (elem->ns == APR_XML_NS_NONE) {
675 	    /* compute size of: <%s> */
676 	    size += 1 + strlen(elem->name) + 1;
677 	}
678 	else {
679 	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
680 
681 	    /* compute size of: <ns%d:%s> */
682 	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
683 	}
684 
685 	if (APR_XML_ELEM_IS_EMPTY(elem)) {
686 	    /* insert a closing "/" */
687 	    size += 1;
688 	}
689 	else {
690 	    /*
691 	     * two of above plus "/":
692 	     *     <ns%d:%s> ... </ns%d:%s>
693 	     * OR  <%s> ... </%s>
694 	     */
695 	    size = 2 * size + 1;
696 	}
697 
698 	for (attr = elem->attr; attr; attr = attr->next) {
699 	    if (attr->ns == APR_XML_NS_NONE) {
700 		/* compute size of: ' %s="%s"' */
701 		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
702 	    }
703 	    else {
704 		/* compute size of: ' ns%d:%s="%s"' */
705                 int ns = ns_map ? ns_map[attr->ns] : attr->ns;
706                 size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
707 	    }
708 	}
709 
710 	/*
711 	** If the element has an xml:lang value that is *different* from
712 	** its parent, then add the thing in: ' xml:lang="%s"'.
713 	**
714 	** NOTE: we take advantage of the pointer equality established by
715 	** the parsing for "inheriting" the xml:lang values from parents.
716 	*/
717 	if (elem->lang != NULL &&
718 	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
719 	    size += 11 + strlen(elem->lang) + 1;
720 	}
721     }
722     else if (style == APR_XML_X2T_LANG_INNER) {
723 	/*
724 	 * This style prepends the xml:lang value plus a null terminator.
725 	 * If a lang value is not present, then we insert a null term.
726 	 */
727 	size = elem->lang ? strlen(elem->lang) + 1 : 1;
728     }
729     else
730 	size = 0;
731 
732     size += text_size(elem->first_cdata.first);
733 
734     for (elem = elem->first_child; elem; elem = elem->next) {
735 	/* the size of the child element plus the CDATA that follows it */
736 	size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
737 		 text_size(elem->following_cdata.first));
738     }
739 
740     return size;
741 }
742 
write_text(char * s,const apr_text * t)743 static char *write_text(char *s, const apr_text *t)
744 {
745     for (; t; t = t->next) {
746 	apr_size_t len = strlen(t->text);
747 	memcpy(s, t->text, len);
748 	s += len;
749     }
750     return s;
751 }
752 
/*
 * Serialize "elem" into the buffer at "s" in the given style and return
 * the position just past the last byte written.  The caller must have
 * sized the buffer beforehand; nothing here checks for space.
 *
 * APR_XML_X2T_FULL / APR_XML_X2T_FULL_NS_LANG write the element's tags and
 * attributes around its content; FULL_NS_LANG additionally writes xml:lang
 * whenever the element has one, plus an xmlns:ns%d declaration for every
 * entry in "namespaces".  APR_XML_X2T_LANG_INNER writes the xml:lang value
 * (possibly empty) followed by a NUL byte, then the content only.  Any
 * other style writes just the content.  Children are always written in
 * APR_XML_X2T_FULL style.
 *
 * ns_map, when not NULL, remaps element/attribute namespace indexes.
 */
static char *write_elem(char *s, const apr_xml_elem *elem, int style,
                        apr_array_header_t *namespaces, int *ns_map)
{
    const int tagged = (style == APR_XML_X2T_FULL ||
                        style == APR_XML_X2T_FULL_NS_LANG);
    const apr_xml_elem *kid;

    if (tagged) {
        const int is_empty = APR_XML_ELEM_IS_EMPTY(elem);
        const apr_xml_attr *attr;

        /* start tag name */
        if (elem->ns == APR_XML_NS_NONE) {
            s += sprintf(s, "<%s", elem->name);
        }
        else {
            int ns = ns_map ? ns_map[elem->ns] : elem->ns;

            s += sprintf(s, "<ns%d:%s", ns, elem->name);
        }

        /* attributes */
        for (attr = elem->attr; attr != NULL; attr = attr->next) {
            if (attr->ns == APR_XML_NS_NONE) {
                s += sprintf(s, " %s=\"%s\"", attr->name, attr->value);
            }
            else {
                int ns = ns_map ? ns_map[attr->ns] : attr->ns;

                s += sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name,
                             attr->value);
            }
        }

        /* xml:lang: forced by FULL_NS_LANG, else only if unlike the parent's */
        if (elem->lang != NULL &&
            (style == APR_XML_X2T_FULL_NS_LANG ||
             elem->parent == NULL ||
             elem->lang != elem->parent->lang)) {
            s += sprintf(s, " xml:lang=\"%s\"", elem->lang);
        }

        /* namespace declarations, highest index first */
        if (style == APR_XML_X2T_FULL_NS_LANG) {
            int i;

            for (i = namespaces->nelts - 1; i >= 0; --i) {
                s += sprintf(s, " xmlns:ns%d=\"%s\"", i,
                             APR_XML_GET_URI_ITEM(namespaces, i));
            }
        }

        if (is_empty) {
            /* no content: self-close and stop */
            *s++ = '/';
            *s++ = '>';
            return s;
        }

        *s++ = '>';
    }
    else if (style == APR_XML_X2T_LANG_INNER) {
        /* language value (if any), then a NUL separator */
        if (elem->lang != NULL) {
            apr_size_t n = strlen(elem->lang);

            memcpy(s, elem->lang, n);
            s += n;
        }
        *s++ = '\0';
    }

    /* content: leading text, then each child followed by its trailing text */
    s = write_text(s, elem->first_cdata.first);

    for (kid = elem->first_child; kid != NULL; kid = kid->next) {
        s = write_elem(s, kid, APR_XML_X2T_FULL, NULL, ns_map);
        s = write_text(s, kid->following_cdata.first);
    }

    /* end tag */
    if (tagged) {
        if (elem->ns == APR_XML_NS_NONE) {
            s += sprintf(s, "</%s>", elem->name);
        }
        else {
            int ns = ns_map ? ns_map[elem->ns] : elem->ns;

            s += sprintf(s, "</ns%d:%s>", ns, elem->name);
        }
    }

    return s;
}
843 
apr_xml_quote_elem(apr_pool_t * p,apr_xml_elem * elem)844 APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
845 {
846     apr_text *scan_txt;
847     apr_xml_attr *scan_attr;
848     apr_xml_elem *scan_elem;
849 
850     /* convert the element's text */
851     for (scan_txt = elem->first_cdata.first;
852 	 scan_txt != NULL;
853 	 scan_txt = scan_txt->next) {
854 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
855     }
856     for (scan_txt = elem->following_cdata.first;
857 	 scan_txt != NULL;
858 	 scan_txt = scan_txt->next) {
859 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
860     }
861 
862     /* convert the attribute values */
863     for (scan_attr = elem->attr;
864 	 scan_attr != NULL;
865 	 scan_attr = scan_attr->next) {
866 	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
867     }
868 
869     /* convert the child elements */
870     for (scan_elem = elem->first_child;
871 	 scan_elem != NULL;
872 	 scan_elem = scan_elem->next) {
873 	apr_xml_quote_elem(p, scan_elem);
874     }
875 }
876 
877 /* convert an element to a text string */
apr_xml_to_text(apr_pool_t * p,const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map,const char ** pbuf,apr_size_t * psize)878 APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
879                                   int style, apr_array_header_t *namespaces,
880                                   int *ns_map, const char **pbuf,
881                                   apr_size_t *psize)
882 {
883     /* get the exact size, plus a null terminator */
884     apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
885     char *s = apr_palloc(p, size);
886 
887     (void) write_elem(s, elem, style, namespaces, ns_map);
888     s[size - 1] = '\0';
889 
890     *pbuf = s;
891     if (psize)
892 	*psize = size;
893 }
894 
apr_xml_empty_elem(apr_pool_t * p,const apr_xml_elem * elem)895 APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
896                                              const apr_xml_elem *elem)
897 {
898     if (elem->ns == APR_XML_NS_NONE) {
899 	/*
900 	 * The prefix (xml...) is already within the prop name, or
901 	 * the element simply has no prefix.
902 	 */
903 	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
904     }
905 
906     return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
907 }
908 
909 /* return the URI's (existing) index, or insert it and return a new index */
apr_xml_insert_uri(apr_array_header_t * uri_array,const char * uri)910 APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
911                                     const char *uri)
912 {
913     int i;
914     const char **pelt;
915 
916     /* never insert an empty URI; this index is always APR_XML_NS_NONE */
917     if (*uri == '\0')
918         return APR_XML_NS_NONE;
919 
920     for (i = uri_array->nelts; i--;) {
921 	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
922 	    return i;
923     }
924 
925     pelt = apr_array_push(uri_array);
926     *pelt = uri;		/* assume uri is const or in a pool */
927     return uri_array->nelts - 1;
928 }
929 
930 /* convert the element to EBCDIC */
931 #if APR_CHARSET_EBCDIC
apr_xml_parser_convert_elem(apr_xml_elem * e,apr_xlate_t * convset)932 static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
933                                                 apr_xlate_t *convset)
934 {
935     apr_xml_attr *a;
936     apr_xml_elem *ec;
937     apr_text *t;
938     apr_size_t inbytes_left, outbytes_left;
939     apr_status_t status;
940 
941     inbytes_left = outbytes_left = strlen(e->name);
942     status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
943     if (status) {
944         return status;
945     }
946 
947     for (t = e->first_cdata.first; t != NULL; t = t->next) {
948         inbytes_left = outbytes_left = strlen(t->text);
949         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
950         if (status) {
951             return status;
952         }
953     }
954 
955     for (t = e->following_cdata.first;  t != NULL; t = t->next) {
956         inbytes_left = outbytes_left = strlen(t->text);
957         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
958         if (status) {
959             return status;
960         }
961     }
962 
963     for (a = e->attr; a != NULL; a = a->next) {
964         inbytes_left = outbytes_left = strlen(a->name);
965         status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
966         if (status) {
967             return status;
968         }
969         inbytes_left = outbytes_left = strlen(a->value);
970         status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
971         if (status) {
972             return status;
973         }
974     }
975 
976     for (ec = e->first_child; ec != NULL; ec = ec->next) {
977         status = apr_xml_parser_convert_elem(ec, convset);
978         if (status) {
979             return status;
980         }
981     }
982     return APR_SUCCESS;
983 }
984 
985 /* convert the whole document to EBCDIC */
apr_xml_parser_convert_doc(apr_pool_t * pool,apr_xml_doc * pdoc,apr_xlate_t * convset)986 APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
987                                                      apr_xml_doc *pdoc,
988                                                      apr_xlate_t *convset)
989 {
990     apr_status_t status;
991     /* Don't convert the namespaces: they are constant! */
992     if (pdoc->namespaces != NULL) {
993         int i;
994         apr_array_header_t *namespaces;
995         namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
996         if (namespaces == NULL)
997             return APR_ENOMEM;
998         for (i = 0; i < pdoc->namespaces->nelts; i++) {
999             apr_size_t inbytes_left, outbytes_left;
1000             char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
1001             ptr = apr_pstrdup(pool, ptr);
1002             if ( ptr == NULL)
1003                 return APR_ENOMEM;
1004             inbytes_left = outbytes_left = strlen(ptr);
1005             status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
1006             if (status) {
1007                 return status;
1008             }
1009             apr_xml_insert_uri(namespaces, ptr);
1010         }
1011         pdoc->namespaces = namespaces;
1012     }
1013     return apr_xml_parser_convert_elem(pdoc->root, convset);
1014 }
1015 #endif
1016