1 /* Helper routines for parsing XML using Expat.
2 
3    Copyright (C) 2006-2024 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "cli/cli-cmds.h"
21 #include "xml-builtin.h"
22 #include "xml-support.h"
23 #include "gdbsupport/filestuff.h"
24 #include "gdbsupport/gdb-safe-ctype.h"
25 #include <vector>
26 #include <string>
27 
/* Debugging flag.  While it is false, gdb_xml_debug returns without
   printing anything and start_element skips its scan for unrecognized
   attributes.  */
static bool debug_xml;
30 
31 /* The contents of this file are only useful if XML support is
32    available.  */
33 #ifdef HAVE_LIBEXPAT
34 
35 #include "gdb_expat.h"
36 
/* The maximum depth of <xi:include> nesting.  No need to be miserly,
   we just want to avoid running out of stack on loops.
   xinclude_start_include reports an error once the current include
   depth is greater than this value.  */
#define MAX_XINCLUDE_DEPTH 30
40 
41 /* Simplified XML parser infrastructure.  */
42 
43 /* A parsing level -- used to keep track of the current element
44    nesting.  */
45 struct scope_level
46 {
47   explicit scope_level (const gdb_xml_element *elements_ = NULL)
elementsscope_level48     : elements (elements_),
49       element (NULL),
50       seen (0)
51   {}
52 
53   /* Elements we allow at this level.  */
54   const struct gdb_xml_element *elements;
55 
56   /* The element which we are within.  */
57   const struct gdb_xml_element *element;
58 
59   /* Mask of which elements we've seen at this level (used for
60      optional and repeatable checking).  */
61   unsigned int seen;
62 
63   /* Body text accumulation.  */
64   std::string body;
65 };
66 
67 /* The parser itself, and our additional state.  */
68 struct gdb_xml_parser
69 {
70   gdb_xml_parser (const char *name,
71                       const gdb_xml_element *elements,
72                       void *user_data);
73   ~gdb_xml_parser();
74 
75   /* Associate DTD_NAME, which must be the name of a compiled-in DTD,
76      with the parser.  */
77   void use_dtd (const char *dtd_name);
78 
79   /* Return the name of the expected / default DTD, if specified.  */
dtd_namegdb_xml_parser80   const char *dtd_name ()
81   { return m_dtd_name; }
82 
83   /* Invoke the parser on BUFFER.  BUFFER is the data to parse, which
84      should be NUL-terminated.
85 
86      The return value is 0 for success or -1 for error.  It may throw,
87      but only if something unexpected goes wrong during parsing; parse
88      errors will be caught, warned about, and reported as failure.  */
89   int parse (const char *buffer);
90 
91   /* Issue a debugging message.  */
92   void vdebug (const char *format, va_list ap)
93     ATTRIBUTE_PRINTF (2, 0);
94 
95   /* Issue an error message, and stop parsing.  */
96   void verror (const char *format, va_list ap)
97     ATTRIBUTE_NORETURN ATTRIBUTE_PRINTF (2, 0);
98 
99   void body_text (const XML_Char *text, int length);
100   void start_element (const XML_Char *name, const XML_Char **attrs);
101   void end_element (const XML_Char *name);
102 
103   /* Return the name of this parser.  */
namegdb_xml_parser104   const char *name ()
105   { return m_name; }
106 
107   /* Return the user's callback data, for handlers.  */
user_datagdb_xml_parser108   void *user_data ()
109   { return m_user_data; };
110 
111   /* Are we the special <xi:include> parser?  */
set_is_xincludegdb_xml_parser112   void set_is_xinclude (bool is_xinclude)
113   { m_is_xinclude = is_xinclude; }
114 
115   /* A thrown error, if any.  */
set_errorgdb_xml_parser116   void set_error (gdb_exception &&error)
117   {
118     m_error = std::move (error);
119 #ifdef HAVE_XML_STOPPARSER
120     XML_StopParser (m_expat_parser, XML_FALSE);
121 #endif
122   }
123 
124   /* Return the underlying expat parser.  */
expat_parsergdb_xml_parser125   XML_Parser expat_parser ()
126   { return m_expat_parser; }
127 
128 private:
129   /* The underlying expat parser.  */
130   XML_Parser m_expat_parser;
131 
132   /* Name of this parser.  */
133   const char *m_name;
134 
135   /* The user's callback data, for handlers.  */
136   void *m_user_data;
137 
138   /* Scoping stack.  */
139   std::vector<scope_level> m_scopes;
140 
141 /* A thrown error, if any.  */
142   struct gdb_exception m_error;
143 
144   /* The line of the thrown error, or 0.  */
145   int m_last_line;
146 
147   /* The name of the expected / default DTD, if specified.  */
148   const char *m_dtd_name;
149 
150   /* Are we the special <xi:include> parser?  */
151   bool m_is_xinclude;
152 };
153 
154 /* Process some body text.  We accumulate the text for later use; it's
155    wrong to do anything with it immediately, because a single block of
156    text might be broken up into multiple calls to this function.  */
157 
158 void
body_text(const XML_Char * text,int length)159 gdb_xml_parser::body_text (const XML_Char *text, int length)
160 {
161   if (m_error.reason < 0)
162     return;
163 
164   scope_level &scope = m_scopes.back ();
165   scope.body.append (text, length);
166 }
167 
168 static void
gdb_xml_body_text(void * data,const XML_Char * text,int length)169 gdb_xml_body_text (void *data, const XML_Char *text, int length)
170 {
171   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data;
172 
173   parser->body_text (text, length);
174 }
175 
176 /* Issue a debugging message from one of PARSER's handlers.  */
177 
178 void
vdebug(const char * format,va_list ap)179 gdb_xml_parser::vdebug (const char *format, va_list ap)
180 {
181   int line = XML_GetCurrentLineNumber (m_expat_parser);
182 
183   std::string message = string_vprintf (format, ap);
184   if (line)
185     gdb_printf (gdb_stderr, "%s (line %d): %s\n",
186                     m_name, line, message.c_str ());
187   else
188     gdb_printf (gdb_stderr, "%s: %s\n",
189                     m_name, message.c_str ());
190 }
191 
192 void
gdb_xml_debug(struct gdb_xml_parser * parser,const char * format,...)193 gdb_xml_debug (struct gdb_xml_parser *parser, const char *format, ...)
194 {
195   if (!debug_xml)
196     return;
197 
198   va_list ap;
199   va_start (ap, format);
200   parser->vdebug (format, ap);
201   va_end (ap);
202 }
203 
204 /* Issue an error message from one of PARSER's handlers, and stop
205    parsing.  */
206 
207 void
verror(const char * format,va_list ap)208 gdb_xml_parser::verror (const char *format, va_list ap)
209 {
210   int line = XML_GetCurrentLineNumber (m_expat_parser);
211 
212   m_last_line = line;
213   throw_verror (XML_PARSE_ERROR, format, ap);
214 }
215 
216 void
gdb_xml_error(struct gdb_xml_parser * parser,const char * format,...)217 gdb_xml_error (struct gdb_xml_parser *parser, const char *format, ...)
218 {
219   va_list ap;
220   va_start (ap, format);
221   parser->verror (format, ap);
222   va_end (ap);
223 }
224 
225 /* Find the attribute named NAME in the set of parsed attributes
226    ATTRIBUTES.  Returns NULL if not found.  */
227 
228 struct gdb_xml_value *
xml_find_attribute(std::vector<gdb_xml_value> & attributes,const char * name)229 xml_find_attribute (std::vector<gdb_xml_value> &attributes,
230                         const char *name)
231 {
232   for (gdb_xml_value &value : attributes)
233     if (strcmp (value.name, name) == 0)
234       return &value;
235 
236   return NULL;
237 }
238 
239 /* Handle the start of an element.  NAME is the element, and ATTRS are
240    the names and values of this element's attributes.  */
241 
242 void
start_element(const XML_Char * name,const XML_Char ** attrs)243 gdb_xml_parser::start_element (const XML_Char *name,
244                                      const XML_Char **attrs)
245 {
246   if (m_error.reason < 0)
247     return;
248 
249   const struct gdb_xml_element *element;
250   const struct gdb_xml_attribute *attribute;
251   unsigned int seen;
252 
253   /* Push an error scope.  If we return or throw an exception before
254      filling this in, it will tell us to ignore children of this
255      element.  Note we don't take a reference to the element yet
256      because further below we'll process the element which may recurse
257      back here and push more elements to the vector.  When the
258      recursion unrolls all such elements will have been popped back
259      already, but if one of those pushes reallocates the vector,
260      previous element references will be invalidated.  */
261   m_scopes.emplace_back ();
262 
263   /* Get a reference to the current scope.  */
264   scope_level &scope = m_scopes[m_scopes.size () - 2];
265 
266   gdb_xml_debug (this, _("Entering element <%s>"), name);
267 
268   /* Find this element in the list of the current scope's allowed
269      children.  Record that we've seen it.  */
270 
271   seen = 1;
272   for (element = scope.elements; element && element->name;
273        element++, seen <<= 1)
274     if (strcmp (element->name, name) == 0)
275       break;
276 
277   if (element == NULL || element->name == NULL)
278     {
279       /* If we're working on XInclude, <xi:include> can be the child
280            of absolutely anything.  Copy the previous scope's element
281            list into the new scope even if there was no match.  */
282       if (m_is_xinclude)
283           {
284             XML_DefaultCurrent (m_expat_parser);
285 
286             scope_level &unknown_scope = m_scopes.back ();
287             unknown_scope.elements = scope.elements;
288             return;
289           }
290 
291       gdb_xml_debug (this, _("Element <%s> unknown"), name);
292       return;
293     }
294 
295   if (!(element->flags & GDB_XML_EF_REPEATABLE) && (seen & scope.seen))
296     gdb_xml_error (this, _("Element <%s> only expected once"), name);
297 
298   scope.seen |= seen;
299 
300   std::vector<gdb_xml_value> attributes;
301 
302   for (attribute = element->attributes;
303        attribute != NULL && attribute->name != NULL;
304        attribute++)
305     {
306       const char *val = NULL;
307       const XML_Char **p;
308       void *parsed_value;
309 
310       for (p = attrs; *p != NULL; p += 2)
311           if (!strcmp (attribute->name, p[0]))
312             {
313               val = p[1];
314               break;
315             }
316 
317       if (*p != NULL && val == NULL)
318           {
319             gdb_xml_debug (this, _("Attribute \"%s\" missing a value"),
320                                attribute->name);
321             continue;
322           }
323 
324       if (*p == NULL && !(attribute->flags & GDB_XML_AF_OPTIONAL))
325           {
326             gdb_xml_error (this, _("Required attribute \"%s\" of "
327                                            "<%s> not specified"),
328                                attribute->name, element->name);
329             continue;
330           }
331 
332       if (*p == NULL)
333           continue;
334 
335       gdb_xml_debug (this, _("Parsing attribute %s=\"%s\""),
336                          attribute->name, val);
337 
338       if (attribute->handler)
339           parsed_value = attribute->handler (this, attribute, val);
340       else
341           parsed_value = xstrdup (val);
342 
343       attributes.emplace_back (attribute->name, parsed_value);
344     }
345 
346   /* Check for unrecognized attributes.  */
347   if (debug_xml)
348     {
349       const XML_Char **p;
350 
351       for (p = attrs; *p != NULL; p += 2)
352           {
353             for (attribute = element->attributes;
354                  attribute != NULL && attribute->name != NULL;
355                  attribute++)
356               if (strcmp (attribute->name, *p) == 0)
357                 break;
358 
359             if (attribute == NULL || attribute->name == NULL)
360               gdb_xml_debug (this, _("Ignoring unknown attribute %s"), *p);
361           }
362     }
363 
364   /* Call the element handler if there is one.  */
365   if (element->start_handler)
366     element->start_handler (this, element, m_user_data, attributes);
367 
368   /* Fill in a new scope level.  Note that we must delay getting a
369      back reference till here because above we might have recursed,
370      which may have reallocated the vector which invalidates
371      iterators/pointers/references.  */
372   scope_level &new_scope = m_scopes.back ();
373   new_scope.element = element;
374   new_scope.elements = element->children;
375 }
376 
377 /* Wrapper for gdb_xml_start_element, to prevent throwing exceptions
378    through expat.  */
379 
380 static void
gdb_xml_start_element_wrapper(void * data,const XML_Char * name,const XML_Char ** attrs)381 gdb_xml_start_element_wrapper (void *data, const XML_Char *name,
382                                      const XML_Char **attrs)
383 {
384   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data;
385 
386   try
387     {
388       parser->start_element (name, attrs);
389     }
390   catch (gdb_exception &ex)
391     {
392       parser->set_error (std::move (ex));
393     }
394 }
395 
396 /* Handle the end of an element.  NAME is the current element.  */
397 
398 void
end_element(const XML_Char * name)399 gdb_xml_parser::end_element (const XML_Char *name)
400 {
401   if (m_error.reason < 0)
402     return;
403 
404   struct scope_level *scope = &m_scopes.back ();
405   const struct gdb_xml_element *element;
406   unsigned int seen;
407 
408   gdb_xml_debug (this, _("Leaving element <%s>"), name);
409 
410   for (element = scope->elements, seen = 1;
411        element != NULL && element->name != NULL;
412        element++, seen <<= 1)
413     if ((scope->seen & seen) == 0
414           && (element->flags & GDB_XML_EF_OPTIONAL) == 0)
415       gdb_xml_error (this, _("Required element <%s> is missing"),
416                          element->name);
417 
418   /* Call the element processor.  */
419   if (scope->element != NULL && scope->element->end_handler)
420     {
421       const char *body;
422 
423       if (scope->body.empty ())
424           body = "";
425       else
426           {
427             int length;
428 
429             length = scope->body.size ();
430             body = scope->body.c_str ();
431 
432             /* Strip leading and trailing whitespace.  */
433             while (length > 0 && ISSPACE (body[length - 1]))
434               length--;
435             scope->body.erase (length);
436             while (*body && ISSPACE (*body))
437               body++;
438           }
439 
440       scope->element->end_handler (this, scope->element,
441                                            m_user_data, body);
442     }
443   else if (scope->element == NULL)
444     XML_DefaultCurrent (m_expat_parser);
445 
446   /* Pop the scope level.  */
447   m_scopes.pop_back ();
448 }
449 
450 /* Wrapper for gdb_xml_end_element, to prevent throwing exceptions
451    through expat.  */
452 
453 static void
gdb_xml_end_element_wrapper(void * data,const XML_Char * name)454 gdb_xml_end_element_wrapper (void *data, const XML_Char *name)
455 {
456   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data;
457 
458   try
459     {
460       parser->end_element (name);
461     }
462   catch (gdb_exception &ex)
463     {
464       parser->set_error (std::move (ex));
465     }
466 }
467 
/* Free a parser and all its associated state.  The expat parser is
   the only member released explicitly here.  */

gdb_xml_parser::~gdb_xml_parser ()
{
  XML_ParserFree (m_expat_parser);
}
474 
475 /* Initialize a parser.  */
476 
gdb_xml_parser(const char * name,const gdb_xml_element * elements,void * user_data)477 gdb_xml_parser::gdb_xml_parser (const char *name,
478                                         const gdb_xml_element *elements,
479                                         void *user_data)
480   : m_name (name),
481     m_user_data (user_data),
482     m_last_line (0),
483     m_dtd_name (NULL),
484     m_is_xinclude (false)
485 {
486   m_expat_parser = XML_ParserCreateNS (NULL, '!');
487   if (m_expat_parser == NULL)
488     malloc_failure (0);
489 
490   XML_SetUserData (m_expat_parser, this);
491 
492   /* Set the callbacks.  */
493   XML_SetElementHandler (m_expat_parser, gdb_xml_start_element_wrapper,
494                                gdb_xml_end_element_wrapper);
495   XML_SetCharacterDataHandler (m_expat_parser, gdb_xml_body_text);
496 
497   /* Initialize the outer scope.  */
498   m_scopes.emplace_back (elements);
499 }
500 
501 /* External entity handler.  The only external entities we support
502    are those compiled into GDB (we do not fetch entities from the
503    target).  */
504 
505 static int XMLCALL
gdb_xml_fetch_external_entity(XML_Parser expat_parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)506 gdb_xml_fetch_external_entity (XML_Parser expat_parser,
507                                      const XML_Char *context,
508                                      const XML_Char *base,
509                                      const XML_Char *systemId,
510                                      const XML_Char *publicId)
511 {
512   XML_Parser entity_parser;
513   const char *text;
514   enum XML_Status status;
515 
516   if (systemId == NULL)
517     {
518       gdb_xml_parser *parser
519           = (gdb_xml_parser *) XML_GetUserData (expat_parser);
520 
521       text = fetch_xml_builtin (parser->dtd_name ());
522       if (text == NULL)
523           internal_error (_("could not locate built-in DTD %s"),
524                               parser->dtd_name ());
525     }
526   else
527     {
528       text = fetch_xml_builtin (systemId);
529       if (text == NULL)
530           return XML_STATUS_ERROR;
531     }
532 
533   entity_parser = XML_ExternalEntityParserCreate (expat_parser,
534                                                               context, NULL);
535 
536   /* Don't use our handlers for the contents of the DTD.  Just let expat
537      process it.  */
538   XML_SetElementHandler (entity_parser, NULL, NULL);
539   XML_SetDoctypeDeclHandler (entity_parser, NULL, NULL);
540   XML_SetXmlDeclHandler (entity_parser, NULL);
541   XML_SetDefaultHandler (entity_parser, NULL);
542   XML_SetUserData (entity_parser, NULL);
543 
544   status = XML_Parse (entity_parser, text, strlen (text), 1);
545 
546   XML_ParserFree (entity_parser);
547   return status;
548 }
549 
550 void
use_dtd(const char * dtd_name)551 gdb_xml_parser::use_dtd (const char *dtd_name)
552 {
553   enum XML_Error err;
554 
555   m_dtd_name = dtd_name;
556 
557   XML_SetParamEntityParsing (m_expat_parser,
558                                    XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
559   XML_SetExternalEntityRefHandler (m_expat_parser,
560                                            gdb_xml_fetch_external_entity);
561 
562   /* Even if no DTD is provided, use the built-in DTD anyway.  */
563   err = XML_UseForeignDTD (m_expat_parser, XML_TRUE);
564   if (err != XML_ERROR_NONE)
565     internal_error (_("XML_UseForeignDTD failed: %s"),
566                         XML_ErrorString (err));
567 }
568 
569 /* Invoke PARSER on BUFFER.  BUFFER is the data to parse, which
570    should be NUL-terminated.
571 
572    The return value is 0 for success or -1 for error.  It may throw,
573    but only if something unexpected goes wrong during parsing; parse
574    errors will be caught, warned about, and reported as failure.  */
575 
576 int
parse(const char * buffer)577 gdb_xml_parser::parse (const char *buffer)
578 {
579   enum XML_Status status;
580   const char *error_string;
581 
582   gdb_xml_debug (this, _("Starting:\n%s"), buffer);
583 
584   status = XML_Parse (m_expat_parser, buffer, strlen (buffer), 1);
585 
586   if (status == XML_STATUS_OK && m_error.reason == 0)
587     return 0;
588 
589   if (m_error.reason == RETURN_ERROR
590       && m_error.error == XML_PARSE_ERROR)
591     {
592       gdb_assert (m_error.message != NULL);
593       error_string = m_error.what ();
594     }
595   else if (status == XML_STATUS_ERROR)
596     {
597       enum XML_Error err = XML_GetErrorCode (m_expat_parser);
598 
599       error_string = XML_ErrorString (err);
600     }
601   else
602     {
603       gdb_assert (m_error.reason < 0);
604       throw_exception (std::move (m_error));
605     }
606 
607   if (m_last_line != 0)
608     warning (_("while parsing %s (at line %d): %s"), m_name,
609                m_last_line, error_string);
610   else
611     warning (_("while parsing %s: %s"), m_name, error_string);
612 
613   return -1;
614 }
615 
616 int
gdb_xml_parse_quick(const char * name,const char * dtd_name,const struct gdb_xml_element * elements,const char * document,void * user_data)617 gdb_xml_parse_quick (const char *name, const char *dtd_name,
618                          const struct gdb_xml_element *elements,
619                          const char *document, void *user_data)
620 {
621   gdb_xml_parser parser (name, elements, user_data);
622   if (dtd_name != NULL)
623     parser.use_dtd (dtd_name);
624   return parser.parse (document);
625 }
626 
627 /* Parse a field VALSTR that we expect to contain an integer value.
628    The integer is returned in *VALP.  The string is parsed with an
629    equivalent to strtoul.
630 
631    Returns 0 for success, -1 for error.  */
632 
633 static int
xml_parse_unsigned_integer(const char * valstr,ULONGEST * valp)634 xml_parse_unsigned_integer (const char *valstr, ULONGEST *valp)
635 {
636   const char *endptr;
637   ULONGEST result;
638 
639   if (*valstr == '\0')
640     return -1;
641 
642   result = strtoulst (valstr, &endptr, 0);
643   if (*endptr != '\0')
644     return -1;
645 
646   *valp = result;
647   return 0;
648 }
649 
650 /* Parse an integer string into a ULONGEST and return it, or call
651    gdb_xml_error if it could not be parsed.  */
652 
653 ULONGEST
gdb_xml_parse_ulongest(struct gdb_xml_parser * parser,const char * value)654 gdb_xml_parse_ulongest (struct gdb_xml_parser *parser, const char *value)
655 {
656   ULONGEST result;
657 
658   if (xml_parse_unsigned_integer (value, &result) != 0)
659     gdb_xml_error (parser, _("Can't convert \"%s\" to an integer"), value);
660 
661   return result;
662 }
663 
664 /* Parse an integer attribute into a ULONGEST.  */
665 
666 void *
gdb_xml_parse_attr_ulongest(struct gdb_xml_parser * parser,const struct gdb_xml_attribute * attribute,const char * value)667 gdb_xml_parse_attr_ulongest (struct gdb_xml_parser *parser,
668                                    const struct gdb_xml_attribute *attribute,
669                                    const char *value)
670 {
671   ULONGEST result;
672   void *ret;
673 
674   if (xml_parse_unsigned_integer (value, &result) != 0)
675     gdb_xml_error (parser, _("Can't convert %s=\"%s\" to an integer"),
676                        attribute->name, value);
677 
678   ret = XNEW (ULONGEST);
679   memcpy (ret, &result, sizeof (result));
680   return ret;
681 }
682 
/* A handler_data for yes/no boolean values: "yes" maps to 1 and "no"
   maps to 0.  The entry with a NULL name terminates the list.  */

const struct gdb_xml_enum gdb_xml_enums_boolean[] = {
  { "yes", 1 },
  { "no", 0 },
  { NULL, 0 }
};
690 
691 /* Map NAME to VALUE.  A struct gdb_xml_enum * should be saved as the
692    value of handler_data when using gdb_xml_parse_attr_enum to parse a
693    fixed list of possible strings.  The list is terminated by an entry
694    with NAME == NULL.  */
695 
696 void *
gdb_xml_parse_attr_enum(struct gdb_xml_parser * parser,const struct gdb_xml_attribute * attribute,const char * value)697 gdb_xml_parse_attr_enum (struct gdb_xml_parser *parser,
698                                const struct gdb_xml_attribute *attribute,
699                                const char *value)
700 {
701   const struct gdb_xml_enum *enums
702     = (const struct gdb_xml_enum *) attribute->handler_data;
703   void *ret;
704 
705   for (enums = (const struct gdb_xml_enum *) attribute->handler_data;
706        enums->name != NULL; enums++)
707     if (strcasecmp (enums->name, value) == 0)
708       break;
709 
710   if (enums->name == NULL)
711     gdb_xml_error (parser, _("Unknown attribute value %s=\"%s\""),
712                      attribute->name, value);
713 
714   ret = xmalloc (sizeof (enums->value));
715   memcpy (ret, &enums->value, sizeof (enums->value));
716   return ret;
717 }
718 
719 
720 /* XInclude processing.  This is done as a separate step from actually
721    parsing the document, so that we can produce a single combined XML
722    document - e.g. to hand to a front end or to simplify comparing two
723    documents.  We make extensive use of XML_DefaultCurrent, to pass
724    input text directly into the output without reformatting or
725    requoting it.
726 
727    We output the DOCTYPE declaration for the first document unchanged,
728    if present, and discard DOCTYPEs from included documents.  Only the
729    one we pass through here is used when we feed the result back to
730    expat.  The XInclude standard explicitly does not discuss
731    validation of the result; we choose to apply the same DTD applied
732    to the outermost document.
733 
734    We can not simply include the external DTD subset in the document
735    as an internal subset, because <!IGNORE> and <!INCLUDE> are valid
736    only in external subsets.  But if we do not pass the DTD into the
737    output at all, default values will not be filled in.
738 
739    We don't pass through any <?xml> declaration because we generate
740    UTF-8, not whatever the input encoding was.  */
741 
742 struct xinclude_parsing_data
743 {
xinclude_parsing_dataxinclude_parsing_data744   xinclude_parsing_data (std::string &output_,
745                                xml_fetch_another fetcher_,
746                                int include_depth_)
747     : output (output_),
748       skip_depth (0),
749       include_depth (include_depth_),
750       fetcher (fetcher_)
751   {}
752 
753   /* Where the output goes.  */
754   std::string &output;
755 
756   /* A count indicating whether we are in an element whose
757      children should not be copied to the output, and if so,
758      how deep we are nested.  This is used for anything inside
759      an xi:include, and for the DTD.  */
760   int skip_depth;
761 
762   /* The number of <xi:include> elements currently being processed,
763      to detect loops.  */
764   int include_depth;
765 
766   /* A function to call to obtain additional features, and its
767      baton.  */
768   xml_fetch_another fetcher;
769 };
770 
771 static void
xinclude_start_include(struct gdb_xml_parser * parser,const struct gdb_xml_element * element,void * user_data,std::vector<gdb_xml_value> & attributes)772 xinclude_start_include (struct gdb_xml_parser *parser,
773                               const struct gdb_xml_element *element,
774                               void *user_data,
775                               std::vector<gdb_xml_value> &attributes)
776 {
777   struct xinclude_parsing_data *data
778     = (struct xinclude_parsing_data *) user_data;
779   char *href = (char *) xml_find_attribute (attributes, "href")->value.get ();
780 
781   gdb_xml_debug (parser, _("Processing XInclude of \"%s\""), href);
782 
783   if (data->include_depth > MAX_XINCLUDE_DEPTH)
784     gdb_xml_error (parser, _("Maximum XInclude depth (%d) exceeded"),
785                        MAX_XINCLUDE_DEPTH);
786 
787   std::optional<gdb::char_vector> text = data->fetcher (href);
788   if (!text)
789     gdb_xml_error (parser, _("Could not load XML document \"%s\""), href);
790 
791   if (!xml_process_xincludes (data->output, parser->name (),
792                                     text->data (), data->fetcher,
793                                     data->include_depth + 1))
794     gdb_xml_error (parser, _("Parsing \"%s\" failed"), href);
795 
796   data->skip_depth++;
797 }
798 
799 static void
xinclude_end_include(struct gdb_xml_parser * parser,const struct gdb_xml_element * element,void * user_data,const char * body_text)800 xinclude_end_include (struct gdb_xml_parser *parser,
801                           const struct gdb_xml_element *element,
802                           void *user_data, const char *body_text)
803 {
804   struct xinclude_parsing_data *data
805     = (struct xinclude_parsing_data *) user_data;
806 
807   data->skip_depth--;
808 }
809 
810 static void XMLCALL
xml_xinclude_default(void * data_,const XML_Char * s,int len)811 xml_xinclude_default (void *data_, const XML_Char *s, int len)
812 {
813   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data_;
814   xinclude_parsing_data *data = (xinclude_parsing_data *) parser->user_data ();
815 
816   /* If we are inside of e.g. xi:include or the DTD, don't save this
817      string.  */
818   if (data->skip_depth)
819     return;
820 
821   /* Otherwise just add it to the end of the document we're building
822      up.  */
823   data->output.append (s, len);
824 }
825 
826 static void XMLCALL
xml_xinclude_start_doctype(void * data_,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)827 xml_xinclude_start_doctype (void *data_, const XML_Char *doctypeName,
828                                   const XML_Char *sysid, const XML_Char *pubid,
829                                   int has_internal_subset)
830 {
831   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data_;
832   xinclude_parsing_data *data = (xinclude_parsing_data *) parser->user_data ();
833 
834   /* Don't print out the doctype, or the contents of the DTD internal
835      subset, if any.  */
836   data->skip_depth++;
837 }
838 
839 static void XMLCALL
xml_xinclude_end_doctype(void * data_)840 xml_xinclude_end_doctype (void *data_)
841 {
842   struct gdb_xml_parser *parser = (struct gdb_xml_parser *) data_;
843   xinclude_parsing_data *data = (xinclude_parsing_data *) parser->user_data ();
844 
845   data->skip_depth--;
846 }
847 
/* XML declaration handler for XInclude processing.  All arguments are
   ignored.  */

static void XMLCALL
xml_xinclude_xml_decl (void *data_, const XML_Char *version,
                       const XML_Char *encoding, int standalone)
{
  /* Do nothing - this function prevents the default handler from
     being called, thus suppressing the XML declaration from the
     output.  */
}
856 
/* Attributes of <xi:include>: just "href", which is not flagged as
   optional.  The entry with a NULL name terminates the list.  */
const struct gdb_xml_attribute xinclude_attributes[] = {
  { "href", GDB_XML_AF_NONE, NULL, NULL },
  { NULL, GDB_XML_AF_NONE, NULL, NULL }
};
861 
/* Elements handled during XInclude processing: only "include" in the
   XInclude namespace, optional and repeatable, with
   xinclude_start_include and xinclude_end_include as its handlers.
   The '!' between the namespace URI and the element name is the
   separator character the parser constructor passes to
   XML_ParserCreateNS.  The entry with a NULL name terminates the
   list.  */
const struct gdb_xml_element xinclude_elements[] = {
  { "http://www.w3.org/2001/XInclude!include", xinclude_attributes, NULL,
    GDB_XML_EF_OPTIONAL | GDB_XML_EF_REPEATABLE,
    xinclude_start_include, xinclude_end_include },
  { NULL, NULL, NULL, GDB_XML_EF_NONE, NULL, NULL }
};
868 
869 /* The main entry point for <xi:include> processing.  */
870 
871 bool
xml_process_xincludes(std::string & result,const char * name,const char * text,xml_fetch_another fetcher,int depth)872 xml_process_xincludes (std::string &result,
873                            const char *name, const char *text,
874                            xml_fetch_another fetcher, int depth)
875 {
876   xinclude_parsing_data data (result, fetcher, depth);
877 
878   gdb_xml_parser parser (name, xinclude_elements, &data);
879   parser.set_is_xinclude (true);
880 
881   XML_SetCharacterDataHandler (parser.expat_parser (), NULL);
882   XML_SetDefaultHandler (parser.expat_parser (), xml_xinclude_default);
883 
884   /* Always discard the XML version declarations; the only important
885      thing this provides is encoding, and our result will have been
886      converted to UTF-8.  */
887   XML_SetXmlDeclHandler (parser.expat_parser (), xml_xinclude_xml_decl);
888 
889   if (depth > 0)
890     /* Discard the doctype for included documents.  */
891     XML_SetDoctypeDeclHandler (parser.expat_parser (),
892                                      xml_xinclude_start_doctype,
893                                      xml_xinclude_end_doctype);
894 
895   parser.use_dtd ("xinclude.dtd");
896 
897   if (parser.parse (text) == 0)
898     {
899       if (depth == 0)
900           gdb_xml_debug (&parser, _("XInclude processing succeeded."));
901       return true;
902     }
903 
904   return false;
905 }
906 #endif /* HAVE_LIBEXPAT */
907 
908 
909 /* Return an XML document which was compiled into GDB, from
910    the given FILENAME, or NULL if the file was not compiled in.  */
911 
912 const char *
fetch_xml_builtin(const char * filename)913 fetch_xml_builtin (const char *filename)
914 {
915   const char *const (*p)[2];
916 
917   for (p = xml_builtin; (*p)[0]; p++)
918     if (strcmp ((*p)[0], filename) == 0)
919       return (*p)[1];
920 
921   return NULL;
922 }
923 
924 /* A to_xfer_partial helper function which reads XML files which were
925    compiled into GDB.  The target may call this function from its own
926    to_xfer_partial handler, after converting object and annex to the
927    appropriate filename.  */
928 
929 LONGEST
xml_builtin_xfer_partial(const char * filename,gdb_byte * readbuf,const gdb_byte * writebuf,ULONGEST offset,LONGEST len)930 xml_builtin_xfer_partial (const char *filename,
931                                 gdb_byte *readbuf, const gdb_byte *writebuf,
932                                 ULONGEST offset, LONGEST len)
933 {
934   const char *buf;
935   LONGEST len_avail;
936 
937   gdb_assert (readbuf != NULL && writebuf == NULL);
938   gdb_assert (filename != NULL);
939 
940   buf = fetch_xml_builtin (filename);
941   if (buf == NULL)
942     return -1;
943 
944   len_avail = strlen (buf);
945   if (offset >= len_avail)
946     return 0;
947 
948   if (len > len_avail - offset)
949     len = len_avail - offset;
950   memcpy (readbuf, buf + offset, len);
951   return len;
952 }
953 
954 
/* Show callback for the "xml" boolean setting registered in
   _initialize_xml_support.  Print the current setting, VALUE, to FILE.
   FROM_TTY and C are not used.  */

static void
show_debug_xml (struct ui_file *file, int from_tty,
                struct cmd_list_element *c, const char *value)
{
  gdb_printf (file, _("XML debugging is %s.\n"), value);
}
961 
962 std::optional<gdb::char_vector>
xml_fetch_content_from_file(const char * filename,const char * dirname)963 xml_fetch_content_from_file (const char *filename, const char *dirname)
964 {
965   gdb_file_up file;
966 
967   if (dirname != nullptr && *dirname != '\0')
968     {
969       gdb::unique_xmalloc_ptr<char> fullname
970           (concat (dirname, "/", filename, (char *) NULL));
971 
972       file = gdb_fopen_cloexec (fullname.get (), FOPEN_RB);
973     }
974   else
975     file = gdb_fopen_cloexec (filename, FOPEN_RB);
976 
977   if (file == NULL)
978     return {};
979 
980   /* Read in the whole file.  */
981 
982   size_t len;
983 
984   if (fseek (file.get (), 0, SEEK_END) == -1)
985     perror_with_name (_("seek to end of file"));
986   len = ftell (file.get ());
987   rewind (file.get ());
988 
989   gdb::char_vector text (len + 1);
990 
991   if (fread (text.data (), 1, len, file.get ()) != len
992       || ferror (file.get ()))
993     {
994       warning (_("Read error from \"%s\""), filename);
995       return {};
996     }
997 
998   text.back () = '\0';
999   return text;
1000 }
1001 
1002 void _initialize_xml_support ();
1003 void _initialize_xml_support ();
1004 void
_initialize_xml_support()1005 _initialize_xml_support ()
1006 {
1007   add_setshow_boolean_cmd ("xml", class_maintenance, &debug_xml,
1008                                  _("Set XML parser debugging."),
1009                                  _("Show XML parser debugging."),
1010                                  _("When set, debugging messages for XML parsers "
1011                                    "are displayed."),
1012                                  NULL, show_debug_xml,
1013                                  &setdebuglist, &showdebuglist);
1014 }
1015