1 /* Mainly the interface between cpplib and the C front ends.
2    Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3    1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 
25 #include "real.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "expr.h"
29 #include "input.h"
30 #include "output.h"
31 #include "c-tree.h"
32 #include "c-common.h"
33 #include "flags.h"
34 #include "timevar.h"
35 #include "cpplib.h"
36 #include "c-pragma.h"
37 #include "toplev.h"
38 #include "intl.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
41 #include "debug.h"
42 
43 #ifdef MULTIBYTE_CHARS
44 #include "mbchar.h"
45 #include <locale.h>
46 #endif /* MULTIBYTE_CHARS */
47 
48 /* The current line map.  */
49 static const struct line_map *map;
50 
51 /* The line used to refresh the lineno global variable after each token.  */
52 static unsigned int src_lineno;
53 
54 /* We may keep statistics about how long which files took to compile.  */
55 static int header_time, body_time;
56 static splay_tree file_info_tree;
57 
58 /* File used for outputting assembler code.  */
59 extern FILE *asm_out_file;
60 
61 #undef WCHAR_TYPE_SIZE
62 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
63 
64 /* Number of bytes in a wide character.  */
65 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
66 
67 int pending_lang_change; /* If we need to switch languages - C++ only */
68 int c_header_level;	 /* depth in C headers - C++ only */
69 
70 /* Nonzero tells yylex to ignore \ in string constants.  */
71 static int ignore_escape_flag;
72 
73 static tree interpret_integer	PARAMS ((const cpp_token *, unsigned int));
74 static tree interpret_float	PARAMS ((const cpp_token *, unsigned int));
75 static enum integer_type_kind
76   narrowest_unsigned_type	PARAMS ((tree, unsigned int));
77 static enum integer_type_kind
78   narrowest_signed_type		PARAMS ((tree, unsigned int));
79 static tree lex_string		PARAMS ((const unsigned char *, unsigned int,
80 					 int));
81 static tree lex_charconst	PARAMS ((const cpp_token *));
82 static void update_header_times	PARAMS ((const char *));
83 static int dump_one_header	PARAMS ((splay_tree_node, void *));
84 static void cb_line_change     PARAMS ((cpp_reader *, const cpp_token *, int));
85 static void cb_ident		PARAMS ((cpp_reader *, unsigned int,
86 					 const cpp_string *));
87 static void cb_file_change    PARAMS ((cpp_reader *, const struct line_map *));
88 static void cb_def_pragma	PARAMS ((cpp_reader *, unsigned int));
89 static void cb_define		PARAMS ((cpp_reader *, unsigned int,
90 					 cpp_hashnode *));
91 static void cb_undef		PARAMS ((cpp_reader *, unsigned int,
92 					 cpp_hashnode *));
93 
94 const char *
init_c_lex(filename)95 init_c_lex (filename)
96      const char *filename;
97 {
98   struct cpp_callbacks *cb;
99   struct c_fileinfo *toplevel;
100 
101   /* Set up filename timing.  Must happen before cpp_read_main_file.  */
102   file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
103 				   0,
104 				   (splay_tree_delete_value_fn)free);
105   toplevel = get_fileinfo ("<top level>");
106   if (flag_detailed_statistics)
107     {
108       header_time = 0;
109       body_time = get_run_time ();
110       toplevel->time = body_time;
111     }
112 
113 #ifdef MULTIBYTE_CHARS
114   /* Change to the native locale for multibyte conversions.  */
115   setlocale (LC_CTYPE, "");
116   GET_ENVIRONMENT (literal_codeset, "LANG");
117 #endif
118 
119   cb = cpp_get_callbacks (parse_in);
120 
121   cb->line_change = cb_line_change;
122   cb->ident = cb_ident;
123   cb->file_change = cb_file_change;
124   cb->def_pragma = cb_def_pragma;
125 
126   /* Set the debug callbacks if we can use them.  */
127   if (debug_info_level == DINFO_LEVEL_VERBOSE
128       && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
129           || write_symbols == VMS_AND_DWARF2_DEBUG))
130     {
131       cb->define = cb_define;
132       cb->undef = cb_undef;
133     }
134 
135   /* Start it at 0.  */
136   lineno = 0;
137 
138   return cpp_read_main_file (parse_in, filename, ident_hash);
139 }
140 
141 /* A thin wrapper around the real parser that initializes the
142    integrated preprocessor after debug output has been initialized.
143    Also, make sure the start_source_file debug hook gets called for
144    the primary source file.  */
145 
146 void
c_common_parse_file(set_yydebug)147 c_common_parse_file (set_yydebug)
148      int set_yydebug ATTRIBUTE_UNUSED;
149 {
150 #if YYDEBUG != 0
151   yydebug = set_yydebug;
152 #else
153   warning ("YYDEBUG not defined");
154 #endif
155 
156   (*debug_hooks->start_source_file) (lineno, input_filename);
157   cpp_finish_options (parse_in);
158 
159   yyparse ();
160   free_parser_stacks ();
161 }
162 
163 struct c_fileinfo *
get_fileinfo(name)164 get_fileinfo (name)
165      const char *name;
166 {
167   splay_tree_node n;
168   struct c_fileinfo *fi;
169 
170   n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
171   if (n)
172     return (struct c_fileinfo *) n->value;
173 
174   fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
175   fi->time = 0;
176   fi->interface_only = 0;
177   fi->interface_unknown = 1;
178   splay_tree_insert (file_info_tree, (splay_tree_key) name,
179 		     (splay_tree_value) fi);
180   return fi;
181 }
182 
183 static void
update_header_times(name)184 update_header_times (name)
185      const char *name;
186 {
187   /* Changing files again.  This means currently collected time
188      is charged against header time, and body time starts back at 0.  */
189   if (flag_detailed_statistics)
190     {
191       int this_time = get_run_time ();
192       struct c_fileinfo *file = get_fileinfo (name);
193       header_time += this_time - body_time;
194       file->time += this_time - body_time;
195       body_time = this_time;
196     }
197 }
198 
199 static int
dump_one_header(n,dummy)200 dump_one_header (n, dummy)
201      splay_tree_node n;
202      void *dummy ATTRIBUTE_UNUSED;
203 {
204   print_time ((const char *) n->key,
205 	      ((struct c_fileinfo *) n->value)->time);
206   return 0;
207 }
208 
209 void
dump_time_statistics()210 dump_time_statistics ()
211 {
212   struct c_fileinfo *file = get_fileinfo (input_filename);
213   int this_time = get_run_time ();
214   file->time += this_time - body_time;
215 
216   fprintf (stderr, "\n******\n");
217   print_time ("header files (total)", header_time);
218   print_time ("main file (total)", this_time - body_time);
219   fprintf (stderr, "ratio = %g : 1\n",
220 	   (double)header_time / (double)(this_time - body_time));
221   fprintf (stderr, "\n******\n");
222 
223   splay_tree_foreach (file_info_tree, dump_one_header, 0);
224 }
225 
226 static void
cb_ident(pfile,line,str)227 cb_ident (pfile, line, str)
228      cpp_reader *pfile ATTRIBUTE_UNUSED;
229      unsigned int line ATTRIBUTE_UNUSED;
230      const cpp_string *str ATTRIBUTE_UNUSED;
231 {
232 #ifdef ASM_OUTPUT_IDENT
233   if (! flag_no_ident)
234     {
235       /* Convert escapes in the string.  */
236       tree value = lex_string (str->text, str->len, 0);
237       ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
238     }
239 #endif
240 }
241 
242 /* Called at the start of every non-empty line.  TOKEN is the first
243    lexed token on the line.  Used for diagnostic line numbers.  */
244 static void
cb_line_change(pfile,token,parsing_args)245 cb_line_change (pfile, token, parsing_args)
246      cpp_reader *pfile ATTRIBUTE_UNUSED;
247      const cpp_token *token;
248      int parsing_args;
249 {
250   if (token->type == CPP_EOF || parsing_args)
251     return;
252 
253   src_lineno = SOURCE_LINE (map, token->line);
254 }
255 
256 static void
cb_file_change(pfile,new_map)257 cb_file_change (pfile, new_map)
258      cpp_reader *pfile ATTRIBUTE_UNUSED;
259      const struct line_map *new_map;
260 {
261   unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
262 
263   if (new_map->reason == LC_ENTER)
264     {
265       /* Don't stack the main buffer on the input stack;
266 	 we already did in compile_file.  */
267       if (map == NULL)
268 	main_input_filename = new_map->to_file;
269       else
270 	{
271           int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
272 
273 	  lineno = included_at;
274 	  push_srcloc (new_map->to_file, 1);
275 	  (*debug_hooks->start_source_file) (included_at, new_map->to_file);
276 #ifndef NO_IMPLICIT_EXTERN_C
277 	  if (c_header_level)
278 	    ++c_header_level;
279 	  else if (new_map->sysp == 2)
280 	    {
281 	      c_header_level = 1;
282 	      ++pending_lang_change;
283 	    }
284 #endif
285 	}
286     }
287   else if (new_map->reason == LC_LEAVE)
288     {
289 #ifndef NO_IMPLICIT_EXTERN_C
290       if (c_header_level && --c_header_level == 0)
291 	{
292 	  if (new_map->sysp == 2)
293 	    warning ("badly nested C headers from preprocessor");
294 	  --pending_lang_change;
295 	}
296 #endif
297       pop_srcloc ();
298 
299       (*debug_hooks->end_source_file) (to_line);
300     }
301 
302   update_header_times (new_map->to_file);
303   in_system_header = new_map->sysp != 0;
304   input_filename = new_map->to_file;
305   lineno = to_line;
306   map = new_map;
307 
308   /* Hook for C++.  */
309   extract_interface_info ();
310 }
311 
312 static void
cb_def_pragma(pfile,line)313 cb_def_pragma (pfile, line)
314      cpp_reader *pfile;
315      unsigned int line;
316 {
317   /* Issue a warning message if we have been asked to do so.  Ignore
318      unknown pragmas in system headers unless an explicit
319      -Wunknown-pragmas has been given.  */
320   if (warn_unknown_pragmas > in_system_header)
321     {
322       const unsigned char *space, *name;
323       const cpp_token *s;
324 
325       space = name = (const unsigned char *) "";
326       s = cpp_get_token (pfile);
327       if (s->type != CPP_EOF)
328 	{
329 	  space = cpp_token_as_text (pfile, s);
330 	  s = cpp_get_token (pfile);
331 	  if (s->type == CPP_NAME)
332 	    name = cpp_token_as_text (pfile, s);
333 	}
334 
335       lineno = SOURCE_LINE (map, line);
336       warning ("ignoring #pragma %s %s", space, name);
337     }
338 }
339 
340 /* #define callback for DWARF and DWARF2 debug info.  */
341 static void
cb_define(pfile,line,node)342 cb_define (pfile, line, node)
343      cpp_reader *pfile;
344      unsigned int line;
345      cpp_hashnode *node;
346 {
347   (*debug_hooks->define) (SOURCE_LINE (map, line),
348 			  (const char *) cpp_macro_definition (pfile, node));
349 }
350 
351 /* #undef callback for DWARF and DWARF2 debug info.  */
352 static void
cb_undef(pfile,line,node)353 cb_undef (pfile, line, node)
354      cpp_reader *pfile ATTRIBUTE_UNUSED;
355      unsigned int line;
356      cpp_hashnode *node;
357 {
358   (*debug_hooks->undef) (SOURCE_LINE (map, line),
359 			 (const char *) NODE_NAME (node));
360 }
361 
362 #if 0 /* not yet */
/* Classify C as a universal-character-name.

   Returns 0 when C is below 0x7f (plain ASCII) and 1 otherwise.  In
   the latter case an error is reported if C is not one of the code
   points that may appear in an identifier as per [extendid]: either
   because it lies above the Basic Multilingual Plane or because no
   range in the table below contains it.  On an EBCDIC target the
   function always returns 0.

   Extended characters in identifiers are not implemented yet, which
   is why this code is disabled.  Portable code cannot count on
   support for more than the basic identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive code-point ranges accepted in identifiers, grouped by
     script.  A single code point is a range whose ends coincide.  */
  static const struct
  {
    unsigned short first;
    unsigned short last;
  } valid[] =
  {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  NOTE(review): 0x0e0d lies outside the other Lao entries
       and is already covered by the first Thai range; possibly
       0x0e8d was intended -- confirm against [extendid].  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e0d, 0x0e0d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane
     (the low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid / sizeof valid[0]; i++)
    if (c >= valid[i].first && c <= valid[i].last)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
636 
/* Append the UTF-8 representation of C to the token_buffer, one byte
   at a time through extend_token.

   Values up to 0x7f are emitted as a single byte.  Larger values get
   a lead byte, made of a length marker plus the topmost payload bits,
   followed by continuation bytes of the form 10xxxxxx that each carry
   the next six bits of C.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if      (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: SHIFT counts the payload bits that follow it.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep exactly six payload bits.  Without the 0x3f mask the
         higher bits of C leak into the two marker bits, e.g. U+00FF
         would end in 0xff instead of 0xbf.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
669 #endif
670 
671 int
c_lex(value)672 c_lex (value)
673      tree *value;
674 {
675   const cpp_token *tok;
676 
677   retry:
678   timevar_push (TV_CPP);
679   do
680     tok = cpp_get_token (parse_in);
681   while (tok->type == CPP_PADDING);
682   timevar_pop (TV_CPP);
683 
684   /* The C++ front end does horrible things with the current line
685      number.  To ensure an accurate line number, we must reset it
686      every time we return a token.  */
687   lineno = src_lineno;
688 
689   *value = NULL_TREE;
690   switch (tok->type)
691     {
692     /* Issue this error here, where we can get at tok->val.c.  */
693     case CPP_OTHER:
694       if (ISGRAPH (tok->val.c))
695 	error ("stray '%c' in program", tok->val.c);
696       else
697 	error ("stray '\\%o' in program", tok->val.c);
698       goto retry;
699 
700     case CPP_NAME:
701       *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
702       break;
703 
704     case CPP_NUMBER:
705       {
706 	unsigned int flags = cpp_classify_number (parse_in, tok);
707 
708 	switch (flags & CPP_N_CATEGORY)
709 	  {
710 	  case CPP_N_INVALID:
711 	    /* cpplib has issued an error.  */
712 	    *value = error_mark_node;
713 	    break;
714 
715 	  case CPP_N_INTEGER:
716 	    *value = interpret_integer (tok, flags);
717 	    break;
718 
719 	  case CPP_N_FLOATING:
720 	    *value = interpret_float (tok, flags);
721 	    break;
722 
723 	  default:
724 	    abort ();
725 	  }
726       }
727       break;
728 
729     case CPP_CHAR:
730     case CPP_WCHAR:
731       *value = lex_charconst (tok);
732       break;
733 
734     case CPP_STRING:
735     case CPP_WSTRING:
736       *value = lex_string (tok->val.str.text, tok->val.str.len,
737 			   tok->type == CPP_WSTRING);
738       break;
739 
740       /* These tokens should not be visible outside cpplib.  */
741     case CPP_HEADER_NAME:
742     case CPP_COMMENT:
743     case CPP_MACRO_ARG:
744       abort ();
745 
746     default: break;
747     }
748 
749   return tok->type;
750 }
751 
752 /* Returns the narrowest C-visible unsigned type, starting with the
753    minimum specified by FLAGS, that can fit VALUE, or itk_none if
754    there isn't one.  */
755 static enum integer_type_kind
narrowest_unsigned_type(value,flags)756 narrowest_unsigned_type (value, flags)
757      tree value;
758      unsigned int flags;
759 {
760   enum integer_type_kind itk;
761 
762   if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
763     itk = itk_unsigned_int;
764   else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
765     itk = itk_unsigned_long;
766   else
767     itk = itk_unsigned_long_long;
768 
769   /* int_fits_type_p must think the type of its first argument is
770      wider than its second argument, or it won't do the proper check.  */
771   TREE_TYPE (value) = widest_unsigned_literal_type_node;
772 
773   for (; itk < itk_none; itk += 2 /* skip unsigned types */)
774     if (int_fits_type_p (value, integer_types[itk]))
775       return itk;
776 
777   return itk_none;
778 }
779 
780 /* Ditto, but narrowest signed type.  */
781 static enum integer_type_kind
narrowest_signed_type(value,flags)782 narrowest_signed_type (value, flags)
783      tree value;
784      unsigned int flags;
785 {
786   enum integer_type_kind itk;
787 
788   if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
789     itk = itk_int;
790   else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
791     itk = itk_long;
792   else
793     itk = itk_long_long;
794 
795   /* int_fits_type_p must think the type of its first argument is
796      wider than its second argument, or it won't do the proper check.  */
797   TREE_TYPE (value) = widest_unsigned_literal_type_node;
798 
799   for (; itk < itk_none; itk += 2 /* skip signed types */)
800     if (int_fits_type_p (value, integer_types[itk]))
801       return itk;
802 
803   return itk_none;
804 }
805 
806 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib.  */
807 static tree
interpret_integer(token,flags)808 interpret_integer (token, flags)
809      const cpp_token *token;
810      unsigned int flags;
811 {
812   tree value, type;
813   enum integer_type_kind itk;
814   cpp_num integer;
815   cpp_options *options = cpp_get_options (parse_in);
816 
817   integer = cpp_interpret_integer (parse_in, token, flags);
818   integer = cpp_num_sign_extend (integer, options->precision);
819   value = build_int_2_wide (integer.low, integer.high);
820 
821   /* The type of a constant with a U suffix is straightforward.  */
822   if (flags & CPP_N_UNSIGNED)
823     itk = narrowest_unsigned_type (value, flags);
824   else
825     {
826       /* The type of a potentially-signed integer constant varies
827 	 depending on the base it's in, the standard in use, and the
828 	 length suffixes.  */
829       enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
830       enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
831 
832       /* In both C89 and C99, octal and hex constants may be signed or
833 	 unsigned, whichever fits tighter.  We do not warn about this
834 	 choice differing from the traditional choice, as the constant
835 	 is probably a bit pattern and either way will work.  */
836       if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
837 	itk = MIN (itk_u, itk_s);
838       else
839 	{
840 	  /* In C99, decimal constants are always signed.
841 	     In C89, decimal constants that don't fit in long have
842 	     undefined behavior; we try to make them unsigned long.
843 	     In GCC's extended C89, that last is true of decimal
844 	     constants that don't fit in long long, too.  */
845 
846 	  itk = itk_s;
847 	  if (itk_s > itk_u && itk_s > itk_long)
848 	    {
849 	      if (!flag_isoc99)
850 		{
851 		  if (itk_u < itk_unsigned_long)
852 		    itk_u = itk_unsigned_long;
853 		  itk = itk_u;
854 		  warning ("this decimal constant is unsigned only in ISO C90");
855 		}
856 	      else if (warn_traditional)
857 		warning ("this decimal constant would be unsigned in ISO C90");
858 	    }
859 	}
860     }
861 
862   if (itk == itk_none)
863     /* cpplib has already issued a warning for overflow.  */
864     type = ((flags & CPP_N_UNSIGNED)
865 	    ? widest_unsigned_literal_type_node
866 	    : widest_integer_literal_type_node);
867   else
868     type = integer_types[itk];
869 
870   if (itk > itk_unsigned_long
871       && (flags & CPP_N_WIDTH) != CPP_N_LARGE
872       && ! in_system_header && ! flag_isoc99)
873     pedwarn ("integer constant is too large for \"%s\" type",
874 	     (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
875 
876   TREE_TYPE (value) = type;
877 
878   /* Convert imaginary to a complex type.  */
879   if (flags & CPP_N_IMAGINARY)
880     value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
881 
882   return value;
883 }
884 
885 /* Interpret TOKEN, a floating point number with FLAGS as classified
886    by cpplib.  */
887 static tree
interpret_float(token,flags)888 interpret_float (token, flags)
889      const cpp_token *token;
890      unsigned int flags;
891 {
892   tree type;
893   tree value;
894   REAL_VALUE_TYPE real;
895   char *copy;
896   size_t copylen;
897   const char *typename;
898 
899   /* FIXME: make %T work in error/warning, then we don't need typename.  */
900   if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
901     {
902       type = long_double_type_node;
903       typename = "long double";
904     }
905   else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
906 	   || flag_single_precision_constant)
907     {
908       type = float_type_node;
909       typename = "float";
910     }
911   else
912     {
913       type = double_type_node;
914       typename = "double";
915     }
916 
917   /* Copy the constant to a nul-terminated buffer.  If the constant
918      has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
919      can't handle them.  */
920   copylen = token->val.str.len;
921   if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
922     /* Must be an F or L suffix.  */
923     copylen--;
924   if (flags & CPP_N_IMAGINARY)
925     /* I or J suffix.  */
926     copylen--;
927 
928   copy = alloca (copylen + 1);
929   memcpy (copy, token->val.str.text, copylen);
930   copy[copylen] = '\0';
931 
932   real_from_string (&real, copy);
933   real_convert (&real, TYPE_MODE (type), &real);
934 
935   /* A diagnostic is required for "soft" overflow by some ISO C
936      testsuites.  This is not pedwarn, because some people don't want
937      an error for this.
938      ??? That's a dubious reason... is this a mandatory diagnostic or
939      isn't it?   -- zw, 2001-08-21.  */
940   if (REAL_VALUE_ISINF (real) && pedantic)
941     warning ("floating constant exceeds range of \"%s\"", typename);
942 
943   /* Create a node with determined type and value.  */
944   value = build_real (type, real);
945   if (flags & CPP_N_IMAGINARY)
946     value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
947 
948   return value;
949 }
950 
951 static tree
lex_string(str,len,wide)952 lex_string (str, len, wide)
953      const unsigned char *str;
954      unsigned int len;
955      int wide;
956 {
957   tree value;
958   char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
959   char *q = buf;
960   const unsigned char *p = str, *limit = str + len;
961   cppchar_t c;
962 
963 #ifdef MULTIBYTE_CHARS
964   /* Reset multibyte conversion state.  */
965   (void) local_mbtowc (NULL, NULL, 0);
966 #endif
967 
968   while (p < limit)
969     {
970 #ifdef MULTIBYTE_CHARS
971       wchar_t wc;
972       int char_len;
973 
974       char_len = local_mbtowc (&wc, (const char *) p, limit - p);
975       if (char_len == -1)
976 	{
977 	  warning ("ignoring invalid multibyte character");
978 	  char_len = 1;
979 	  c = *p++;
980 	}
981       else
982 	{
983 	  p += char_len;
984 	  c = wc;
985 	}
986 #else
987       c = *p++;
988 #endif
989 
990       if (c == '\\' && !ignore_escape_flag)
991 	c = cpp_parse_escape (parse_in, &p, limit, wide);
992 
993       /* Add this single character into the buffer either as a wchar_t,
994 	 a multibyte sequence, or as a single byte.  */
995       if (wide)
996 	{
997 	  unsigned charwidth = TYPE_PRECISION (char_type_node);
998 	  unsigned bytemask = (1 << charwidth) - 1;
999 	  int byte;
1000 
1001 	  for (byte = 0; byte < WCHAR_BYTES; ++byte)
1002 	    {
1003 	      int n;
1004 	      if (byte >= (int) sizeof (c))
1005 		n = 0;
1006 	      else
1007 		n = (c >> (byte * charwidth)) & bytemask;
1008 	      if (BYTES_BIG_ENDIAN)
1009 		q[WCHAR_BYTES - byte - 1] = n;
1010 	      else
1011 		q[byte] = n;
1012 	    }
1013 	  q += WCHAR_BYTES;
1014 	}
1015 #ifdef MULTIBYTE_CHARS
1016       else if (char_len > 1)
1017 	{
1018 	  /* We're dealing with a multibyte character.  */
1019 	  for ( ; char_len >0; --char_len)
1020 	    {
1021 	      *q++ = *(p - char_len);
1022 	    }
1023 	}
1024 #endif
1025       else
1026 	{
1027 	  *q++ = c;
1028 	}
1029     }
1030 
1031   /* Terminate the string value, either with a single byte zero
1032      or with a wide zero.  */
1033 
1034   if (wide)
1035     {
1036       memset (q, 0, WCHAR_BYTES);
1037       q += WCHAR_BYTES;
1038     }
1039   else
1040     {
1041       *q++ = '\0';
1042     }
1043 
1044   value = build_string (q - buf, buf);
1045 
1046   if (wide)
1047     TREE_TYPE (value) = wchar_array_type_node;
1048   else
1049     TREE_TYPE (value) = char_array_type_node;
1050   return value;
1051 }
1052 
1053 /* Converts a (possibly wide) character constant token into a tree.  */
1054 static tree
lex_charconst(token)1055 lex_charconst (token)
1056      const cpp_token *token;
1057 {
1058   cppchar_t result;
1059   tree type, value;
1060   unsigned int chars_seen;
1061   int unsignedp;
1062 
1063   result = cpp_interpret_charconst (parse_in, token,
1064  				    &chars_seen, &unsignedp);
1065 
1066   /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1067      before possibly widening to HOST_WIDE_INT for build_int_2.  */
1068   if (unsignedp || (cppchar_signed_t) result >= 0)
1069     value = build_int_2 (result, 0);
1070   else
1071     value = build_int_2 ((cppchar_signed_t) result, -1);
1072 
1073   if (token->type == CPP_WCHAR)
1074     type = wchar_type_node;
1075   /* In C, a character constant has type 'int'.
1076      In C++ 'char', but multi-char charconsts have type 'int'.  */
1077   else if ((c_language == clk_c) || chars_seen > 1)
1078     type = integer_type_node;
1079   else
1080     type = char_type_node;
1081 
1082   TREE_TYPE (value) = type;
1083   return value;
1084 }
1085