1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24
25 #include "real.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "expr.h"
29 #include "input.h"
30 #include "output.h"
31 #include "c-tree.h"
32 #include "c-common.h"
33 #include "flags.h"
34 #include "timevar.h"
35 #include "cpplib.h"
36 #include "c-pragma.h"
37 #include "toplev.h"
38 #include "intl.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
41 #include "debug.h"
42
43 #ifdef MULTIBYTE_CHARS
44 #include "mbchar.h"
45 #include <locale.h>
46 #endif /* MULTIBYTE_CHARS */
47
48 /* The current line map. */
49 static const struct line_map *map;
50
51 /* The line used to refresh the lineno global variable after each token. */
52 static unsigned int src_lineno;
53
54 /* We may keep statistics about how long which files took to compile. */
55 static int header_time, body_time;
56 static splay_tree file_info_tree;
57
58 /* File used for outputting assembler code. */
59 extern FILE *asm_out_file;
60
61 #undef WCHAR_TYPE_SIZE
62 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
63
64 /* Number of bytes in a wide character. */
65 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
66
67 int pending_lang_change; /* If we need to switch languages - C++ only */
68 int c_header_level; /* depth in C headers - C++ only */
69
70 /* Nonzero tells yylex to ignore \ in string constants. */
71 static int ignore_escape_flag;
72
73 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
74 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
75 static enum integer_type_kind
76 narrowest_unsigned_type PARAMS ((tree, unsigned int));
77 static enum integer_type_kind
78 narrowest_signed_type PARAMS ((tree, unsigned int));
79 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
80 int));
81 static tree lex_charconst PARAMS ((const cpp_token *));
82 static void update_header_times PARAMS ((const char *));
83 static int dump_one_header PARAMS ((splay_tree_node, void *));
84 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
85 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
86 const cpp_string *));
87 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
88 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
89 static void cb_define PARAMS ((cpp_reader *, unsigned int,
90 cpp_hashnode *));
91 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93
94 const char *
init_c_lex(filename)95 init_c_lex (filename)
96 const char *filename;
97 {
98 struct cpp_callbacks *cb;
99 struct c_fileinfo *toplevel;
100
101 /* Set up filename timing. Must happen before cpp_read_main_file. */
102 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
103 0,
104 (splay_tree_delete_value_fn)free);
105 toplevel = get_fileinfo ("<top level>");
106 if (flag_detailed_statistics)
107 {
108 header_time = 0;
109 body_time = get_run_time ();
110 toplevel->time = body_time;
111 }
112
113 #ifdef MULTIBYTE_CHARS
114 /* Change to the native locale for multibyte conversions. */
115 setlocale (LC_CTYPE, "");
116 GET_ENVIRONMENT (literal_codeset, "LANG");
117 #endif
118
119 cb = cpp_get_callbacks (parse_in);
120
121 cb->line_change = cb_line_change;
122 cb->ident = cb_ident;
123 cb->file_change = cb_file_change;
124 cb->def_pragma = cb_def_pragma;
125
126 /* Set the debug callbacks if we can use them. */
127 if (debug_info_level == DINFO_LEVEL_VERBOSE
128 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
129 || write_symbols == VMS_AND_DWARF2_DEBUG))
130 {
131 cb->define = cb_define;
132 cb->undef = cb_undef;
133 }
134
135 /* Start it at 0. */
136 lineno = 0;
137
138 return cpp_read_main_file (parse_in, filename, ident_hash);
139 }
140
141 /* A thin wrapper around the real parser that initializes the
142 integrated preprocessor after debug output has been initialized.
143 Also, make sure the start_source_file debug hook gets called for
144 the primary source file. */
145
146 void
c_common_parse_file(set_yydebug)147 c_common_parse_file (set_yydebug)
148 int set_yydebug ATTRIBUTE_UNUSED;
149 {
150 #if YYDEBUG != 0
151 yydebug = set_yydebug;
152 #else
153 warning ("YYDEBUG not defined");
154 #endif
155
156 (*debug_hooks->start_source_file) (lineno, input_filename);
157 cpp_finish_options (parse_in);
158
159 yyparse ();
160 free_parser_stacks ();
161 }
162
163 struct c_fileinfo *
get_fileinfo(name)164 get_fileinfo (name)
165 const char *name;
166 {
167 splay_tree_node n;
168 struct c_fileinfo *fi;
169
170 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
171 if (n)
172 return (struct c_fileinfo *) n->value;
173
174 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
175 fi->time = 0;
176 fi->interface_only = 0;
177 fi->interface_unknown = 1;
178 splay_tree_insert (file_info_tree, (splay_tree_key) name,
179 (splay_tree_value) fi);
180 return fi;
181 }
182
183 static void
update_header_times(name)184 update_header_times (name)
185 const char *name;
186 {
187 /* Changing files again. This means currently collected time
188 is charged against header time, and body time starts back at 0. */
189 if (flag_detailed_statistics)
190 {
191 int this_time = get_run_time ();
192 struct c_fileinfo *file = get_fileinfo (name);
193 header_time += this_time - body_time;
194 file->time += this_time - body_time;
195 body_time = this_time;
196 }
197 }
198
199 static int
dump_one_header(n,dummy)200 dump_one_header (n, dummy)
201 splay_tree_node n;
202 void *dummy ATTRIBUTE_UNUSED;
203 {
204 print_time ((const char *) n->key,
205 ((struct c_fileinfo *) n->value)->time);
206 return 0;
207 }
208
209 void
dump_time_statistics()210 dump_time_statistics ()
211 {
212 struct c_fileinfo *file = get_fileinfo (input_filename);
213 int this_time = get_run_time ();
214 file->time += this_time - body_time;
215
216 fprintf (stderr, "\n******\n");
217 print_time ("header files (total)", header_time);
218 print_time ("main file (total)", this_time - body_time);
219 fprintf (stderr, "ratio = %g : 1\n",
220 (double)header_time / (double)(this_time - body_time));
221 fprintf (stderr, "\n******\n");
222
223 splay_tree_foreach (file_info_tree, dump_one_header, 0);
224 }
225
226 static void
cb_ident(pfile,line,str)227 cb_ident (pfile, line, str)
228 cpp_reader *pfile ATTRIBUTE_UNUSED;
229 unsigned int line ATTRIBUTE_UNUSED;
230 const cpp_string *str ATTRIBUTE_UNUSED;
231 {
232 #ifdef ASM_OUTPUT_IDENT
233 if (! flag_no_ident)
234 {
235 /* Convert escapes in the string. */
236 tree value = lex_string (str->text, str->len, 0);
237 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
238 }
239 #endif
240 }
241
242 /* Called at the start of every non-empty line. TOKEN is the first
243 lexed token on the line. Used for diagnostic line numbers. */
244 static void
cb_line_change(pfile,token,parsing_args)245 cb_line_change (pfile, token, parsing_args)
246 cpp_reader *pfile ATTRIBUTE_UNUSED;
247 const cpp_token *token;
248 int parsing_args;
249 {
250 if (token->type == CPP_EOF || parsing_args)
251 return;
252
253 src_lineno = SOURCE_LINE (map, token->line);
254 }
255
256 static void
cb_file_change(pfile,new_map)257 cb_file_change (pfile, new_map)
258 cpp_reader *pfile ATTRIBUTE_UNUSED;
259 const struct line_map *new_map;
260 {
261 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
262
263 if (new_map->reason == LC_ENTER)
264 {
265 /* Don't stack the main buffer on the input stack;
266 we already did in compile_file. */
267 if (map == NULL)
268 main_input_filename = new_map->to_file;
269 else
270 {
271 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
272
273 lineno = included_at;
274 push_srcloc (new_map->to_file, 1);
275 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
276 #ifndef NO_IMPLICIT_EXTERN_C
277 if (c_header_level)
278 ++c_header_level;
279 else if (new_map->sysp == 2)
280 {
281 c_header_level = 1;
282 ++pending_lang_change;
283 }
284 #endif
285 }
286 }
287 else if (new_map->reason == LC_LEAVE)
288 {
289 #ifndef NO_IMPLICIT_EXTERN_C
290 if (c_header_level && --c_header_level == 0)
291 {
292 if (new_map->sysp == 2)
293 warning ("badly nested C headers from preprocessor");
294 --pending_lang_change;
295 }
296 #endif
297 pop_srcloc ();
298
299 (*debug_hooks->end_source_file) (to_line);
300 }
301
302 update_header_times (new_map->to_file);
303 in_system_header = new_map->sysp != 0;
304 input_filename = new_map->to_file;
305 lineno = to_line;
306 map = new_map;
307
308 /* Hook for C++. */
309 extract_interface_info ();
310 }
311
312 static void
cb_def_pragma(pfile,line)313 cb_def_pragma (pfile, line)
314 cpp_reader *pfile;
315 unsigned int line;
316 {
317 /* Issue a warning message if we have been asked to do so. Ignore
318 unknown pragmas in system headers unless an explicit
319 -Wunknown-pragmas has been given. */
320 if (warn_unknown_pragmas > in_system_header)
321 {
322 const unsigned char *space, *name;
323 const cpp_token *s;
324
325 space = name = (const unsigned char *) "";
326 s = cpp_get_token (pfile);
327 if (s->type != CPP_EOF)
328 {
329 space = cpp_token_as_text (pfile, s);
330 s = cpp_get_token (pfile);
331 if (s->type == CPP_NAME)
332 name = cpp_token_as_text (pfile, s);
333 }
334
335 lineno = SOURCE_LINE (map, line);
336 warning ("ignoring #pragma %s %s", space, name);
337 }
338 }
339
340 /* #define callback for DWARF and DWARF2 debug info. */
341 static void
cb_define(pfile,line,node)342 cb_define (pfile, line, node)
343 cpp_reader *pfile;
344 unsigned int line;
345 cpp_hashnode *node;
346 {
347 (*debug_hooks->define) (SOURCE_LINE (map, line),
348 (const char *) cpp_macro_definition (pfile, node));
349 }
350
351 /* #undef callback for DWARF and DWARF2 debug info. */
352 static void
cb_undef(pfile,line,node)353 cb_undef (pfile, line, node)
354 cpp_reader *pfile ATTRIBUTE_UNUSED;
355 unsigned int line;
356 cpp_hashnode *node;
357 {
358 (*debug_hooks->undef) (SOURCE_LINE (map, line),
359 (const char *) NODE_NAME (node));
360 }
361
362 #if 0 /* not yet */
/* Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

#ifndef TARGET_EBCDIC
/* Inclusive code point ranges that may appear in an identifier, as per
   [extendid], grouped by script.  The order is irrelevant: the table
   is searched linearly.  */
static const struct extended_char_range
{
  unsigned short first;
  unsigned short last;
} extended_char_ranges[] =
{
  /* Latin */
  { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
  { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
  { 0x1ea0, 0x1ef9 },

  /* Greek */
  { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
  { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
  { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
  { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
  { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
  { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
  { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
  { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
  { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
  { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

  /* Cyrillic */
  { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
  { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
  { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
  { 0x04f8, 0x04f9 },

  /* Armenian */
  { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

  /* Hebrew */
  { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

  /* Arabic */
  { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
  { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

  /* Devanagari */
  { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

  /* Bengali */
  { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
  { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
  { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

  /* Gurmukhi */
  { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
  { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
  { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

  /* Gujarati */
  { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
  { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
  { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

  /* Oriya */
  { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
  { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
  { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

  /* Tamil */
  { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
  { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
  { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
  { 0x0bb7, 0x0bb9 },

  /* Telugu */
  { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
  { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

  /* Kannada */
  { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
  { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

  /* Malayalam */
  { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
  { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

  /* Thai */
  { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
  { 0x0e4f, 0x0e5b },

  /* Lao.  The entry after 0x0e8a is 0x0e8d; it used to read 0x0e0d,
     a code point the Thai block above already covers, which left
     0x0e8d rejected.  */
  { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
  { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d },
  { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
  { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
  { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
  { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
  { 0x0ec6, 0x0ec6 },

  /* Georgian */
  { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

  /* Hiragana */
  { 0x3041, 0x3094 }, { 0x309b, 0x309e },

  /* Katakana */
  { 0x30a1, 0x30fe },

  /* Bopomofo */
  { 0x3105, 0x312c },

  /* Hangul */
  { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

  /* CJK Unified Ideographs */
  { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
  { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
  { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
  { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
  { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
  { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
  { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
  { 0x4e00, 0x9fa5 }
};
#endif /* ! TARGET_EBCDIC */

/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Values below 0x7f are reported as not being one (zero, no
   diagnostic); every other value yields nonzero, with an error for
   those that are not in the table above.  With TARGET_EBCDIC the
   result is always zero.
   NOTE(review): 0x7f itself is not treated as plain ASCII here and is
   therefore diagnosed; confirm whether that is intended.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0;
       i < sizeof extended_char_ranges / sizeof extended_char_ranges[0];
       i++)
    if (c >= extended_char_ranges[i].first
	&& c <= extended_char_ranges[i].last)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
636
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are emitted as a single byte.  Anything larger
   becomes a lead byte, whose high bits encode the sequence length,
   followed by continuation bytes that each carry six payload bits
   under a 10xxxxxx marker.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, lead;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, lead = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, lead = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, lead = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, lead = 0xf8;
  else
    shift = 30, lead = 0xfc;

  extend_token (lead | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep only this byte's six payload bits.  Without the mask the
	 bits belonging to the preceding bytes leak in and corrupt the
	 10xxxxxx marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
669 #endif
670
671 int
c_lex(value)672 c_lex (value)
673 tree *value;
674 {
675 const cpp_token *tok;
676
677 retry:
678 timevar_push (TV_CPP);
679 do
680 tok = cpp_get_token (parse_in);
681 while (tok->type == CPP_PADDING);
682 timevar_pop (TV_CPP);
683
684 /* The C++ front end does horrible things with the current line
685 number. To ensure an accurate line number, we must reset it
686 every time we return a token. */
687 lineno = src_lineno;
688
689 *value = NULL_TREE;
690 switch (tok->type)
691 {
692 /* Issue this error here, where we can get at tok->val.c. */
693 case CPP_OTHER:
694 if (ISGRAPH (tok->val.c))
695 error ("stray '%c' in program", tok->val.c);
696 else
697 error ("stray '\\%o' in program", tok->val.c);
698 goto retry;
699
700 case CPP_NAME:
701 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
702 break;
703
704 case CPP_NUMBER:
705 {
706 unsigned int flags = cpp_classify_number (parse_in, tok);
707
708 switch (flags & CPP_N_CATEGORY)
709 {
710 case CPP_N_INVALID:
711 /* cpplib has issued an error. */
712 *value = error_mark_node;
713 break;
714
715 case CPP_N_INTEGER:
716 *value = interpret_integer (tok, flags);
717 break;
718
719 case CPP_N_FLOATING:
720 *value = interpret_float (tok, flags);
721 break;
722
723 default:
724 abort ();
725 }
726 }
727 break;
728
729 case CPP_CHAR:
730 case CPP_WCHAR:
731 *value = lex_charconst (tok);
732 break;
733
734 case CPP_STRING:
735 case CPP_WSTRING:
736 *value = lex_string (tok->val.str.text, tok->val.str.len,
737 tok->type == CPP_WSTRING);
738 break;
739
740 /* These tokens should not be visible outside cpplib. */
741 case CPP_HEADER_NAME:
742 case CPP_COMMENT:
743 case CPP_MACRO_ARG:
744 abort ();
745
746 default: break;
747 }
748
749 return tok->type;
750 }
751
752 /* Returns the narrowest C-visible unsigned type, starting with the
753 minimum specified by FLAGS, that can fit VALUE, or itk_none if
754 there isn't one. */
755 static enum integer_type_kind
narrowest_unsigned_type(value,flags)756 narrowest_unsigned_type (value, flags)
757 tree value;
758 unsigned int flags;
759 {
760 enum integer_type_kind itk;
761
762 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
763 itk = itk_unsigned_int;
764 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
765 itk = itk_unsigned_long;
766 else
767 itk = itk_unsigned_long_long;
768
769 /* int_fits_type_p must think the type of its first argument is
770 wider than its second argument, or it won't do the proper check. */
771 TREE_TYPE (value) = widest_unsigned_literal_type_node;
772
773 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
774 if (int_fits_type_p (value, integer_types[itk]))
775 return itk;
776
777 return itk_none;
778 }
779
780 /* Ditto, but narrowest signed type. */
781 static enum integer_type_kind
narrowest_signed_type(value,flags)782 narrowest_signed_type (value, flags)
783 tree value;
784 unsigned int flags;
785 {
786 enum integer_type_kind itk;
787
788 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
789 itk = itk_int;
790 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
791 itk = itk_long;
792 else
793 itk = itk_long_long;
794
795 /* int_fits_type_p must think the type of its first argument is
796 wider than its second argument, or it won't do the proper check. */
797 TREE_TYPE (value) = widest_unsigned_literal_type_node;
798
799 for (; itk < itk_none; itk += 2 /* skip signed types */)
800 if (int_fits_type_p (value, integer_types[itk]))
801 return itk;
802
803 return itk_none;
804 }
805
806 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
807 static tree
interpret_integer(token,flags)808 interpret_integer (token, flags)
809 const cpp_token *token;
810 unsigned int flags;
811 {
812 tree value, type;
813 enum integer_type_kind itk;
814 cpp_num integer;
815 cpp_options *options = cpp_get_options (parse_in);
816
817 integer = cpp_interpret_integer (parse_in, token, flags);
818 integer = cpp_num_sign_extend (integer, options->precision);
819 value = build_int_2_wide (integer.low, integer.high);
820
821 /* The type of a constant with a U suffix is straightforward. */
822 if (flags & CPP_N_UNSIGNED)
823 itk = narrowest_unsigned_type (value, flags);
824 else
825 {
826 /* The type of a potentially-signed integer constant varies
827 depending on the base it's in, the standard in use, and the
828 length suffixes. */
829 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
830 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
831
832 /* In both C89 and C99, octal and hex constants may be signed or
833 unsigned, whichever fits tighter. We do not warn about this
834 choice differing from the traditional choice, as the constant
835 is probably a bit pattern and either way will work. */
836 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
837 itk = MIN (itk_u, itk_s);
838 else
839 {
840 /* In C99, decimal constants are always signed.
841 In C89, decimal constants that don't fit in long have
842 undefined behavior; we try to make them unsigned long.
843 In GCC's extended C89, that last is true of decimal
844 constants that don't fit in long long, too. */
845
846 itk = itk_s;
847 if (itk_s > itk_u && itk_s > itk_long)
848 {
849 if (!flag_isoc99)
850 {
851 if (itk_u < itk_unsigned_long)
852 itk_u = itk_unsigned_long;
853 itk = itk_u;
854 warning ("this decimal constant is unsigned only in ISO C90");
855 }
856 else if (warn_traditional)
857 warning ("this decimal constant would be unsigned in ISO C90");
858 }
859 }
860 }
861
862 if (itk == itk_none)
863 /* cpplib has already issued a warning for overflow. */
864 type = ((flags & CPP_N_UNSIGNED)
865 ? widest_unsigned_literal_type_node
866 : widest_integer_literal_type_node);
867 else
868 type = integer_types[itk];
869
870 if (itk > itk_unsigned_long
871 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
872 && ! in_system_header && ! flag_isoc99)
873 pedwarn ("integer constant is too large for \"%s\" type",
874 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
875
876 TREE_TYPE (value) = type;
877
878 /* Convert imaginary to a complex type. */
879 if (flags & CPP_N_IMAGINARY)
880 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
881
882 return value;
883 }
884
885 /* Interpret TOKEN, a floating point number with FLAGS as classified
886 by cpplib. */
887 static tree
interpret_float(token,flags)888 interpret_float (token, flags)
889 const cpp_token *token;
890 unsigned int flags;
891 {
892 tree type;
893 tree value;
894 REAL_VALUE_TYPE real;
895 char *copy;
896 size_t copylen;
897 const char *typename;
898
899 /* FIXME: make %T work in error/warning, then we don't need typename. */
900 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
901 {
902 type = long_double_type_node;
903 typename = "long double";
904 }
905 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
906 || flag_single_precision_constant)
907 {
908 type = float_type_node;
909 typename = "float";
910 }
911 else
912 {
913 type = double_type_node;
914 typename = "double";
915 }
916
917 /* Copy the constant to a nul-terminated buffer. If the constant
918 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
919 can't handle them. */
920 copylen = token->val.str.len;
921 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
922 /* Must be an F or L suffix. */
923 copylen--;
924 if (flags & CPP_N_IMAGINARY)
925 /* I or J suffix. */
926 copylen--;
927
928 copy = alloca (copylen + 1);
929 memcpy (copy, token->val.str.text, copylen);
930 copy[copylen] = '\0';
931
932 real_from_string (&real, copy);
933 real_convert (&real, TYPE_MODE (type), &real);
934
935 /* A diagnostic is required for "soft" overflow by some ISO C
936 testsuites. This is not pedwarn, because some people don't want
937 an error for this.
938 ??? That's a dubious reason... is this a mandatory diagnostic or
939 isn't it? -- zw, 2001-08-21. */
940 if (REAL_VALUE_ISINF (real) && pedantic)
941 warning ("floating constant exceeds range of \"%s\"", typename);
942
943 /* Create a node with determined type and value. */
944 value = build_real (type, real);
945 if (flags & CPP_N_IMAGINARY)
946 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
947
948 return value;
949 }
950
951 static tree
lex_string(str,len,wide)952 lex_string (str, len, wide)
953 const unsigned char *str;
954 unsigned int len;
955 int wide;
956 {
957 tree value;
958 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
959 char *q = buf;
960 const unsigned char *p = str, *limit = str + len;
961 cppchar_t c;
962
963 #ifdef MULTIBYTE_CHARS
964 /* Reset multibyte conversion state. */
965 (void) local_mbtowc (NULL, NULL, 0);
966 #endif
967
968 while (p < limit)
969 {
970 #ifdef MULTIBYTE_CHARS
971 wchar_t wc;
972 int char_len;
973
974 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
975 if (char_len == -1)
976 {
977 warning ("ignoring invalid multibyte character");
978 char_len = 1;
979 c = *p++;
980 }
981 else
982 {
983 p += char_len;
984 c = wc;
985 }
986 #else
987 c = *p++;
988 #endif
989
990 if (c == '\\' && !ignore_escape_flag)
991 c = cpp_parse_escape (parse_in, &p, limit, wide);
992
993 /* Add this single character into the buffer either as a wchar_t,
994 a multibyte sequence, or as a single byte. */
995 if (wide)
996 {
997 unsigned charwidth = TYPE_PRECISION (char_type_node);
998 unsigned bytemask = (1 << charwidth) - 1;
999 int byte;
1000
1001 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1002 {
1003 int n;
1004 if (byte >= (int) sizeof (c))
1005 n = 0;
1006 else
1007 n = (c >> (byte * charwidth)) & bytemask;
1008 if (BYTES_BIG_ENDIAN)
1009 q[WCHAR_BYTES - byte - 1] = n;
1010 else
1011 q[byte] = n;
1012 }
1013 q += WCHAR_BYTES;
1014 }
1015 #ifdef MULTIBYTE_CHARS
1016 else if (char_len > 1)
1017 {
1018 /* We're dealing with a multibyte character. */
1019 for ( ; char_len >0; --char_len)
1020 {
1021 *q++ = *(p - char_len);
1022 }
1023 }
1024 #endif
1025 else
1026 {
1027 *q++ = c;
1028 }
1029 }
1030
1031 /* Terminate the string value, either with a single byte zero
1032 or with a wide zero. */
1033
1034 if (wide)
1035 {
1036 memset (q, 0, WCHAR_BYTES);
1037 q += WCHAR_BYTES;
1038 }
1039 else
1040 {
1041 *q++ = '\0';
1042 }
1043
1044 value = build_string (q - buf, buf);
1045
1046 if (wide)
1047 TREE_TYPE (value) = wchar_array_type_node;
1048 else
1049 TREE_TYPE (value) = char_array_type_node;
1050 return value;
1051 }
1052
1053 /* Converts a (possibly wide) character constant token into a tree. */
1054 static tree
lex_charconst(token)1055 lex_charconst (token)
1056 const cpp_token *token;
1057 {
1058 cppchar_t result;
1059 tree type, value;
1060 unsigned int chars_seen;
1061 int unsignedp;
1062
1063 result = cpp_interpret_charconst (parse_in, token,
1064 &chars_seen, &unsignedp);
1065
1066 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1067 before possibly widening to HOST_WIDE_INT for build_int_2. */
1068 if (unsignedp || (cppchar_signed_t) result >= 0)
1069 value = build_int_2 (result, 0);
1070 else
1071 value = build_int_2 ((cppchar_signed_t) result, -1);
1072
1073 if (token->type == CPP_WCHAR)
1074 type = wchar_type_node;
1075 /* In C, a character constant has type 'int'.
1076 In C++ 'char', but multi-char charconsts have type 'int'. */
1077 else if ((c_language == clk_c) || chars_seen > 1)
1078 type = integer_type_node;
1079 else
1080 type = char_type_node;
1081
1082 TREE_TYPE (value) = type;
1083 return value;
1084 }
1085