1 /*
2 * Copyright (c) 1987, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #if 0
31 #ifndef lint
32 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
33 #endif
34 #endif
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD: stable/9/usr.bin/ctags/C.c 216370 2010-12-11 08:32:16Z joel $");
38
39 #include <limits.h>
40 #include <stdio.h>
41 #include <string.h>
42
43 #include "ctags.h"
44
/* Forward declarations of the helpers that are private to this file. */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int key);
static int	str_entry(int c);
49
50 /*
51 * c_entries --
52 * read .c and .h files and call appropriate routines
53 */
54 void
c_entries(void)55 c_entries(void)
56 {
57 int c; /* current character */
58 int level; /* brace level */
59 int token; /* if reading a token */
60 int t_def; /* if reading a typedef */
61 int t_level; /* typedef's brace level */
62 char *sp; /* buffer pointer */
63 char tok[MAXTOKEN]; /* token buffer */
64
65 lineftell = ftell(inf);
66 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
67 while (GETC(!=, EOF)) {
68 switch (c) {
69 /*
70 * Here's where it DOESN'T handle: {
71 * foo(a)
72 * {
73 * #ifdef notdef
74 * }
75 * #endif
76 * if (a)
77 * puts("hello, world");
78 * }
79 */
80 case '{':
81 ++level;
82 goto endtok;
83 case '}':
84 /*
85 * if level goes below zero, try and fix
86 * it, even though we've already messed up
87 */
88 if (--level < 0)
89 level = 0;
90 goto endtok;
91
92 case '\n':
93 SETLINE;
94 /*
95 * the above 3 cases are similar in that they
96 * are special characters that also end tokens.
97 */
98 endtok: if (sp > tok) {
99 *sp = EOS;
100 token = YES;
101 sp = tok;
102 }
103 else
104 token = NO;
105 continue;
106
107 /*
108 * We ignore quoted strings and character constants
109 * completely.
110 */
111 case '"':
112 case '\'':
113 skip_string(c);
114 break;
115
116 /*
117 * comments can be fun; note the state is unchanged after
118 * return, in case we found:
119 * "foo() XX comment XX { int bar; }"
120 */
121 case '/':
122 if (GETC(==, '*') || c == '/') {
123 skip_comment(c);
124 continue;
125 }
126 (void)ungetc(c, inf);
127 c = '/';
128 goto storec;
129
130 /* hash marks flag #define's. */
131 case '#':
132 if (sp == tok) {
133 hash_entry();
134 break;
135 }
136 goto storec;
137
138 /*
139 * if we have a current token, parenthesis on
140 * level zero indicates a function.
141 */
142 case '(':
143 if (!level && token) {
144 int curline;
145
146 if (sp != tok)
147 *sp = EOS;
148 /*
149 * grab the line immediately, we may
150 * already be wrong, for example,
151 * foo\n
152 * (arg1,
153 */
154 getline();
155 curline = lineno;
156 if (func_entry()) {
157 ++level;
158 pfnote(tok, curline);
159 }
160 break;
161 }
162 goto storec;
163
164 /*
165 * semi-colons indicate the end of a typedef; if we find a
166 * typedef we search for the next semi-colon of the same
167 * level as the typedef. Ignoring "structs", they are
168 * tricky, since you can find:
169 *
170 * "typedef long time_t;"
171 * "typedef unsigned int u_int;"
172 * "typedef unsigned int u_int [10];"
173 *
174 * If looking at a typedef, we save a copy of the last token
175 * found. Then, when we find the ';' we take the current
176 * token if it starts with a valid token name, else we take
177 * the one we saved. There's probably some reasonable
178 * alternative to this...
179 */
180 case ';':
181 if (t_def && level == t_level) {
182 t_def = NO;
183 getline();
184 if (sp != tok)
185 *sp = EOS;
186 pfnote(tok, lineno);
187 break;
188 }
189 goto storec;
190
191 /*
192 * store characters until one that can't be part of a token
193 * comes along; check the current token against certain
194 * reserved words.
195 */
196 default:
197 /* ignore whitespace */
198 if (c == ' ' || c == '\t') {
199 int save = c;
200 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
201 ;
202 if (c == EOF)
203 return;
204 (void)ungetc(c, inf);
205 c = save;
206 }
207 storec: if (!intoken(c)) {
208 if (sp == tok)
209 break;
210 *sp = EOS;
211 if (tflag) {
212 /* no typedefs inside typedefs */
213 if (!t_def &&
214 !memcmp(tok, "typedef",8)) {
215 t_def = YES;
216 t_level = level;
217 break;
218 }
219 /* catch "typedef struct" */
220 if ((!t_def || t_level < level)
221 && (!memcmp(tok, "struct", 7)
222 || !memcmp(tok, "union", 6)
223 || !memcmp(tok, "enum", 5))) {
224 /*
225 * get line immediately;
226 * may change before '{'
227 */
228 getline();
229 if (str_entry(c))
230 ++level;
231 break;
232 /* } */
233 }
234 }
235 sp = tok;
236 }
237 else if (sp != tok || begtoken(c)) {
238 if (sp == tok + sizeof tok - 1)
239 /* Too long -- truncate it */
240 *sp = EOS;
241 else
242 *sp++ = c;
243 token = YES;
244 }
245 continue;
246 }
247
248 sp = tok;
249 token = NO;
250 }
251 }
252
253 /*
254 * func_entry --
255 * handle a function reference
256 */
257 static int
func_entry(void)258 func_entry(void)
259 {
260 int c; /* current character */
261 int level = 0; /* for matching '()' */
262
263 /*
264 * Find the end of the assumed function declaration.
265 * Note that ANSI C functions can have type definitions so keep
266 * track of the parentheses nesting level.
267 */
268 while (GETC(!=, EOF)) {
269 switch (c) {
270 case '\'':
271 case '"':
272 /* skip strings and character constants */
273 skip_string(c);
274 break;
275 case '/':
276 /* skip comments */
277 if (GETC(==, '*') || c == '/')
278 skip_comment(c);
279 break;
280 case '(':
281 level++;
282 break;
283 case ')':
284 if (level == 0)
285 goto fnd;
286 level--;
287 break;
288 case '\n':
289 SETLINE;
290 }
291 }
292 return (NO);
293 fnd:
294 /*
295 * we assume that the character after a function's right paren
296 * is a token character if it's a function and a non-token
297 * character if it's a declaration. Comments don't count...
298 */
299 for (;;) {
300 while (GETC(!=, EOF) && iswhite(c))
301 if (c == '\n')
302 SETLINE;
303 if (intoken(c) || c == '{')
304 break;
305 if (c == '/' && (GETC(==, '*') || c == '/'))
306 skip_comment(c);
307 else { /* don't ever "read" '/' */
308 (void)ungetc(c, inf);
309 return (NO);
310 }
311 }
312 if (c != '{')
313 (void)skip_key('{');
314 return (YES);
315 }
316
317 /*
318 * hash_entry --
319 * handle a line starting with a '#'
320 */
321 static void
hash_entry(void)322 hash_entry(void)
323 {
324 int c; /* character read */
325 int curline; /* line started on */
326 char *sp; /* buffer pointer */
327 char tok[MAXTOKEN]; /* storage buffer */
328
329 /* ignore leading whitespace */
330 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
331 ;
332 (void)ungetc(c, inf);
333
334 curline = lineno;
335 for (sp = tok;;) { /* get next token */
336 if (GETC(==, EOF))
337 return;
338 if (iswhite(c))
339 break;
340 if (sp == tok + sizeof tok - 1)
341 /* Too long -- truncate it */
342 *sp = EOS;
343 else
344 *sp++ = c;
345 }
346 *sp = EOS;
347 if (memcmp(tok, "define", 6)) /* only interested in #define's */
348 goto skip;
349 for (;;) { /* this doesn't handle "#define \n" */
350 if (GETC(==, EOF))
351 return;
352 if (!iswhite(c))
353 break;
354 }
355 for (sp = tok;;) { /* get next token */
356 if (sp == tok + sizeof tok - 1)
357 /* Too long -- truncate it */
358 *sp = EOS;
359 else
360 *sp++ = c;
361 if (GETC(==, EOF))
362 return;
363 /*
364 * this is where it DOESN'T handle
365 * "#define \n"
366 */
367 if (!intoken(c))
368 break;
369 }
370 *sp = EOS;
371 if (dflag || c == '(') { /* only want macros */
372 getline();
373 pfnote(tok, curline);
374 }
375 skip: if (c == '\n') { /* get rid of rest of define */
376 SETLINE
377 if (*(sp - 1) != '\\')
378 return;
379 }
380 (void)skip_key('\n');
381 }
382
383 /*
384 * str_entry --
385 * handle a struct, union or enum entry
386 */
387 static int
str_entry(int c)388 str_entry(int c) /* c is current character */
389 {
390 int curline; /* line started on */
391 char *sp; /* buffer pointer */
392 char tok[LINE_MAX]; /* storage buffer */
393
394 curline = lineno;
395 while (iswhite(c))
396 if (GETC(==, EOF))
397 return (NO);
398 if (c == '{') /* it was "struct {" */
399 return (YES);
400 for (sp = tok;;) { /* get next token */
401 if (sp == tok + sizeof tok - 1)
402 /* Too long -- truncate it */
403 *sp = EOS;
404 else
405 *sp++ = c;
406 if (GETC(==, EOF))
407 return (NO);
408 if (!intoken(c))
409 break;
410 }
411 switch (c) {
412 case '{': /* it was "struct foo{" */
413 --sp;
414 break;
415 case '\n': /* it was "struct foo\n" */
416 SETLINE;
417 /*FALLTHROUGH*/
418 default: /* probably "struct foo " */
419 while (GETC(!=, EOF))
420 if (!iswhite(c))
421 break;
422 if (c != '{') {
423 (void)ungetc(c, inf);
424 return (NO);
425 }
426 }
427 *sp = EOS;
428 pfnote(tok, curline);
429 return (YES);
430 }
431
432 /*
433 * skip_comment --
434 * skip over comment
435 */
436 void
skip_comment(int t)437 skip_comment(int t) /* t is comment character */
438 {
439 int c; /* character read */
440 int star; /* '*' flag */
441
442 for (star = 0; GETC(!=, EOF);)
443 switch(c) {
444 /* comments don't nest, nor can they be escaped. */
445 case '*':
446 star = YES;
447 break;
448 case '/':
449 if (star && t == '*')
450 return;
451 break;
452 case '\n':
453 if (t == '/')
454 return;
455 SETLINE;
456 /*FALLTHROUGH*/
457 default:
458 star = NO;
459 break;
460 }
461 }
462
463 /*
464 * skip_string --
465 * skip to the end of a string or character constant.
466 */
467 void
skip_string(int key)468 skip_string(int key)
469 {
470 int c,
471 skip;
472
473 for (skip = NO; GETC(!=, EOF); )
474 switch (c) {
475 case '\\': /* a backslash escapes anything */
476 skip = !skip; /* we toggle in case it's "\\" */
477 break;
478 case '\n':
479 SETLINE;
480 /*FALLTHROUGH*/
481 default:
482 if (c == key && !skip)
483 return;
484 skip = NO;
485 }
486 }
487
488 /*
489 * skip_key --
490 * skip to next char "key"
491 */
492 int
skip_key(int key)493 skip_key(int key)
494 {
495 int c,
496 skip,
497 retval;
498
499 for (skip = retval = NO; GETC(!=, EOF);)
500 switch(c) {
501 case '\\': /* a backslash escapes anything */
502 skip = !skip; /* we toggle in case it's "\\" */
503 break;
504 case ';': /* special case for yacc; if one */
505 case '|': /* of these chars occurs, we may */
506 retval = YES; /* have moved out of the rule */
507 break; /* not used by C */
508 case '\'':
509 case '"':
510 /* skip strings and character constants */
511 skip_string(c);
512 break;
513 case '/':
514 /* skip comments */
515 if (GETC(==, '*') || c == '/') {
516 skip_comment(c);
517 break;
518 }
519 (void)ungetc(c, inf);
520 c = '/';
521 goto norm;
522 case '\n':
523 SETLINE;
524 /*FALLTHROUGH*/
525 default:
526 norm:
527 if (c == key && !skip)
528 return (retval);
529 skip = NO;
530 }
531 return (retval);
532 }
533