1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1987, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if 0
33 #ifndef lint
34 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
35 #endif
36 #endif
37
38 #include <sys/cdefs.h>
39 #include <limits.h>
40 #include <stddef.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include "ctags.h"
45
/* Forward declarations for helpers that are defined later in this file. */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int key);
static int	str_entry(int c);
50
51 /*
52 * c_entries --
53 * read .c and .h files and call appropriate routines
54 */
55 void
c_entries(void)56 c_entries(void)
57 {
58 int c; /* current character */
59 int level; /* brace level */
60 int token; /* if reading a token */
61 int t_def; /* if reading a typedef */
62 int t_level; /* typedef's brace level */
63 char *sp; /* buffer pointer */
64 char tok[MAXTOKEN]; /* token buffer */
65
66 lineftell = ftell(inf);
67 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
68 while (GETC(!=, EOF)) {
69 switch (c) {
70 /*
71 * Here's where it DOESN'T handle: {
72 * foo(a)
73 * {
74 * #ifdef notdef
75 * }
76 * #endif
77 * if (a)
78 * puts("hello, world");
79 * }
80 */
81 case '{':
82 ++level;
83 goto endtok;
84 case '}':
85 /*
86 * if level goes below zero, try and fix
87 * it, even though we've already messed up
88 */
89 if (--level < 0)
90 level = 0;
91 goto endtok;
92
93 case '\n':
94 SETLINE;
95 /*
96 * the above 3 cases are similar in that they
97 * are special characters that also end tokens.
98 */
99 endtok: if (sp > tok) {
100 *sp = EOS;
101 token = YES;
102 sp = tok;
103 }
104 else
105 token = NO;
106 continue;
107
108 /*
109 * We ignore quoted strings and character constants
110 * completely.
111 */
112 case '"':
113 case '\'':
114 skip_string(c);
115 break;
116
117 /*
118 * comments can be fun; note the state is unchanged after
119 * return, in case we found:
120 * "foo() XX comment XX { int bar; }"
121 */
122 case '/':
123 if (GETC(==, '*') || c == '/') {
124 skip_comment(c);
125 continue;
126 }
127 (void)ungetc(c, inf);
128 c = '/';
129 goto storec;
130
131 /* hash marks flag #define's. */
132 case '#':
133 if (sp == tok) {
134 hash_entry();
135 break;
136 }
137 goto storec;
138
139 /*
140 * if we have a current token, parenthesis on
141 * level zero indicates a function.
142 */
143 case '(':
144 if (!level && token) {
145 int curline;
146
147 if (sp != tok)
148 *sp = EOS;
149 /*
150 * grab the line immediately, we may
151 * already be wrong, for example,
152 * foo\n
153 * (arg1,
154 */
155 get_line();
156 curline = lineno;
157 if (func_entry()) {
158 ++level;
159 pfnote(tok, curline);
160 }
161 break;
162 }
163 goto storec;
164
165 /*
166 * semi-colons indicate the end of a typedef; if we find a
167 * typedef we search for the next semi-colon of the same
168 * level as the typedef. Ignoring "structs", they are
169 * tricky, since you can find:
170 *
171 * "typedef long time_t;"
172 * "typedef unsigned int u_int;"
173 * "typedef unsigned int u_int [10];"
174 *
175 * If looking at a typedef, we save a copy of the last token
176 * found. Then, when we find the ';' we take the current
177 * token if it starts with a valid token name, else we take
178 * the one we saved. There's probably some reasonable
179 * alternative to this...
180 */
181 case ';':
182 if (t_def && level == t_level) {
183 t_def = NO;
184 get_line();
185 if (sp != tok)
186 *sp = EOS;
187 pfnote(tok, lineno);
188 break;
189 }
190 goto storec;
191
192 /*
193 * store characters until one that can't be part of a token
194 * comes along; check the current token against certain
195 * reserved words.
196 */
197 default:
198 /* ignore whitespace */
199 if (c == ' ' || c == '\t') {
200 int save = c;
201 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
202 ;
203 if (c == EOF)
204 return;
205 (void)ungetc(c, inf);
206 c = save;
207 }
208 storec: if (!intoken(c)) {
209 if (sp == tok)
210 break;
211 *sp = EOS;
212 if (tflag) {
213 /* no typedefs inside typedefs */
214 if (!t_def &&
215 !memcmp(tok, "typedef",8)) {
216 t_def = YES;
217 t_level = level;
218 break;
219 }
220 /* catch "typedef struct" */
221 if ((!t_def || t_level < level)
222 && (!memcmp(tok, "struct", 7)
223 || !memcmp(tok, "union", 6)
224 || !memcmp(tok, "enum", 5))) {
225 /*
226 * get line immediately;
227 * may change before '{'
228 */
229 get_line();
230 if (str_entry(c))
231 ++level;
232 break;
233 /* } */
234 }
235 }
236 sp = tok;
237 }
238 else if (sp != tok || begtoken(c)) {
239 if (sp == tok + sizeof tok - 1)
240 /* Too long -- truncate it */
241 *sp = EOS;
242 else
243 *sp++ = c;
244 token = YES;
245 }
246 continue;
247 }
248
249 sp = tok;
250 token = NO;
251 }
252 }
253
254 /*
255 * func_entry --
256 * handle a function reference
257 */
258 static int
func_entry(void)259 func_entry(void)
260 {
261 int c; /* current character */
262 int level = 0; /* for matching '()' */
263 static char attribute[] = "__attribute__";
264 char maybe_attribute[sizeof attribute + 1],
265 *anext;
266
267 /*
268 * Find the end of the assumed function declaration.
269 * Note that ANSI C functions can have type definitions so keep
270 * track of the parentheses nesting level.
271 */
272 while (GETC(!=, EOF)) {
273 switch (c) {
274 case '\'':
275 case '"':
276 /* skip strings and character constants */
277 skip_string(c);
278 break;
279 case '/':
280 /* skip comments */
281 if (GETC(==, '*') || c == '/')
282 skip_comment(c);
283 break;
284 case '(':
285 level++;
286 break;
287 case ')':
288 if (level == 0)
289 goto fnd;
290 level--;
291 break;
292 case '\n':
293 SETLINE;
294 }
295 }
296 return (NO);
297 fnd:
298 /*
299 * we assume that the character after a function's right paren
300 * is a token character if it's a function and a non-token
301 * character if it's a declaration. Comments don't count...
302 */
303 for (anext = maybe_attribute;;) {
304 while (GETC(!=, EOF) && iswhite(c))
305 if (c == '\n')
306 SETLINE;
307 if (c == EOF)
308 return NO;
309 /*
310 * Recognize the gnu __attribute__ extension, which would
311 * otherwise make the heuristic test DTWT
312 */
313 if (anext == maybe_attribute) {
314 if (intoken(c)) {
315 *anext++ = c;
316 continue;
317 }
318 } else {
319 if (intoken(c)) {
320 if (anext - maybe_attribute
321 < (ptrdiff_t)(sizeof attribute - 1))
322 *anext++ = c;
323 else break;
324 continue;
325 } else {
326 *anext++ = '\0';
327 if (strcmp(maybe_attribute, attribute) == 0) {
328 (void)ungetc(c, inf);
329 return NO;
330 }
331 break;
332 }
333 }
334 if (intoken(c) || c == '{')
335 break;
336 if (c == '/' && (GETC(==, '*') || c == '/'))
337 skip_comment(c);
338 else { /* don't ever "read" '/' */
339 (void)ungetc(c, inf);
340 return (NO);
341 }
342 }
343 if (c != '{')
344 (void)skip_key('{');
345 return (YES);
346 }
347
348 /*
349 * hash_entry --
350 * handle a line starting with a '#'
351 */
352 static void
hash_entry(void)353 hash_entry(void)
354 {
355 int c; /* character read */
356 int curline; /* line started on */
357 char *sp; /* buffer pointer */
358 char tok[MAXTOKEN]; /* storage buffer */
359
360 /* ignore leading whitespace */
361 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
362 ;
363 (void)ungetc(c, inf);
364
365 curline = lineno;
366 for (sp = tok;;) { /* get next token */
367 if (GETC(==, EOF))
368 return;
369 if (iswhite(c))
370 break;
371 if (sp == tok + sizeof tok - 1)
372 /* Too long -- truncate it */
373 *sp = EOS;
374 else
375 *sp++ = c;
376 }
377 *sp = EOS;
378 if (memcmp(tok, "define", 6)) /* only interested in #define's */
379 goto skip;
380 for (;;) { /* this doesn't handle "#define \n" */
381 if (GETC(==, EOF))
382 return;
383 if (!iswhite(c))
384 break;
385 }
386 for (sp = tok;;) { /* get next token */
387 if (sp == tok + sizeof tok - 1)
388 /* Too long -- truncate it */
389 *sp = EOS;
390 else
391 *sp++ = c;
392 if (GETC(==, EOF))
393 return;
394 /*
395 * this is where it DOESN'T handle
396 * "#define \n"
397 */
398 if (!intoken(c))
399 break;
400 }
401 *sp = EOS;
402 if (dflag || c == '(') { /* only want macros */
403 get_line();
404 pfnote(tok, curline);
405 }
406 skip: if (c == '\n') { /* get rid of rest of define */
407 SETLINE
408 if (*(sp - 1) != '\\')
409 return;
410 }
411 (void)skip_key('\n');
412 }
413
414 /*
415 * str_entry --
416 * handle a struct, union or enum entry
417 */
418 static int
str_entry(int c)419 str_entry(int c) /* c is current character */
420 {
421 int curline; /* line started on */
422 char *sp; /* buffer pointer */
423 char tok[LINE_MAX]; /* storage buffer */
424
425 curline = lineno;
426 while (iswhite(c))
427 if (GETC(==, EOF))
428 return (NO);
429 if (c == '{') /* it was "struct {" */
430 return (YES);
431 for (sp = tok;;) { /* get next token */
432 if (sp == tok + sizeof tok - 1)
433 /* Too long -- truncate it */
434 *sp = EOS;
435 else
436 *sp++ = c;
437 if (GETC(==, EOF))
438 return (NO);
439 if (!intoken(c))
440 break;
441 }
442 switch (c) {
443 case '{': /* it was "struct foo{" */
444 --sp;
445 break;
446 case '\n': /* it was "struct foo\n" */
447 SETLINE;
448 /*FALLTHROUGH*/
449 default: /* probably "struct foo " */
450 while (GETC(!=, EOF))
451 if (!iswhite(c))
452 break;
453 if (c != '{') {
454 (void)ungetc(c, inf);
455 return (NO);
456 }
457 }
458 *sp = EOS;
459 pfnote(tok, curline);
460 return (YES);
461 }
462
463 /*
464 * skip_comment --
465 * skip over comment
466 */
467 void
skip_comment(int t)468 skip_comment(int t) /* t is comment character */
469 {
470 int c; /* character read */
471 int star; /* '*' flag */
472
473 for (star = 0; GETC(!=, EOF);)
474 switch(c) {
475 /* comments don't nest, nor can they be escaped. */
476 case '*':
477 star = YES;
478 break;
479 case '/':
480 if (star && t == '*')
481 return;
482 break;
483 case '\n':
484 SETLINE;
485 if (t == '/')
486 return;
487 /*FALLTHROUGH*/
488 default:
489 star = NO;
490 break;
491 }
492 }
493
494 /*
495 * skip_string --
496 * skip to the end of a string or character constant.
497 */
498 void
skip_string(int key)499 skip_string(int key)
500 {
501 int c,
502 skip;
503
504 for (skip = NO; GETC(!=, EOF); )
505 switch (c) {
506 case '\\': /* a backslash escapes anything */
507 skip = !skip; /* we toggle in case it's "\\" */
508 break;
509 case '\n':
510 SETLINE;
511 /*FALLTHROUGH*/
512 default:
513 if (c == key && !skip)
514 return;
515 skip = NO;
516 }
517 }
518
519 /*
520 * skip_key --
521 * skip to next char "key"
522 */
523 int
skip_key(int key)524 skip_key(int key)
525 {
526 int c,
527 skip,
528 retval;
529
530 for (skip = retval = NO; GETC(!=, EOF);)
531 switch(c) {
532 case '\\': /* a backslash escapes anything */
533 skip = !skip; /* we toggle in case it's "\\" */
534 break;
535 case ';': /* special case for yacc; if one */
536 case '|': /* of these chars occurs, we may */
537 retval = YES; /* have moved out of the rule */
538 break; /* not used by C */
539 case '\'':
540 case '"':
541 /* skip strings and character constants */
542 skip_string(c);
543 break;
544 case '/':
545 /* skip comments */
546 if (GETC(==, '*') || c == '/') {
547 skip_comment(c);
548 break;
549 }
550 (void)ungetc(c, inf);
551 c = '/';
552 goto norm;
553 case '\n':
554 SETLINE;
555 /*FALLTHROUGH*/
556 default:
557 norm:
558 if (c == key && !skip)
559 return (retval);
560 skip = NO;
561 }
562 return (retval);
563 }
564