1 /*-
2 * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
3 * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/types.h>
32 #include <sys/sbuf.h>
33
34 #include <ctype.h>
35 #include <err.h>
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <xlocale.h>
41
/*
 * States of the keyword scanner.  The note beside each state names the
 * input character that moves the scanner on to the next state.
 */
typedef enum {
	INIT = 0,		/* waiting for '$' */
	DELIM_SEEN = 1,		/* '$' seen; waiting for a letter */
	KEYWORD = 2,		/* in the keyword; waiting for punctuation */
	PUNC_SEEN = 3,		/* ':' seen; ':' -> _SVN, space -> TEXT */
	PUNC_SEEN_SVN = 4,	/* '::' seen; waiting for a space */
	TEXT = 5		/* in the value text, up to the closing '$' */
} analyzer_states;
51
/*
 * Scan the stream fp for ident keywords of the form "$Keyword: text $"
 * (or the "$Keyword:: text $" / "$Keyword:: text#$" subversion forms) and
 * print every match on standard output.
 *
 * fp    - stream to read; it is consumed up to EOF and not closed here.
 * name  - label printed before the matches and used in the warning;
 *         NULL means no label is printed and the warning refers to
 *         "standard input".
 * quiet - when true, suppress the "no id keywords" warning.
 *
 * Returns EXIT_SUCCESS if at least one keyword was printed, otherwise
 * EXIT_FAILURE.  Terminates the program through err(3) if the string
 * buffer or the "C" locale cannot be set up, or if the buffer reports a
 * failure when it is finished.
 */
static int
scan(FILE *fp, const char *name, bool quiet)
{
	int c;
	char last;
	bool hasid = false;
	bool subversion = false;
	analyzer_states state = INIT;
	struct sbuf *id;
	locale_t l;

	id = sbuf_new_auto();
	if (id == NULL)
		err(EXIT_FAILURE, "sbuf_new_auto");

	/* Classify characters with the "C" locale, not the user's. */
	l = newlocale(LC_ALL_MASK, "C", NULL);
	if (l == NULL)
		err(EXIT_FAILURE, "newlocale");

	if (name != NULL)
		printf("%s:\n", name);

	while ((c = fgetc(fp)) != EOF) {
		switch (state) {
		case INIT:
			/* Only '$' leaves INIT; everything else is skipped. */
			if (c == '$')
				state = DELIM_SEEN;
			break;
		case DELIM_SEEN:
			if (isalpha_l(c, l)) {
				/* First letter: start collecting a fresh ident. */
				sbuf_clear(id);
				sbuf_putc(id, '$');
				sbuf_putc(id, c);
				state = KEYWORD;
			} else if (c != '$') {
				/* Neither a letter nor another '$': give up. */
				state = INIT;
			}
			/* A further '$' keeps us in DELIM_SEEN. */
			break;
		case KEYWORD:
			sbuf_putc(id, c);

			if (isalpha_l(c, l)) {
				/* Additional letter: stay in KEYWORD. */
			} else if (c == ':') {
				/* First ':' ends the keyword name. */
				state = PUNC_SEEN;
				subversion = false;
			} else if (c == '$') {
				/*
				 * Incomplete ident, but this '$' could be the
				 * beginning of another keyword.
				 */
				state = DELIM_SEEN;
			} else {
				state = INIT;
			}
			break;
		case PUNC_SEEN:
		case PUNC_SEEN_SVN:
			sbuf_putc(id, c);

			switch (c) {
			case ':':
				/*
				 * A second ':' right after the first selects
				 * the subversion form.  A third ':' makes the
				 * ident invalid.
				 */
				if (state == PUNC_SEEN) {
					state = PUNC_SEEN_SVN;
					subversion = true;
				} else {
					state = INIT;
				}
				break;
			case ' ':
				/*
				 * A space after ':' or '::' starts the last
				 * component of a potential ident.
				 */
				state = TEXT;
				break;
			case '$':
				/*
				 * Invalid here, but as in the KEYWORD state
				 * this '$' may open the next keyword, so do
				 * not throw it away.
				 */
				state = DELIM_SEEN;
				break;
			default:
				/* All other characters are invalid. */
				state = INIT;
				break;
			}
			break;
		case TEXT:
			sbuf_putc(id, c);

			if (iscntrl_l(c, l)) {
				/* Control characters are not allowed here. */
				state = INIT;
			} else if (c == '$') {
				/*
				 * Per sbuf(9), a failure of any earlier append
				 * is reported by sbuf_finish(), and sbuf_len()
				 * is then negative; bail out rather than index
				 * the data with a bogus length.
				 */
				if (sbuf_finish(id) != 0)
					err(EXIT_FAILURE, "sbuf_finish");

				/*
				 * A valid ident ends with a space before the
				 * closing '$'.  The subversion extension uses
				 * '#' to show that the expansion exceeded the
				 * fixed width, so that is accepted too when in
				 * subversion mode.  No length check is
				 * enforced because GNU RCS ident(1) does not
				 * do it either.
				 *
				 * The buffer holds at least "$X: $" at this
				 * point, so the index below is in range.
				 */
				last = sbuf_data(id)[sbuf_len(id) - 2];
				if (last == ' ' || (subversion && last == '#')) {
					printf(" %s\n", sbuf_data(id));
					hasid = true;
					state = INIT;
				} else {
					/*
					 * Rejected ident: the '$' just read
					 * may still open the next keyword.
					 */
					state = DELIM_SEEN;
				}
			}
			/* Other characters: stay in TEXT. */
			break;
		}
	}
	sbuf_delete(id);
	freelocale(l);

	if (!hasid) {
		if (!quiet)
			fprintf(stderr, "%s warning: no id keywords in %s\n",
			    getprogname(), name ? name : "standard input");

		return (EXIT_FAILURE);
	}

	return (EXIT_SUCCESS);
}
200
201 int
main(int argc,char ** argv)202 main(int argc, char **argv)
203 {
204 bool quiet = false;
205 int ch, i;
206 int ret = EXIT_SUCCESS;
207 FILE *fp;
208
209 while ((ch = getopt(argc, argv, "qV")) != -1) {
210 switch (ch) {
211 case 'q':
212 quiet = true;
213 break;
214 case 'V':
215 /* Do nothing, compat with GNU rcs's ident */
216 return (EXIT_SUCCESS);
217 default:
218 errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
219 getprogname());
220 }
221 }
222
223 argc -= optind;
224 argv += optind;
225
226 if (argc == 0)
227 return (scan(stdin, NULL, quiet));
228
229 for (i = 0; i < argc; i++) {
230 fp = fopen(argv[i], "r");
231 if (fp == NULL) {
232 warn("%s", argv[i]);
233 ret = EXIT_FAILURE;
234 continue;
235 }
236 if (scan(fp, argv[i], quiet) != EXIT_SUCCESS)
237 ret = EXIT_FAILURE;
238 fclose(fp);
239 }
240
241 return (ret);
242 }
243