1 /*
2 * Copyright (C) 2013-2015 Internet Systems Consortium, Inc. ("ISC")
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <config.h>
18
19 #include <isc/file.h>
20 #include <isc/print.h>
21 #include <isc/regex.h>
22 #include <isc/string.h>
23
/*
 * FAIL(msg): abandon validation by jumping to the 'error' label of the
 * enclosing function.  When VALREGEX_REPORT_REASON is non-zero the
 * message is first stored in the enclosing function's 'reason' variable;
 * otherwise the message is discarded.
 */
#if !VALREGEX_REPORT_REASON
#define FAIL(msg) goto error
#else
#define FAIL(msg) \
	do { \
		reason = (msg); \
		goto error; \
	} while(0)
#endif
29
30 /*
31 * Validate the regular expression 'C' locale.
32 */
33 int
isc_regex_validate(const char * c)34 isc_regex_validate(const char *c) {
35 enum {
36 none, parse_bracket, parse_bound,
37 parse_ce, parse_ec, parse_cc
38 } state = none;
39 /* Well known character classes. */
40 const char *cc[] = {
41 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
42 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
43 ":print:", ":xdigit:"
44 };
45 isc_boolean_t seen_comma = ISC_FALSE;
46 isc_boolean_t seen_high = ISC_FALSE;
47 isc_boolean_t seen_char = ISC_FALSE;
48 isc_boolean_t seen_ec = ISC_FALSE;
49 isc_boolean_t seen_ce = ISC_FALSE;
50 isc_boolean_t have_atom = ISC_FALSE;
51 int group = 0;
52 int range = 0;
53 int sub = 0;
54 isc_boolean_t empty_ok = ISC_FALSE;
55 isc_boolean_t neg = ISC_FALSE;
56 isc_boolean_t was_multiple = ISC_FALSE;
57 unsigned int low = 0;
58 unsigned int high = 0;
59 const char *ccname = NULL;
60 int range_start = 0;
61 #if VALREGEX_REPORT_REASON
62 const char *reason = "";
63 #endif
64
65 if (c == NULL || *c == 0)
66 FAIL("empty string");
67
68 while (c != NULL && *c != 0) {
69 switch (state) {
70 case none:
71 switch (*c) {
72 case '\\': /* make literal */
73 ++c;
74 switch (*c) {
75 case '1': case '2': case '3':
76 case '4': case '5': case '6':
77 case '7': case '8': case '9':
78 if ((*c - '0') > sub)
79 FAIL("bad back reference");
80 have_atom = ISC_TRUE;
81 was_multiple = ISC_FALSE;
82 break;
83 case 0:
84 FAIL("escaped end-of-string");
85 default:
86 goto literal;
87 }
88 ++c;
89 break;
90 case '[': /* bracket start */
91 ++c;
92 neg = ISC_FALSE;
93 was_multiple = ISC_FALSE;
94 seen_char = ISC_FALSE;
95 state = parse_bracket;
96 break;
97 case '{': /* bound start */
98 switch (c[1]) {
99 case '0': case '1': case '2': case '3':
100 case '4': case '5': case '6': case '7':
101 case '8': case '9':
102 if (!have_atom)
103 FAIL("no atom");
104 if (was_multiple)
105 FAIL("was multiple");
106 seen_comma = ISC_FALSE;
107 seen_high = ISC_FALSE;
108 low = high = 0;
109 state = parse_bound;
110 break;
111 default:
112 goto literal;
113 }
114 ++c;
115 have_atom = ISC_TRUE;
116 was_multiple = ISC_TRUE;
117 break;
118 case '}':
119 goto literal;
120 case '(': /* group start */
121 have_atom = ISC_FALSE;
122 was_multiple = ISC_FALSE;
123 empty_ok = ISC_TRUE;
124 ++group;
125 ++sub;
126 ++c;
127 break;
128 case ')': /* group end */
129 if (group && !have_atom && !empty_ok)
130 FAIL("empty alternative");
131 have_atom = ISC_TRUE;
132 was_multiple = ISC_FALSE;
133 if (group != 0)
134 --group;
135 ++c;
136 break;
137 case '|': /* alternative seperator */
138 if (!have_atom)
139 FAIL("no atom");
140 have_atom = ISC_FALSE;
141 empty_ok = ISC_FALSE;
142 was_multiple = ISC_FALSE;
143 ++c;
144 break;
145 case '^':
146 case '$':
147 have_atom = ISC_TRUE;
148 was_multiple = ISC_TRUE;
149 ++c;
150 break;
151 case '+':
152 case '*':
153 case '?':
154 if (was_multiple)
155 FAIL("was multiple");
156 if (!have_atom)
157 FAIL("no atom");
158 have_atom = ISC_TRUE;
159 was_multiple = ISC_TRUE;
160 ++c;
161 break;
162 case '.':
163 default:
164 literal:
165 have_atom = ISC_TRUE;
166 was_multiple = ISC_FALSE;
167 ++c;
168 break;
169 }
170 break;
171 case parse_bound:
172 switch (*c) {
173 case '0': case '1': case '2': case '3': case '4':
174 case '5': case '6': case '7': case '8': case '9':
175 if (!seen_comma) {
176 low = low * 10 + *c - '0';
177 if (low > 255)
178 FAIL("lower bound too big");
179 } else {
180 seen_high = ISC_TRUE;
181 high = high * 10 + *c - '0';
182 if (high > 255)
183 FAIL("upper bound too big");
184 }
185 ++c;
186 break;
187 case ',':
188 if (seen_comma)
189 FAIL("multiple commas");
190 seen_comma = ISC_TRUE;
191 ++c;
192 break;
193 default:
194 case '{':
195 FAIL("non digit/comma");
196 case '}':
197 if (seen_high && low > high)
198 FAIL("bad parse bound");
199 seen_comma = ISC_FALSE;
200 state = none;
201 ++c;
202 break;
203 }
204 break;
205 case parse_bracket:
206 switch (*c) {
207 case '^':
208 if (seen_char || neg) goto inside;
209 neg = ISC_TRUE;
210 ++c;
211 break;
212 case '-':
213 if (range == 2) goto inside;
214 if (!seen_char) goto inside;
215 if (range == 1)
216 FAIL("bad range");
217 range = 2;
218 ++c;
219 break;
220 case '[':
221 ++c;
222 switch (*c) {
223 case '.': /* collating element */
224 if (range != 0) --range;
225 ++c;
226 state = parse_ce;
227 seen_ce = ISC_FALSE;
228 break;
229 case '=': /* equivalence class */
230 if (range == 2)
231 FAIL("equivalence class in range");
232 ++c;
233 state = parse_ec;
234 seen_ec = ISC_FALSE;
235 break;
236 case ':': /* character class */
237 if (range == 2)
238 FAIL("character class in range");
239 ccname = c;
240 ++c;
241 state = parse_cc;
242 break;
243 }
244 seen_char = ISC_TRUE;
245 break;
246 case ']':
247 if (!c[1] && !seen_char)
248 FAIL("unfinished brace");
249 if (!seen_char)
250 goto inside;
251 ++c;
252 range = 0;
253 have_atom = ISC_TRUE;
254 state = none;
255 break;
256 default:
257 inside:
258 seen_char = ISC_TRUE;
259 if (range == 2 && (*c & 0xff) < range_start)
260 FAIL("out of order range");
261 if (range != 0)
262 --range;
263 range_start = *c & 0xff;
264 ++c;
265 break;
266 };
267 break;
268 case parse_ce:
269 switch (*c) {
270 case '.':
271 ++c;
272 switch (*c) {
273 case ']':
274 if (!seen_ce)
275 FAIL("empty ce");
276 ++c;
277 state = parse_bracket;
278 break;
279 default:
280 if (seen_ce)
281 range_start = 256;
282 else
283 range_start = '.';
284 seen_ce = ISC_TRUE;
285 break;
286 }
287 break;
288 default:
289 if (seen_ce)
290 range_start = 256;
291 else
292 range_start = *c;
293 seen_ce = ISC_TRUE;
294 ++c;
295 break;
296 }
297 break;
298 case parse_ec:
299 switch (*c) {
300 case '=':
301 ++c;
302 switch (*c) {
303 case ']':
304 if (!seen_ec)
305 FAIL("no ec");
306 ++c;
307 state = parse_bracket;
308 break;
309 default:
310 seen_ec = ISC_TRUE;
311 break;
312 }
313 break;
314 default:
315 seen_ec = ISC_TRUE;
316 ++c;
317 break;
318 }
319 break;
320 case parse_cc:
321 switch (*c) {
322 case ':':
323 ++c;
324 switch (*c) {
325 case ']': {
326 unsigned int i;
327 isc_boolean_t found = ISC_FALSE;
328 for (i = 0;
329 i < sizeof(cc)/sizeof(*cc);
330 i++)
331 {
332 unsigned int len;
333 len = strlen(cc[i]);
334 if (len !=
335 (unsigned int)(c - ccname))
336 continue;
337 if (strncmp(cc[i], ccname, len))
338 continue;
339 found = ISC_TRUE;
340 }
341 if (!found)
342 FAIL("unknown cc");
343 ++c;
344 state = parse_bracket;
345 break;
346 }
347 default:
348 break;
349 }
350 break;
351 default:
352 ++c;
353 break;
354 }
355 break;
356 }
357 }
358 if (group != 0)
359 FAIL("group open");
360 if (state != none)
361 FAIL("incomplete");
362 if (!have_atom)
363 FAIL("no atom");
364 return (sub);
365
366 error:
367 #if VALREGEX_REPORT_REASON
368 fprintf(stderr, "%s\n", reason);
369 #endif
370 return (-1);
371 }
372