1 /* ccl - routines for character classes */
2
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
5
6 /* This code is derived from software contributed to Berkeley by */
7 /* Vern Paxson. */
8
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
12
13 /* This file is part of flex. */
14
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
17 /* are met: */
18
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
24
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
28
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /* PURPOSE. */
33
34 #include "flexdef.h"
35
36 /* return true if the chr is in the ccl. Takes negation into account. */
37 static bool
ccl_contains(const int cclp,const int ch)38 ccl_contains (const int cclp, const int ch)
39 {
40 int ind, len, i;
41
42 len = ccllen[cclp];
43 ind = cclmap[cclp];
44
45 for (i = 0; i < len; ++i)
46 if (ccltbl[ind + i] == ch)
47 return !cclng[cclp];
48
49 return cclng[cclp];
50 }
51
52
53 /* ccladd - add a single character to a ccl */
54
ccladd(cclp,ch)55 void ccladd (cclp, ch)
56 int cclp;
57 int ch;
58 {
59 int ind, len, newpos, i;
60
61 check_char (ch);
62
63 len = ccllen[cclp];
64 ind = cclmap[cclp];
65
66 /* check to see if the character is already in the ccl */
67
68 for (i = 0; i < len; ++i)
69 if (ccltbl[ind + i] == ch)
70 return;
71
72 /* mark newlines */
73 if (ch == nlch)
74 ccl_has_nl[cclp] = true;
75
76 newpos = ind + len;
77
78 if (newpos >= current_max_ccl_tbl_size) {
79 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
80
81 ++num_reallocs;
82
83 ccltbl = reallocate_Character_array (ccltbl,
84 current_max_ccl_tbl_size);
85 }
86
87 ccllen[cclp] = len + 1;
88 ccltbl[newpos] = ch;
89 }
90
91 /* dump_cclp - same thing as list_character_set, but for cclps. */
92
dump_cclp(FILE * file,int cclp)93 static void dump_cclp (FILE* file, int cclp)
94 {
95 int i;
96
97 putc ('[', file);
98
99 for (i = 0; i < csize; ++i) {
100 if (ccl_contains(cclp, i)){
101 int start_char = i;
102
103 putc (' ', file);
104
105 fputs (readable_form (i), file);
106
107 while (++i < csize && ccl_contains(cclp,i)) ;
108
109 if (i - 1 > start_char)
110 /* this was a run */
111 fprintf (file, "-%s",
112 readable_form (i - 1));
113
114 putc (' ', file);
115 }
116 }
117
118 putc (']', file);
119 }
120
121
122
123 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
124 int
ccl_set_diff(int a,int b)125 ccl_set_diff (int a, int b)
126 {
127 int d, ch;
128
129 /* create new class */
130 d = cclinit();
131
132 /* In order to handle negation, we spin through all possible chars,
133 * addding each char in a that is not in b.
134 * (This could be O(n^2), but n is small and bounded.)
135 */
136 for ( ch = 0; ch < csize; ++ch )
137 if (ccl_contains (a, ch) && !ccl_contains(b, ch))
138 ccladd (d, ch);
139
140 /* debug */
141 if (0){
142 fprintf(stderr, "ccl_set_diff (");
143 fprintf(stderr, "\n ");
144 dump_cclp (stderr, a);
145 fprintf(stderr, "\n ");
146 dump_cclp (stderr, b);
147 fprintf(stderr, "\n ");
148 dump_cclp (stderr, d);
149 fprintf(stderr, "\n)\n");
150 }
151 return d;
152 }
153
154 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
155 int
ccl_set_union(int a,int b)156 ccl_set_union (int a, int b)
157 {
158 int d, i;
159
160 /* create new class */
161 d = cclinit();
162
163 /* Add all of a */
164 for (i = 0; i < ccllen[a]; ++i)
165 ccladd (d, ccltbl[cclmap[a] + i]);
166
167 /* Add all of b */
168 for (i = 0; i < ccllen[b]; ++i)
169 ccladd (d, ccltbl[cclmap[b] + i]);
170
171 /* debug */
172 if (0){
173 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
174 fprintf(stderr, "\n ");
175 dump_cclp (stderr, a);
176 fprintf(stderr, "\n ");
177 dump_cclp (stderr, b);
178 fprintf(stderr, "\n ");
179 dump_cclp (stderr, d);
180 fprintf(stderr, "\n)\n");
181 }
182 return d;
183 }
184
185
186 /* cclinit - return an empty ccl */
187
cclinit()188 int cclinit ()
189 {
190 if (++lastccl >= current_maxccls) {
191 current_maxccls += MAX_CCLS_INCREMENT;
192
193 ++num_reallocs;
194
195 cclmap =
196 reallocate_integer_array (cclmap, current_maxccls);
197 ccllen =
198 reallocate_integer_array (ccllen, current_maxccls);
199 cclng = reallocate_integer_array (cclng, current_maxccls);
200 ccl_has_nl =
201 reallocate_bool_array (ccl_has_nl,
202 current_maxccls);
203 }
204
205 if (lastccl == 1)
206 /* we're making the first ccl */
207 cclmap[lastccl] = 0;
208
209 else
210 /* The new pointer is just past the end of the last ccl.
211 * Since the cclmap points to the \first/ character of a
212 * ccl, adding the length of the ccl to the cclmap pointer
213 * will produce a cursor to the first free space.
214 */
215 cclmap[lastccl] =
216 cclmap[lastccl - 1] + ccllen[lastccl - 1];
217
218 ccllen[lastccl] = 0;
219 cclng[lastccl] = 0; /* ccl's start out life un-negated */
220 ccl_has_nl[lastccl] = false;
221
222 return lastccl;
223 }
224
225
226 /* cclnegate - negate the given ccl */
227
cclnegate(cclp)228 void cclnegate (cclp)
229 int cclp;
230 {
231 cclng[cclp] = 1;
232 ccl_has_nl[cclp] = !ccl_has_nl[cclp];
233 }
234
235
236 /* list_character_set - list the members of a set of characters in CCL form
237 *
238 * Writes to the given file a character-class representation of those
239 * characters present in the given CCL. A character is present if it
240 * has a non-zero value in the cset array.
241 */
242
list_character_set(file,cset)243 void list_character_set (file, cset)
244 FILE *file;
245 int cset[];
246 {
247 int i;
248
249 putc ('[', file);
250
251 for (i = 0; i < csize; ++i) {
252 if (cset[i]) {
253 int start_char = i;
254
255 putc (' ', file);
256
257 fputs (readable_form (i), file);
258
259 while (++i < csize && cset[i]) ;
260
261 if (i - 1 > start_char)
262 /* this was a run */
263 fprintf (file, "-%s",
264 readable_form (i - 1));
265
266 putc (' ', file);
267 }
268 }
269
270 putc (']', file);
271 }
272
273 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
274 * scanner. Specifically, if a lowercase or uppercase character, x, is in the
275 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
276 * be in the range. If not, then this range is ambiguous, and the function
277 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
278 * [a-z] will be labeled ambiguous because it does not include [A-Z].
279 *
280 * @param c1 the lower end of the range
281 * @param c2 the upper end of the range
282 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
283 */
range_covers_case(int c1,int c2)284 bool range_covers_case (int c1, int c2)
285 {
286 int i, o;
287
288 for (i = c1; i <= c2; i++) {
289 if (has_case (i)) {
290 o = reverse_case (i);
291 if (o < c1 || c2 < o)
292 return false;
293 }
294 }
295 return true;
296 }
297
298 /** Reverse the case of a character, if possible.
299 * @return c if case-reversal does not apply.
300 */
reverse_case(int c)301 int reverse_case (int c)
302 {
303 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
304 }
305
306 /** Return true if c is uppercase or lowercase. */
has_case(int c)307 bool has_case (int c)
308 {
309 return (isupper (c) || islower (c)) ? true : false;
310 }
311