xref: /NextBSD/contrib/flex/ccl.c (revision eb1a5f8de9f7ea602c373a710f531abbf81141c4)
1 /* ccl - routines for character classes */
2 
3 /*  Copyright (c) 1990 The Regents of the University of California. */
4 /*  All rights reserved. */
5 
6 /*  This code is derived from software contributed to Berkeley by */
7 /*  Vern Paxson. */
8 
9 /*  The United States Government has rights in this work pursuant */
10 /*  to contract no. DE-AC03-76SF00098 between the United States */
11  /*  Department of Energy and the University of California. */
12 
13 /*  This file is part of flex. */
14 
15 /*  Redistribution and use in source and binary forms, with or without */
16 /*  modification, are permitted provided that the following conditions */
17 /*  are met: */
18 
19 /*  1. Redistributions of source code must retain the above copyright */
20 /*     notice, this list of conditions and the following disclaimer. */
21 /*  2. Redistributions in binary form must reproduce the above copyright */
22 /*     notice, this list of conditions and the following disclaimer in the */
23 /*     documentation and/or other materials provided with the distribution. */
24 
25 /*  Neither the name of the University nor the names of its contributors */
26 /*  may be used to endorse or promote products derived from this software */
27 /*  without specific prior written permission. */
28 
29 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /*  PURPOSE. */
33 
34 #include "flexdef.h"
35 
36 /* return true if the chr is in the ccl. Takes negation into account. */
37 static bool
ccl_contains(const int cclp,const int ch)38 ccl_contains (const int cclp, const int ch)
39 {
40 	int     ind, len, i;
41 
42 	len = ccllen[cclp];
43 	ind = cclmap[cclp];
44 
45 	for (i = 0; i < len; ++i)
46 		if (ccltbl[ind + i] == ch)
47 			return !cclng[cclp];
48 
49     return cclng[cclp];
50 }
51 
52 
53 /* ccladd - add a single character to a ccl */
54 
ccladd(cclp,ch)55 void    ccladd (cclp, ch)
56      int     cclp;
57      int     ch;
58 {
59 	int     ind, len, newpos, i;
60 
61 	check_char (ch);
62 
63 	len = ccllen[cclp];
64 	ind = cclmap[cclp];
65 
66 	/* check to see if the character is already in the ccl */
67 
68 	for (i = 0; i < len; ++i)
69 		if (ccltbl[ind + i] == ch)
70 			return;
71 
72 	/* mark newlines */
73 	if (ch == nlch)
74 		ccl_has_nl[cclp] = true;
75 
76 	newpos = ind + len;
77 
78 	if (newpos >= current_max_ccl_tbl_size) {
79 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
80 
81 		++num_reallocs;
82 
83 		ccltbl = reallocate_Character_array (ccltbl,
84 						     current_max_ccl_tbl_size);
85 	}
86 
87 	ccllen[cclp] = len + 1;
88 	ccltbl[newpos] = ch;
89 }
90 
91 /* dump_cclp - same thing as list_character_set, but for cclps.  */
92 
dump_cclp(FILE * file,int cclp)93 static void    dump_cclp (FILE* file, int cclp)
94 {
95 	int i;
96 
97 	putc ('[', file);
98 
99 	for (i = 0; i < csize; ++i) {
100 		if (ccl_contains(cclp, i)){
101 			int start_char = i;
102 
103 			putc (' ', file);
104 
105 			fputs (readable_form (i), file);
106 
107 			while (++i < csize && ccl_contains(cclp,i)) ;
108 
109 			if (i - 1 > start_char)
110 				/* this was a run */
111 				fprintf (file, "-%s",
112 					 readable_form (i - 1));
113 
114 			putc (' ', file);
115 		}
116 	}
117 
118 	putc (']', file);
119 }
120 
121 
122 
123 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
124 int
ccl_set_diff(int a,int b)125 ccl_set_diff (int a, int b)
126 {
127     int  d, ch;
128 
129     /* create new class  */
130     d = cclinit();
131 
132     /* In order to handle negation, we spin through all possible chars,
133      * addding each char in a that is not in b.
134      * (This could be O(n^2), but n is small and bounded.)
135      */
136 	for ( ch = 0; ch < csize; ++ch )
137         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
138             ccladd (d, ch);
139 
140     /* debug */
141     if (0){
142         fprintf(stderr, "ccl_set_diff (");
143             fprintf(stderr, "\n    ");
144             dump_cclp (stderr, a);
145             fprintf(stderr, "\n    ");
146             dump_cclp (stderr, b);
147             fprintf(stderr, "\n    ");
148             dump_cclp (stderr, d);
149         fprintf(stderr, "\n)\n");
150     }
151     return d;
152 }
153 
154 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
155 int
ccl_set_union(int a,int b)156 ccl_set_union (int a, int b)
157 {
158     int  d, i;
159 
160     /* create new class  */
161     d = cclinit();
162 
163     /* Add all of a */
164     for (i = 0; i < ccllen[a]; ++i)
165 		ccladd (d, ccltbl[cclmap[a] + i]);
166 
167     /* Add all of b */
168     for (i = 0; i < ccllen[b]; ++i)
169 		ccladd (d, ccltbl[cclmap[b] + i]);
170 
171     /* debug */
172     if (0){
173         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
174             fprintf(stderr, "\n    ");
175             dump_cclp (stderr, a);
176             fprintf(stderr, "\n    ");
177             dump_cclp (stderr, b);
178             fprintf(stderr, "\n    ");
179             dump_cclp (stderr, d);
180         fprintf(stderr, "\n)\n");
181     }
182     return d;
183 }
184 
185 
186 /* cclinit - return an empty ccl */
187 
cclinit()188 int     cclinit ()
189 {
190 	if (++lastccl >= current_maxccls) {
191 		current_maxccls += MAX_CCLS_INCREMENT;
192 
193 		++num_reallocs;
194 
195 		cclmap =
196 			reallocate_integer_array (cclmap, current_maxccls);
197 		ccllen =
198 			reallocate_integer_array (ccllen, current_maxccls);
199 		cclng = reallocate_integer_array (cclng, current_maxccls);
200 		ccl_has_nl =
201 			reallocate_bool_array (ccl_has_nl,
202 					       current_maxccls);
203 	}
204 
205 	if (lastccl == 1)
206 		/* we're making the first ccl */
207 		cclmap[lastccl] = 0;
208 
209 	else
210 		/* The new pointer is just past the end of the last ccl.
211 		 * Since the cclmap points to the \first/ character of a
212 		 * ccl, adding the length of the ccl to the cclmap pointer
213 		 * will produce a cursor to the first free space.
214 		 */
215 		cclmap[lastccl] =
216 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
217 
218 	ccllen[lastccl] = 0;
219 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
220 	ccl_has_nl[lastccl] = false;
221 
222 	return lastccl;
223 }
224 
225 
226 /* cclnegate - negate the given ccl */
227 
cclnegate(cclp)228 void    cclnegate (cclp)
229      int     cclp;
230 {
231 	cclng[cclp] = 1;
232 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
233 }
234 
235 
236 /* list_character_set - list the members of a set of characters in CCL form
237  *
238  * Writes to the given file a character-class representation of those
239  * characters present in the given CCL.  A character is present if it
240  * has a non-zero value in the cset array.
241  */
242 
list_character_set(file,cset)243 void    list_character_set (file, cset)
244      FILE   *file;
245      int     cset[];
246 {
247 	int i;
248 
249 	putc ('[', file);
250 
251 	for (i = 0; i < csize; ++i) {
252 		if (cset[i]) {
253 			int start_char = i;
254 
255 			putc (' ', file);
256 
257 			fputs (readable_form (i), file);
258 
259 			while (++i < csize && cset[i]) ;
260 
261 			if (i - 1 > start_char)
262 				/* this was a run */
263 				fprintf (file, "-%s",
264 					 readable_form (i - 1));
265 
266 			putc (' ', file);
267 		}
268 	}
269 
270 	putc (']', file);
271 }
272 
273 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
274  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
275  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
276  * be in the range. If not, then this range is ambiguous, and the function
277  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
278  * [a-z] will be labeled ambiguous because it does not include [A-Z].
279  *
280  * @param c1 the lower end of the range
281  * @param c2 the upper end of the range
282  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
283  */
range_covers_case(int c1,int c2)284 bool range_covers_case (int c1, int c2)
285 {
286 	int     i, o;
287 
288 	for (i = c1; i <= c2; i++) {
289 		if (has_case (i)) {
290 			o = reverse_case (i);
291 			if (o < c1 || c2 < o)
292 				return false;
293 		}
294 	}
295 	return true;
296 }
297 
298 /** Reverse the case of a character, if possible.
299  * @return c if case-reversal does not apply.
300  */
reverse_case(int c)301 int reverse_case (int c)
302 {
303 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
304 }
305 
306 /** Return true if c is uppercase or lowercase. */
has_case(int c)307 bool has_case (int c)
308 {
309 	return (isupper (c) || islower (c)) ? true : false;
310 }
311