xref: /dragonfly/contrib/flex/src/ccl.c (revision 388e4ddaf1c230f115961bdb4bad6a8d3e017c93)
1 /* ccl - routines for character classes */
2 
3 /*  Copyright (c) 1990 The Regents of the University of California. */
4 /*  All rights reserved. */
5 
6 /*  This code is derived from software contributed to Berkeley by */
7 /*  Vern Paxson. */
8 
9 /*  The United States Government has rights in this work pursuant */
10 /*  to contract no. DE-AC03-76SF00098 between the United States */
11  /*  Department of Energy and the University of California. */
12 
13 /*  This file is part of flex. */
14 
15 /*  Redistribution and use in source and binary forms, with or without */
16 /*  modification, are permitted provided that the following conditions */
17 /*  are met: */
18 
19 /*  1. Redistributions of source code must retain the above copyright */
20 /*     notice, this list of conditions and the following disclaimer. */
21 /*  2. Redistributions in binary form must reproduce the above copyright */
22 /*     notice, this list of conditions and the following disclaimer in the */
23 /*     documentation and/or other materials provided with the distribution. */
24 
25 /*  Neither the name of the University nor the names of its contributors */
26 /*  may be used to endorse or promote products derived from this software */
27 /*  without specific prior written permission. */
28 
29 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /*  PURPOSE. */
33 
34 #include "flexdef.h"
35 
36 /* return true if the chr is in the ccl. Takes negation into account. */
37 static bool
ccl_contains(const int cclp,const int ch)38 ccl_contains (const int cclp, const int ch)
39 {
40           int     ind, len, i;
41 
42           len = ccllen[cclp];
43           ind = cclmap[cclp];
44 
45           for (i = 0; i < len; ++i)
46                     if (ccltbl[ind + i] == ch)
47                               return !cclng[cclp];
48 
49     return cclng[cclp];
50 }
51 
52 
53 /* ccladd - add a single character to a ccl */
54 
ccladd(int cclp,int ch)55 void ccladd (int cclp, int ch)
56 {
57           int     ind, len, newpos, i;
58 
59           check_char (ch);
60 
61           len = ccllen[cclp];
62           ind = cclmap[cclp];
63 
64           /* check to see if the character is already in the ccl */
65 
66           for (i = 0; i < len; ++i)
67                     if (ccltbl[ind + i] == ch)
68                               return;
69 
70           /* mark newlines */
71           if (ch == nlch)
72                     ccl_has_nl[cclp] = true;
73 
74           newpos = ind + len;
75 
76           if (newpos >= current_max_ccl_tbl_size) {
77                     current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
78 
79                     ++num_reallocs;
80 
81                     ccltbl = reallocate_Character_array (ccltbl,
82                                                                  current_max_ccl_tbl_size);
83           }
84 
85           ccllen[cclp] = len + 1;
86           ccltbl[newpos] = (unsigned char) ch;
87 }
88 
89 /* dump_cclp - same thing as list_character_set, but for cclps.  */
90 
dump_cclp(FILE * file,int cclp)91 static void    dump_cclp (FILE* file, int cclp)
92 {
93           int i;
94 
95           putc ('[', file);
96 
97           for (i = 0; i < csize; ++i) {
98                     if (ccl_contains(cclp, i)){
99                               int start_char = i;
100 
101                               putc (' ', file);
102 
103                               fputs (readable_form (i), file);
104 
105                               while (++i < csize && ccl_contains(cclp,i)) ;
106 
107                               if (i - 1 > start_char)
108                                         /* this was a run */
109                                         fprintf (file, "-%s",
110                                                    readable_form (i - 1));
111 
112                               putc (' ', file);
113                     }
114           }
115 
116           putc (']', file);
117 }
118 
119 
120 
121 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
122 int
ccl_set_diff(int a,int b)123 ccl_set_diff (int a, int b)
124 {
125     int  d, ch;
126 
127     /* create new class  */
128     d = cclinit();
129 
130     /* In order to handle negation, we spin through all possible chars,
131      * addding each char in a that is not in b.
132      * (This could be O(n^2), but n is small and bounded.)
133      */
134           for ( ch = 0; ch < csize; ++ch )
135         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
136             ccladd (d, ch);
137 
138     /* debug */
139     if (0){
140         fprintf(stderr, "ccl_set_diff (");
141             fprintf(stderr, "\n    ");
142             dump_cclp (stderr, a);
143             fprintf(stderr, "\n    ");
144             dump_cclp (stderr, b);
145             fprintf(stderr, "\n    ");
146             dump_cclp (stderr, d);
147         fprintf(stderr, "\n)\n");
148     }
149     return d;
150 }
151 
152 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
153 int
ccl_set_union(int a,int b)154 ccl_set_union (int a, int b)
155 {
156     int  d, i;
157 
158     /* create new class  */
159     d = cclinit();
160 
161     /* Add all of a */
162     for (i = 0; i < ccllen[a]; ++i)
163                     ccladd (d, ccltbl[cclmap[a] + i]);
164 
165     /* Add all of b */
166     for (i = 0; i < ccllen[b]; ++i)
167                     ccladd (d, ccltbl[cclmap[b] + i]);
168 
169     /* debug */
170     if (0){
171         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
172             fprintf(stderr, "\n    ");
173             dump_cclp (stderr, a);
174             fprintf(stderr, "\n    ");
175             dump_cclp (stderr, b);
176             fprintf(stderr, "\n    ");
177             dump_cclp (stderr, d);
178         fprintf(stderr, "\n)\n");
179     }
180     return d;
181 }
182 
183 
184 /* cclinit - return an empty ccl */
185 
cclinit(void)186 int     cclinit (void)
187 {
188           if (++lastccl >= current_maxccls) {
189                     current_maxccls += MAX_CCLS_INCREMENT;
190 
191                     ++num_reallocs;
192 
193                     cclmap =
194                               reallocate_integer_array (cclmap, current_maxccls);
195                     ccllen =
196                               reallocate_integer_array (ccllen, current_maxccls);
197                     cclng = reallocate_integer_array (cclng, current_maxccls);
198                     ccl_has_nl =
199                               reallocate_bool_array (ccl_has_nl,
200                                                          current_maxccls);
201           }
202 
203           if (lastccl == 1)
204                     /* we're making the first ccl */
205                     cclmap[lastccl] = 0;
206 
207           else
208                     /* The new pointer is just past the end of the last ccl.
209                      * Since the cclmap points to the \first/ character of a
210                      * ccl, adding the length of the ccl to the cclmap pointer
211                      * will produce a cursor to the first free space.
212                      */
213                     cclmap[lastccl] =
214                               cclmap[lastccl - 1] + ccllen[lastccl - 1];
215 
216           ccllen[lastccl] = 0;
217           cclng[lastccl] = 0; /* ccl's start out life un-negated */
218           ccl_has_nl[lastccl] = false;
219 
220           return lastccl;
221 }
222 
223 
224 /* cclnegate - negate the given ccl */
225 
cclnegate(int cclp)226 void    cclnegate (int cclp)
227 {
228           cclng[cclp] = 1;
229           ccl_has_nl[cclp] = !ccl_has_nl[cclp];
230 }
231 
232 
233 /* list_character_set - list the members of a set of characters in CCL form
234  *
235  * Writes to the given file a character-class representation of those
236  * characters present in the given CCL.  A character is present if it
237  * has a non-zero value in the cset array.
238  */
239 
list_character_set(FILE * file,int cset[])240 void    list_character_set (FILE *file, int cset[])
241 {
242           int i;
243 
244           putc ('[', file);
245 
246           for (i = 0; i < csize; ++i) {
247                     if (cset[i]) {
248                               int start_char = i;
249 
250                               putc (' ', file);
251 
252                               fputs (readable_form (i), file);
253 
254                               while (++i < csize && cset[i]) ;
255 
256                               if (i - 1 > start_char)
257                                         /* this was a run */
258                                         fprintf (file, "-%s",
259                                                    readable_form (i - 1));
260 
261                               putc (' ', file);
262                     }
263           }
264 
265           putc (']', file);
266 }
267 
268 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
269  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
270  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
271  * be in the range. If not, then this range is ambiguous, and the function
272  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
273  * [a-z] will be labeled ambiguous because it does not include [A-Z].
274  *
275  * @param c1 the lower end of the range
276  * @param c2 the upper end of the range
277  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
278  */
range_covers_case(int c1,int c2)279 bool range_covers_case (int c1, int c2)
280 {
281           int     i, o;
282 
283           for (i = c1; i <= c2; i++) {
284                     if (has_case (i)) {
285                               o = reverse_case (i);
286                               if (o < c1 || c2 < o)
287                                         return false;
288                     }
289           }
290           return true;
291 }
292 
293 /** Reverse the case of a character, if possible.
294  * @return c if case-reversal does not apply.
295  */
reverse_case(int c)296 int reverse_case (int c)
297 {
298           return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
299 }
300 
301 /** Return true if c is uppercase or lowercase. */
has_case(int c)302 bool has_case (int c)
303 {
304           return (isupper (c) || islower (c)) ? true : false;
305 }
306