1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Must be the first one. Must not depend on any other include. */
28 #include "sljitLir.h"
29 #include "regexJIT.h"
30 
31 #include <stdio.h>
32 
/*
 * Terminal colour escape sequences used in the test summary. When either
 * _WIN32 or _WIN64 is defined the macros expand to nothing, so the adjacent
 * string literals are printed without any colouring.
 */
#if !defined(_WIN32) && !defined(_WIN64)
#define COLOR_RED	"\33[31m"
#define COLOR_GREEN	"\33[32m"
#define COLOR_ARCH	"\33[33m"
#define COLOR_DEFAULT	"\33[0m"
#else
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#endif
44 
/*
 * S("...") spells a string literal for the test table: it becomes a wide
 * literal (L"...") unless REGEX_USE_8BIT_CHARS is defined, in which case
 * the literal is left untouched.
 */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str)	L##str
#else
#define S(str)	str
#endif
50 
51 #ifdef REGEX_MATCH_VERBOSE
/*
 * Manual debugging helper: compiles `pattern` with the REGEX_MATCH_VERBOSE
 * and REGEX_NEWLINE flags, runs the debug matcher over `string` and prints
 * the begin/end/id triple reported by regex_get_result().
 *
 * Both arguments must be zero terminated; their lengths are computed here.
 * The machine and the match object are released before returning.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* Length of the pattern (no strlen: regex_char_t may not be char). */
	ptr = pattern;
	while (*ptr)
		ptr++;

#ifdef REGEX_USE_8BIT_CHARS
	printf("Start test '%s' matches to '%s'\n", pattern, string);
#else
	/* regex_char_t is presumably wchar_t here (S() builds L"..." literals),
	   so %s would be a format/argument mismatch. */
	printf("Start test '%ls' matches to '%ls'\n", pattern, string);
#endif
	machine = regex_compile(pattern, (int)(ptr - pattern), REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Length of the subject string. */
	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, (int)(ptr - string));

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
95 #endif
96 
97 struct test_case {
98           int begin;          /* Expected begin. */
99           int end;  /* Expected end. */
100           int id;             /* Expected id. */
101           int finished;       /* -1 : don't care, 0 : false, 1 : true. */
102           int flags;          /* REGEX_MATCH_* */
103           const regex_char_t *pattern;  /* NULL : use the previous pattern. */
104           const regex_char_t *string;   /* NULL : end of tests. */
105 };
106 
run_tests(struct test_case * test,int verbose,int silent)107 void run_tests(struct test_case* test, int verbose, int silent)
108 {
109           int error;
110           const regex_char_t *ptr;
111           struct regex_machine* machine = NULL;
112           struct regex_match* match;
113           int begin, end, id, finished;
114           int success = 0, fail = 0;
115 
116           if (!verbose && !silent)
117                     printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
118 
119           for ( ; test->string ; test++) {
120                     if (verbose)
121                               printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
122                     fail++;
123 
124                     if (test->pattern) {
125                               if (machine)
126                                         regex_free_machine(machine);
127 
128                               ptr = test->pattern;
129                               while (*ptr)
130                                         ptr++;
131 
132                               machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
133 
134                               if (error) {
135                                         if (!verbose)
136                                                   printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
137                                         printf("ABORT: Error %d\n", error);
138                                         return;
139                               }
140                               if (!machine) {
141                                         if (!verbose)
142                                                   printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
143                                         printf("ABORT: machine must be exists. Report this bug, please\n");
144                                         return;
145                               }
146                     }
147                     else if (test->flags != 0) {
148                               if (!verbose)
149                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
150                               printf("ABORT: flag must be 0 if no pattern\n");
151                               return;
152                     }
153 
154                     ptr = test->string;
155                     while (*ptr)
156                               ptr++;
157 
158                     match = regex_begin_match(machine);
159 #ifdef REGEX_MATCH_VERBOSE
160                     if (!match) {
161                               if (!verbose)
162                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
163                               printf("ABORT: Not enough memory for matching\n");
164                               regex_free_machine(machine);
165                               return;
166                     }
167                     regex_continue_match_debug(match, test->string, ptr - test->string);
168                     begin = regex_get_result(match, &end, &id);
169                     finished = regex_is_match_finished(match);
170 
171                     if (begin != test->begin || end != test->end || id != test->id) {
172                               if (!verbose)
173                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
174                               printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
175                               continue;
176                     }
177                     if (test->finished != -1 && test->finished != !!finished) {
178                               if (!verbose)
179                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
180                               printf("FAIL A: finish check\n");
181                               continue;
182                     }
183 #endif
184 
185                     regex_reset_match(match);
186                     regex_continue_match(match, test->string, ptr - test->string);
187                     begin = regex_get_result(match, &end, &id);
188                     finished = regex_is_match_finished(match);
189                     regex_free_match(match);
190 
191                     if (begin != test->begin || end != test->end || id != test->id) {
192                               if (!verbose)
193                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
194                               printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
195                               continue;
196                     }
197                     if (test->finished != -1 && test->finished != !!finished) {
198                               if (!verbose)
199                                         printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
200                               printf("FAIL B: finish check\n");
201                               continue;
202                     }
203 
204                     if (verbose)
205                               printf("SUCCESS\n");
206                     fail--;
207                     success++;
208           }
209           if (machine)
210                     regex_free_machine(machine);
211 
212           printf("REGEX tests: ");
213           if (fail == 0)
214                     printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
215           else
216                     printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
217           printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
218 }
219 
220 /* Testing. */
221 
/*
 * Regression table consumed by run_tests(). Every entry lists, in
 * struct test_case order: expected begin, expected end, expected id,
 * expected finished state (-1 : don't care), compile flags, pattern and
 * subject string.
 *
 * An entry with a NULL pattern reuses the machine compiled for the previous
 * entry, so its flags field must be 0; the flags that are effectively in
 * use are noted in a comment where they are not 0. The entry whose string
 * is NULL terminates the table.
 */
static struct test_case tests[] = {
{ 3, 7, 0, -1, 0,
  S("text"), S("is textile") },
{ 0, 10, 0, -1, 0,
  S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
{ -1, 0, 0, 1, 0,
  S("^a+"), S("saaaa") },
{ 3, 6, 0, 0, 0,
  S("(a+|b+)$"), S("saabbb") },
{ 1, 6, 0, 0, 0,
  S("(a+|b+){,2}$"), S("saabbb") },
{ 1, 6, 0, 1, 0,
  S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
{ 1, 6, 0, 1, 0,
  S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
{ -1, 0, 0, 1, 0,
  S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
{ 0, 3, 1, -1, 0,
  S("^(ab{001!})?c"), S("abcde") },
{ 1, 15, 2, -1, 0,
  S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
{ 2, 9, 0, -1, 0,
  NULL, S("cacaadaadaa") },
{ -1, 0, 0, -1, REGEX_MATCH_BEGIN,
  S("(((ab?c|d{1})))"), S("ad") },
{ 0, 9, 3, -1, REGEX_MATCH_BEGIN,
  S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
{ 1, 6, 0, 0, REGEX_MATCH_END,
  S("(a+(bb|cc?)?){4,}"), S("maaaac") },
{ 3, 12, 1, 0, REGEX_MATCH_END,
  S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
{ 1, 2, 3, -1, 0,
  S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
{ 1, 4, 2, 1, 0,
  NULL, S("sxxaxxxaccacca") },
{ 0, 2, 1, 1, 0,
  NULL, S("ccdcdcdddddcdccccd") },
{ 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("^a+a+a+"), S("aaaaaa") },
{ 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("a+a+a+"), S("bbaaaaaa") },
{ 1, 4, 0, 1, 0,
  S("baa|a+"), S("sbaaaaaa") },
{ 0, 6, 0, 1, 0,
  S("baaa|baa|sbaaaa"), S("sbaaaaa") },
{ 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
  S("baaa|baa"), S("xbaaa") },
{ 0, 0, 3, 1, 0,
  S("{3!}"), S("xx") },
{ 0, 0, 1, 1, 0,
  S("{1!}(a{2!})*"), S("xx") },
{ 0, 2, 2, 0, 0,
  NULL, S("aa") },
{ 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
  S("{1!}(a{2!})*"), S("aaxx") },
{ 4, 12, 0, 1, 0,
  S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
{ 3, 7, 1, 1, 0,
  S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
{ 0, 8, 3, 0, 0,
  S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
{ 0, 9, 0, 0, 0,
  NULL, S("x-y[-][]x") },
{ 2, 8, 0, 1, 0,
  S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
{ 2, 9, 1, 1, 0,
  NULL, S("  </html><html> ") },
{ 2, 9, 0, 1, 0,
  S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
{ 1, 4, 0, 1, 0,
  S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
{ 4, 11, 0, 0, 0,
  NULL, S("ssaymmaa_ccl") },
{ 3, 6, 0, 1, REGEX_NEWLINE,
  S(".a[^k]"), S("\na\nxa\ns") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("^a+"), S("aa\n") },
{ 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
  NULL, S("\naaa\n") },
{ 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
  NULL, S("\n\na\n") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("a+$"), S("aa\n") },
{ 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
  NULL, S("aaa") },
{ 2, 4, 1, 1, REGEX_NEWLINE,
  S("^a(a{1!})*$"), S("\n\naa\n\n") },
{ 0, 1, 0, 0, 0 /* =REGEX_NEWLINE */,
  NULL, S("a") },
{ -1, 0, 0, -1, 0 /* =REGEX_NEWLINE */,
  NULL, S("ab\nba") },
/* Terminator: string == NULL. */
{ -1, 0, 0, 0, 0,
  NULL, NULL }
};
316 
main(int argc,char * argv[])317 int main(int argc, char* argv[])
318 {
319           int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
320 
321 /*        verbose_test("a((b)((c|d))|)c|"); */
322 /*        verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
323 /*        verbose_test("{3!}({3})({0!}){,"); */
324 /*        verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
325 /*        verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
326 /*        verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
327 
328           run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
329 
330           sljit_free_unused_memory_exec();
331 
332           return 0;
333 }
334