xref: /dragonfly/contrib/less/pattern.c (revision e0f238eda64c20d98364903e0c003825fd0b66dd)
1 /*
2  * Copyright (C) 1984-2024  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 
16 extern int caseless;
17 extern int is_caseless;
18 extern int utf_mode;
19 
20 /*
21  * Compile a search pattern, for future use by match_pattern.
22  */
compile_pattern2(constant char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)23 static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24 {
25           if (search_type & SRCH_NO_REGEX)
26                     return (0);
27   {
28 #if HAVE_GNU_REGEX
29           struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30                     ecalloc(1, sizeof(struct re_pattern_buffer));
31           re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32           if (re_compile_pattern(pattern, strlen(pattern), comp))
33           {
34                     free(comp);
35                     if (show_error)
36                               error("Invalid pattern", NULL_PARG);
37                     return (-1);
38           }
39           if (*comp_pattern != NULL)
40           {
41                     regfree(*comp_pattern);
42                     free(*comp_pattern);
43           }
44           *comp_pattern = comp;
45 #endif
46 #if HAVE_POSIX_REGCOMP
47           regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48           if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49           {
50                     free(comp);
51                     if (show_error)
52                               error("Invalid pattern", NULL_PARG);
53                     return (-1);
54           }
55           if (*comp_pattern != NULL)
56           {
57                     regfree(*comp_pattern);
58                     free(*comp_pattern);
59           }
60           *comp_pattern = comp;
61 #endif
62 #if HAVE_PCRE
63           constant char *errstring;
64           int erroffset;
65           PARG parg;
66           pcre *comp = pcre_compile(pattern,
67                               ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68                               (is_caseless ? PCRE_CASELESS : 0),
69                               &errstring, &erroffset, NULL);
70           if (comp == NULL)
71           {
72                     parg.p_string = (char *) errstring;
73                     if (show_error)
74                               error("%s", &parg);
75                     return (-1);
76           }
77           *comp_pattern = comp;
78 #endif
79 #if HAVE_PCRE2
80           int errcode;
81           PCRE2_SIZE erroffset;
82           PARG parg;
83           pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84                               (is_caseless ? PCRE2_CASELESS : 0),
85                               &errcode, &erroffset, NULL);
86           if (comp == NULL)
87           {
88                     if (show_error)
89                     {
90                               char msg[160];
91                               pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92                               parg.p_string = msg;
93                               error("%s", &parg);
94                     }
95                     return (-1);
96           }
97           *comp_pattern = comp;
98 #endif
99 #if HAVE_RE_COMP
100           PARG parg;
101           if ((parg.p_string = re_comp(pattern)) != NULL)
102           {
103                     if (show_error)
104                               error("%s", &parg);
105                     return (-1);
106           }
107           *comp_pattern = 1;
108 #endif
109 #if HAVE_REGCMP
110           char *comp;
111           if ((comp = regcmp(pattern, 0)) == NULL)
112           {
113                     if (show_error)
114                               error("Invalid pattern", NULL_PARG);
115                     return (-1);
116           }
117           if (comp_pattern != NULL)
118                     free(*comp_pattern);
119           *comp_pattern = comp;
120 #endif
121 #if HAVE_V8_REGCOMP
122           struct regexp *comp;
123           reg_show_error = show_error;
124           comp = regcomp(pattern);
125           reg_show_error = 1;
126           if (comp == NULL)
127           {
128                     /*
129                      * regcomp has already printed an error message
130                      * via regerror().
131                      */
132                     return (-1);
133           }
134           if (*comp_pattern != NULL)
135                     free(*comp_pattern);
136           *comp_pattern = comp;
137 #endif
138   }
139           return (0);
140 }
141 
142 /*
143  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144  */
compile_pattern(constant char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)145 public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146 {
147           int result;
148 
149           if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
150           {
151                     result = compile_pattern2(pattern, search_type, comp_pattern, show_error);
152           } else
153           {
154                     char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155                     cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC);
156                     result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
157                     free(cvt_pattern);
158           }
159           return (result);
160 }
161 
162 /*
163  * Forget that we have a compiled pattern.
164  */
uncompile_pattern(PATTERN_TYPE * pattern)165 public void uncompile_pattern(PATTERN_TYPE *pattern)
166 {
167 #if HAVE_GNU_REGEX
168           if (*pattern != NULL)
169           {
170                     regfree(*pattern);
171                     free(*pattern);
172           }
173           *pattern = NULL;
174 #endif
175 #if HAVE_POSIX_REGCOMP
176           if (*pattern != NULL)
177           {
178                     regfree(*pattern);
179                     free(*pattern);
180           }
181           *pattern = NULL;
182 #endif
183 #if HAVE_PCRE
184           if (*pattern != NULL)
185                     pcre_free(*pattern);
186           *pattern = NULL;
187 #endif
188 #if HAVE_PCRE2
189           if (*pattern != NULL)
190                     pcre2_code_free(*pattern);
191           *pattern = NULL;
192 #endif
193 #if HAVE_RE_COMP
194           *pattern = 0;
195 #endif
196 #if HAVE_REGCMP
197           if (*pattern != NULL)
198                     free(*pattern);
199           *pattern = NULL;
200 #endif
201 #if HAVE_V8_REGCOMP
202           if (*pattern != NULL)
203                     free(*pattern);
204           *pattern = NULL;
205 #endif
206 }
207 
208 #if 0
209 /*
210  * Can a pattern be successfully compiled?
211  */
212 public int valid_pattern(char *pattern)
213 {
214           PATTERN_TYPE comp_pattern;
215           int result;
216 
217           SET_NULL_PATTERN(comp_pattern);
218           result = compile_pattern2(pattern, 0, &comp_pattern, 0);
219           if (result != 0)
220                     return (0);
221           uncompile_pattern(&comp_pattern);
222           return (1);
223 }
224 #endif
225 
226 /*
227  * Is a compiled pattern null?
228  */
is_null_pattern(PATTERN_TYPE pattern)229 public lbool is_null_pattern(PATTERN_TYPE pattern)
230 {
231 #if HAVE_GNU_REGEX
232           return (pattern == NULL);
233 #endif
234 #if HAVE_POSIX_REGCOMP
235           return (pattern == NULL);
236 #endif
237 #if HAVE_PCRE
238           return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE2
241           return (pattern == NULL);
242 #endif
243 #if HAVE_RE_COMP
244           return (pattern == 0);
245 #endif
246 #if HAVE_REGCMP
247           return (pattern == NULL);
248 #endif
249 #if HAVE_V8_REGCOMP
250           return (pattern == NULL);
251 #endif
252 #if NO_REGEX
253           return (pattern == NULL);
254 #endif
255 }
256 /*
257  * Simple pattern matching function.
258  * It supports no metacharacters like *, etc.
259  */
match(constant char * pattern,size_t pattern_len,constant char * buf,int buf_len,constant char *** sp,constant char *** ep,int nsubs)260 static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs)
261 {
262           constant char *pp;
263           constant char *lp;
264           constant char *pattern_end = pattern + pattern_len;
265           constant char *buf_end = buf + buf_len;
266 
267           (void) nsubs;
268           for ( ;  buf < buf_end;  buf++)
269           {
270                     for (pp = pattern, lp = buf;  ;  pp++, lp++)
271                     {
272                               char cp = *pp;
273                               char cl = *lp;
274                               if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
275                                         cp = ASCII_TO_LOWER(cp);
276                               if (cp != cl)
277                                         break;
278                               if (pp == pattern_end || lp == buf_end)
279                                         break;
280                     }
281                     if (pp == pattern_end)
282                     {
283                               *(*sp)++ = buf;
284                               *(*ep)++ = lp;
285                               return (1);
286                     }
287           }
288           **sp = **ep = NULL;
289           return (0);
290 }
291 
292 /*
293  * Perform a pattern match with the previously compiled pattern.
294  * Set sp[0] and ep[0] to the start and end of the matched string.
295  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
296  * Subpatterns are defined by parentheses in the regex language.
297  */
match_pattern1(PATTERN_TYPE pattern,constant char * tpattern,constant char * line,size_t aline_len,constant char ** sp,constant char ** ep,int nsp,int notbol,int search_type)298 static int match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
299 {
300           int matched;
301           int line_len = (int) aline_len; /*{{type-issue}}*/
302 
303 #if NO_REGEX
304           search_type |= SRCH_NO_REGEX;
305 #endif
306           if (search_type & SRCH_NO_REGEX)
307                     matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308           else
309           {
310 #if HAVE_GNU_REGEX
311           {
312                     struct re_registers search_regs;
313                     pattern->not_bol = notbol;
314                     pattern->regs_allocated = REGS_UNALLOCATED;
315                     matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316                     if (matched)
317                     {
318                               *sp++ = line + search_regs.start[0];
319                               *ep++ = line + search_regs.end[0];
320                     }
321           }
322 #endif
323 #if HAVE_POSIX_REGCOMP
324           {
325                     #define RM_COUNT (NUM_SEARCH_COLORS+2)
326                     regmatch_t rm[RM_COUNT];
327                     int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329                     flags |= REG_STARTEND;
330                     rm[0].rm_so = 0;
331                     rm[0].rm_eo = line_len;
332 #endif
333                     matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334                     if (matched)
335                     {
336                               int i;
337                               int ecount;
338                               for (ecount = RM_COUNT;  ecount > 0;  ecount--)
339                                         if (rm[ecount-1].rm_so >= 0)
340                                                   break;
341                               if (ecount >= nsp)
342                                         ecount = nsp-1;
343                               for (i = 0;  i < ecount;  i++)
344                               {
345                                         if (rm[i].rm_so < 0)
346                                         {
347                                                   *sp++ = *ep++ = line;
348                                         } else
349                                         {
350 #ifndef __WATCOMC__
351                                                   *sp++ = line + rm[i].rm_so;
352                                                   *ep++ = line + rm[i].rm_eo;
353 #else
354                                                   *sp++ = rm[i].rm_sp;
355                                                   *ep++ = rm[i].rm_ep;
356 #endif
357                                         }
358                               }
359                     }
360           }
361 #endif
362 #if HAVE_PCRE
363           {
364                     #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365                     int ovector[OVECTOR_COUNT];
366                     int flags = (notbol) ? PCRE_NOTBOL : 0;
367                     int i;
368                     int ecount;
369                     int mcount = pcre_exec(pattern, NULL, line, line_len,
370                               0, flags, ovector, OVECTOR_COUNT);
371                     matched = (mcount > 0);
372                     ecount = nsp-1;
373                     if (ecount > mcount) ecount = mcount;
374                     for (i = 0;  i < ecount*2; )
375                     {
376                               if (ovector[i] < 0 || ovector[i+1] < 0)
377                               {
378                                         *sp++ = *ep++ = line;
379                                         i += 2;
380                               } else
381                               {
382                                         *sp++ = line + ovector[i++];
383                                         *ep++ = line + ovector[i++];
384                               }
385                     }
386           }
387 #endif
388 #if HAVE_PCRE2
389           {
390                     int flags = (notbol) ? PCRE2_NOTBOL : 0;
391                     pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392                     int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393                               0, flags, md, NULL);
394                     matched = (mcount > 0);
395                     if (matched)
396                     {
397                               PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398                               int i;
399                               int ecount = nsp-1;
400                               if (ecount > mcount) ecount = mcount;
401                               for (i = 0;  i < ecount*2; )
402                               {
403                                         if (ovector[i] < 0 || ovector[i+1] < 0)
404                                         {
405                                                   *sp++ = *ep++ = line;
406                                                   i += 2;
407                                         } else
408                                         {
409                                                   *sp++ = line + ovector[i++];
410                                                   *ep++ = line + ovector[i++];
411                                         }
412                               }
413                     }
414                     pcre2_match_data_free(md);
415           }
416 #endif
417 #if HAVE_RE_COMP
418           matched = (re_exec(line) == 1);
419           /*
420            * re_exec doesn't seem to provide a way to get the matched string.
421            */
422 #endif
423 #if HAVE_REGCMP
424           matched = ((*ep++ = regex(pattern, line)) != NULL);
425           if (matched)
426                     *sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430           matched = regexec2(pattern, line, notbol);
431 #else
432           matched = regexec(pattern, line);
433 #endif
434           if (matched)
435           {
436                     *sp++ = pattern->startp[0];
437                     *ep++ = pattern->endp[0];
438           }
439 #endif
440           }
441           *sp = *ep = NULL;
442           matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443                               ((search_type & SRCH_NO_MATCH) && !matched);
444           return (matched);
445 }
446 
match_pattern(PATTERN_TYPE pattern,constant char * tpattern,constant char * line,size_t line_len,constant char ** sp,constant char ** ep,int nsp,int notbol,int search_type)447 public int match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
448 {
449           int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450           int i;
451           for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
452           {
453                     if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454                               matched = 0;
455           }
456           return matched;
457 }
458 
459 /*
460  * Return the name of the pattern matching library.
461  */
pattern_lib_name(void)462 public constant char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465           return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468           return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471           return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474           return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477           return ("BSD");
478 #else
479 #if HAVE_REGCMP
480           return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483           return ("Spencer V8");
484 #else
485           return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494