1 /*        $NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $   */
2 
3 /*
4  * Copyright (C) 1984-2023  Mark Nudelman
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to do pattern matching.
14  */
15 
16 #include "less.h"
17 
18 extern int caseless;
19 extern int is_caseless;
20 extern int utf_mode;
21 
22 /*
23  * Compile a search pattern, for future use by match_pattern.
24  */
compile_pattern2(char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)25 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
26 {
27           if (search_type & SRCH_NO_REGEX)
28                     return (0);
29   {
30 #if HAVE_GNU_REGEX
31           struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
32                     ecalloc(1, sizeof(struct re_pattern_buffer));
33           re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
34           if (re_compile_pattern(pattern, strlen(pattern), comp))
35           {
36                     free(comp);
37                     if (show_error)
38                               error("Invalid pattern", NULL_PARG);
39                     return (-1);
40           }
41           if (*comp_pattern != NULL)
42           {
43                     regfree(*comp_pattern);
44                     free(*comp_pattern);
45           }
46           *comp_pattern = comp;
47 #endif
48 #if HAVE_POSIX_REGCOMP
49           regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50           if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
51           {
52                     free(comp);
53                     if (show_error)
54                               error("Invalid pattern", NULL_PARG);
55                     return (-1);
56           }
57           if (*comp_pattern != NULL)
58           {
59                     regfree(*comp_pattern);
60                     free(*comp_pattern);
61           }
62           *comp_pattern = comp;
63 #endif
64 #if HAVE_PCRE
65           constant char *errstring;
66           int erroffset;
67           PARG parg;
68           pcre *comp = pcre_compile(pattern,
69                               ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
70                               (is_caseless ? PCRE_CASELESS : 0),
71                               &errstring, &erroffset, NULL);
72           if (comp == NULL)
73           {
74                     parg.p_string = (char *) errstring;
75                     if (show_error)
76                               error("%s", &parg);
77                     return (-1);
78           }
79           *comp_pattern = comp;
80 #endif
81 #if HAVE_PCRE2
82           int errcode;
83           PCRE2_SIZE erroffset;
84           PARG parg;
85           pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
86                               (is_caseless ? PCRE2_CASELESS : 0),
87                               &errcode, &erroffset, NULL);
88           if (comp == NULL)
89           {
90                     if (show_error)
91                     {
92                               char msg[160];
93                               pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94                               parg.p_string = msg;
95                               error("%s", &parg);
96                     }
97                     return (-1);
98           }
99           *comp_pattern = comp;
100 #endif
101 #if HAVE_RE_COMP
102           PARG parg;
103           if ((parg.p_string = re_comp(pattern)) != NULL)
104           {
105                     if (show_error)
106                               error("%s", &parg);
107                     return (-1);
108           }
109           *comp_pattern = 1;
110 #endif
111 #if HAVE_REGCMP
112           char *comp;
113           if ((comp = regcmp(pattern, 0)) == NULL)
114           {
115                     if (show_error)
116                               error("Invalid pattern", NULL_PARG);
117                     return (-1);
118           }
119           if (comp_pattern != NULL)
120                     free(*comp_pattern);
121           *comp_pattern = comp;
122 #endif
123 #if HAVE_V8_REGCOMP
124           struct regexp *comp;
125           reg_show_error = show_error;
126           comp = regcomp(pattern);
127           reg_show_error = 1;
128           if (comp == NULL)
129           {
130                     /*
131                      * regcomp has already printed an error message
132                      * via regerror().
133                      */
134                     return (-1);
135           }
136           if (*comp_pattern != NULL)
137                     free(*comp_pattern);
138           *comp_pattern = comp;
139 #endif
140   }
141           return (0);
142 }
143 
144 /*
145  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146  */
compile_pattern(char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)147 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
148 {
149           char *cvt_pattern;
150           int result;
151 
152           if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
153                     cvt_pattern = pattern;
154           else
155           {
156                     cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
157                     cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
158           }
159           result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
160           if (cvt_pattern != pattern)
161                     free(cvt_pattern);
162           return (result);
163 }
164 
165 /*
166  * Forget that we have a compiled pattern.
167  */
uncompile_pattern(PATTERN_TYPE * pattern)168 public void uncompile_pattern(PATTERN_TYPE *pattern)
169 {
170 #if HAVE_GNU_REGEX
171           if (*pattern != NULL)
172           {
173                     regfree(*pattern);
174                     free(*pattern);
175           }
176           *pattern = NULL;
177 #endif
178 #if HAVE_POSIX_REGCOMP
179           if (*pattern != NULL)
180           {
181                     regfree(*pattern);
182                     free(*pattern);
183           }
184           *pattern = NULL;
185 #endif
186 #if HAVE_PCRE
187           if (*pattern != NULL)
188                     pcre_free(*pattern);
189           *pattern = NULL;
190 #endif
191 #if HAVE_PCRE2
192           if (*pattern != NULL)
193                     pcre2_code_free(*pattern);
194           *pattern = NULL;
195 #endif
196 #if HAVE_RE_COMP
197           *pattern = 0;
198 #endif
199 #if HAVE_REGCMP
200           if (*pattern != NULL)
201                     free(*pattern);
202           *pattern = NULL;
203 #endif
204 #if HAVE_V8_REGCOMP
205           if (*pattern != NULL)
206                     free(*pattern);
207           *pattern = NULL;
208 #endif
209 }
210 
#if 0
/*
 * Report whether a pattern compiles successfully.
 * Returns 1 if it does, 0 if not.  Errors are not shown to the user,
 * and the trial compilation is discarded afterwards.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE scratch;

	SET_NULL_PATTERN(scratch);
	if (compile_pattern2(pattern, 0, &scratch, 0) != 0)
		return (0);
	uncompile_pattern(&scratch);
	return (1);
}
#endif
228 
229 /*
230  * Is a compiled pattern null?
231  */
is_null_pattern(PATTERN_TYPE pattern)232 public int is_null_pattern(PATTERN_TYPE pattern)
233 {
234 #if HAVE_GNU_REGEX
235           return (pattern == NULL);
236 #endif
237 #if HAVE_POSIX_REGCOMP
238           return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE
241           return (pattern == NULL);
242 #endif
243 #if HAVE_PCRE2
244           return (pattern == NULL);
245 #endif
246 #if HAVE_RE_COMP
247           return (pattern == 0);
248 #endif
249 #if HAVE_REGCMP
250           return (pattern == NULL);
251 #endif
252 #if HAVE_V8_REGCOMP
253           return (pattern == NULL);
254 #endif
255 #if NO_REGEX
256           return (pattern == NULL);
257 #endif
258 }
259 /*
260  * Simple pattern matching function.
261  * It supports no metacharacters like *, etc.
262  */
match(char * pattern,int pattern_len,char * buf,int buf_len,char *** sp,char *** ep,int nsubs)263 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
264 {
265           char *pp, *lp;
266           char *pattern_end = pattern + pattern_len;
267           char *buf_end = buf + buf_len;
268 
269           for ( ;  buf < buf_end;  buf++)
270           {
271                     for (pp = pattern, lp = buf;  ;  pp++, lp++)
272                     {
273                               char cp = *pp;
274                               char cl = *lp;
275                               if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
276                                         cp = ASCII_TO_LOWER(cp);
277                               if (cp != cl)
278                                         break;
279                               if (pp == pattern_end || lp == buf_end)
280                                         break;
281                     }
282                     if (pp == pattern_end)
283                     {
284                               *(*sp)++ = buf;
285                               *(*ep)++ = lp;
286                               return (1);
287                     }
288           }
289           **sp = **ep = NULL;
290           return (0);
291 }
292 
293 /*
294  * Perform a pattern match with the previously compiled pattern.
295  * Set sp[0] and ep[0] to the start and end of the matched string.
296  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
297  * Subpatterns are defined by parentheses in the regex language.
298  */
match_pattern1(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)299 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
300 {
301           int matched;
302 
303 #if NO_REGEX
304           search_type |= SRCH_NO_REGEX;
305 #endif
306           if (search_type & SRCH_NO_REGEX)
307                     matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308           else
309           {
310 #if HAVE_GNU_REGEX
311           {
312                     struct re_registers search_regs;
313                     pattern->not_bol = notbol;
314                     pattern->regs_allocated = REGS_UNALLOCATED;
315                     matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316                     if (matched)
317                     {
318                               *sp++ = line + search_regs.start[0];
319                               *ep++ = line + search_regs.end[0];
320                     }
321           }
322 #endif
323 #if HAVE_POSIX_REGCOMP
324           {
325                     #define RM_COUNT (NUM_SEARCH_COLORS+2)
326                     regmatch_t rm[RM_COUNT];
327                     int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329                     flags |= REG_STARTEND;
330                     rm[0].rm_so = 0;
331                     rm[0].rm_eo = line_len;
332 #endif
333                     matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334                     if (matched)
335                     {
336                               int i;
337                               int ecount;
338                               for (ecount = RM_COUNT;  ecount > 0;  ecount--)
339                                         if (rm[ecount-1].rm_so >= 0)
340                                                   break;
341                               if (ecount >= nsp)
342                                         ecount = nsp-1;
343                               for (i = 0;  i < ecount;  i++)
344                               {
345                                         if (rm[i].rm_so < 0)
346                                         {
347                                                   *sp++ = *ep++ = line;
348                                         } else
349                                         {
350 #ifndef __WATCOMC__
351                                                   *sp++ = line + rm[i].rm_so;
352                                                   *ep++ = line + rm[i].rm_eo;
353 #else
354                                                   *sp++ = rm[i].rm_sp;
355                                                   *ep++ = rm[i].rm_ep;
356 #endif
357                                         }
358                               }
359                     }
360           }
361 #endif
362 #if HAVE_PCRE
363           {
364                     #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365                     int ovector[OVECTOR_COUNT];
366                     int flags = (notbol) ? PCRE_NOTBOL : 0;
367                     int i;
368                     int ecount;
369                     int mcount = pcre_exec(pattern, NULL, line, line_len,
370                               0, flags, ovector, OVECTOR_COUNT);
371                     matched = (mcount > 0);
372                     ecount = nsp-1;
373                     if (ecount > mcount) ecount = mcount;
374                     for (i = 0;  i < ecount*2; )
375                     {
376                               if (ovector[i] < 0 || ovector[i+1] < 0)
377                               {
378                                         *sp++ = *ep++ = line;
379                                         i += 2;
380                               } else
381                               {
382                                         *sp++ = line + ovector[i++];
383                                         *ep++ = line + ovector[i++];
384                               }
385                     }
386           }
387 #endif
388 #if HAVE_PCRE2
389           {
390                     int flags = (notbol) ? PCRE2_NOTBOL : 0;
391                     pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392                     int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393                               0, flags, md, NULL);
394                     matched = (mcount > 0);
395                     if (matched)
396                     {
397                               PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398                               int i;
399                               int ecount = nsp-1;
400                               if (ecount > mcount) ecount = mcount;
401                               for (i = 0;  i < ecount*2; )
402                               {
403                                         if (ovector[i] < 0 || ovector[i+1] < 0)
404                                         {
405                                                   *sp++ = *ep++ = line;
406                                                   i += 2;
407                                         } else
408                                         {
409                                                   *sp++ = line + ovector[i++];
410                                                   *ep++ = line + ovector[i++];
411                                         }
412                               }
413                     }
414                     pcre2_match_data_free(md);
415           }
416 #endif
417 #if HAVE_RE_COMP
418           matched = (re_exec(line) == 1);
419           /*
420            * re_exec doesn't seem to provide a way to get the matched string.
421            */
422 #endif
423 #if HAVE_REGCMP
424           matched = ((*ep++ = regex(pattern, line)) != NULL);
425           if (matched)
426                     *sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430           matched = regexec2(pattern, line, notbol);
431 #else
432           matched = regexec(pattern, line);
433 #endif
434           if (matched)
435           {
436                     *sp++ = pattern->startp[0];
437                     *ep++ = pattern->endp[0];
438           }
439 #endif
440           }
441           *sp = *ep = NULL;
442           matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443                               ((search_type & SRCH_NO_MATCH) && !matched);
444           return (matched);
445 }
446 
match_pattern(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)447 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
448 {
449           int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450           int i;
451           for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
452           {
453                     if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454                               matched = 0;
455           }
456           return matched;
457 }
458 
459 /*
460  * Return the name of the pattern matching library.
461  */
pattern_lib_name(void)462 public char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465           return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468           return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471           return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474           return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477           return ("BSD");
478 #else
479 #if HAVE_REGCMP
480           return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483           return ("Spencer V8");
484 #else
485           return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494