1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37   /// @cond undocumented
38 
39   // Result of merging regex_match and regex_search.
40   //
41   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42   // the other one if possible, for test purpose).
43   //
44   // That __match_mode is true means regex_match, else regex_search.
45   template<typename _BiIter, typename _Alloc,
46              typename _CharT, typename _TraitsT>
47     bool
__regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags,_RegexExecutorPolicy __policy,bool __match_mode)48     __regex_algo_impl(_BiIter                              __s,
49                           _BiIter                              __e,
50                           match_results<_BiIter, _Alloc>&      __m,
51                           const basic_regex<_CharT, _TraitsT>& __re,
52                           regex_constants::match_flag_type     __flags,
53                           _RegexExecutorPolicy                 __policy,
54                           bool                                           __match_mode)
55     {
56       if (__re._M_automaton == nullptr)
57           return false;
58 
59       typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60       __m._M_begin = __s;
61       __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65             || (__policy == _RegexExecutorPolicy::_S_alternate
66                 && !__re._M_automaton->_M_has_backref))
67           {
68             _Executor<_BiIter, _Alloc, _TraitsT, false>
69               __executor(__s, __e, __res, __re, __flags);
70             if (__match_mode)
71               __ret = __executor._M_match();
72             else
73               __ret = __executor._M_search();
74           }
75       else
76           {
77             _Executor<_BiIter, _Alloc, _TraitsT, true>
78               __executor(__s, __e, __res, __re, __flags);
79             if (__match_mode)
80               __ret = __executor._M_match();
81             else
82               __ret = __executor._M_search();
83           }
84       if (__ret)
85           {
86             for (auto& __it : __res)
87               if (!__it.matched)
88                 __it.first = __it.second = __e;
89             auto& __pre = __m._M_prefix();
90             auto& __suf = __m._M_suffix();
91             if (__match_mode)
92               {
93                 __pre.matched = false;
94                 __pre.first = __s;
95                 __pre.second = __s;
96                 __suf.matched = false;
97                 __suf.first = __e;
98                 __suf.second = __e;
99               }
100             else
101               {
102                 __pre.first = __s;
103                 __pre.second = __res[0].first;
104                 __pre.matched = (__pre.first != __pre.second);
105                 __suf.first = __res[0].second;
106                 __suf.second = __e;
107                 __suf.matched = (__suf.first != __suf.second);
108               }
109           }
110       else
111           {
112             __m._M_establish_failed_match(__e);
113           }
114       return __ret;
115     }
116   /// @endcond
117 } // namespace __detail
118 
119   /// @cond
120 
121   template<typename _Ch_type>
122   template<typename _Fwd_iter>
123     typename regex_traits<_Ch_type>::string_type
124     regex_traits<_Ch_type>::
lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const125     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126     {
127       typedef std::ctype<char_type> __ctype_type;
128       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 
130       static const char* __collatenames[] =
131           {
132             "NUL",
133             "SOH",
134             "STX",
135             "ETX",
136             "EOT",
137             "ENQ",
138             "ACK",
139             "alert",
140             "backspace",
141             "tab",
142             "newline",
143             "vertical-tab",
144             "form-feed",
145             "carriage-return",
146             "SO",
147             "SI",
148             "DLE",
149             "DC1",
150             "DC2",
151             "DC3",
152             "DC4",
153             "NAK",
154             "SYN",
155             "ETB",
156             "CAN",
157             "EM",
158             "SUB",
159             "ESC",
160             "IS4",
161             "IS3",
162             "IS2",
163             "IS1",
164             "space",
165             "exclamation-mark",
166             "quotation-mark",
167             "number-sign",
168             "dollar-sign",
169             "percent-sign",
170             "ampersand",
171             "apostrophe",
172             "left-parenthesis",
173             "right-parenthesis",
174             "asterisk",
175             "plus-sign",
176             "comma",
177             "hyphen",
178             "period",
179             "slash",
180             "zero",
181             "one",
182             "two",
183             "three",
184             "four",
185             "five",
186             "six",
187             "seven",
188             "eight",
189             "nine",
190             "colon",
191             "semicolon",
192             "less-than-sign",
193             "equals-sign",
194             "greater-than-sign",
195             "question-mark",
196             "commercial-at",
197             "A",
198             "B",
199             "C",
200             "D",
201             "E",
202             "F",
203             "G",
204             "H",
205             "I",
206             "J",
207             "K",
208             "L",
209             "M",
210             "N",
211             "O",
212             "P",
213             "Q",
214             "R",
215             "S",
216             "T",
217             "U",
218             "V",
219             "W",
220             "X",
221             "Y",
222             "Z",
223             "left-square-bracket",
224             "backslash",
225             "right-square-bracket",
226             "circumflex",
227             "underscore",
228             "grave-accent",
229             "a",
230             "b",
231             "c",
232             "d",
233             "e",
234             "f",
235             "g",
236             "h",
237             "i",
238             "j",
239             "k",
240             "l",
241             "m",
242             "n",
243             "o",
244             "p",
245             "q",
246             "r",
247             "s",
248             "t",
249             "u",
250             "v",
251             "w",
252             "x",
253             "y",
254             "z",
255             "left-curly-bracket",
256             "vertical-line",
257             "right-curly-bracket",
258             "tilde",
259             "DEL",
260           };
261 
262       string __s;
263       for (; __first != __last; ++__first)
264           __s += __fctyp.narrow(*__first, 0);
265 
266       for (const auto& __it : __collatenames)
267           if (__s == __it)
268             return string_type(1, __fctyp.widen(
269               static_cast<char>(&__it - __collatenames)));
270 
271       // TODO Add digraph support:
272       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 
274       return string_type();
275     }
276 
277   template<typename _Ch_type>
278   template<typename _Fwd_iter>
279     typename regex_traits<_Ch_type>::char_class_type
280     regex_traits<_Ch_type>::
lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const281     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282     {
283       typedef std::ctype<char_type> __ctype_type;
284       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 
286       // Mappings from class name to class mask.
287       static const pair<const char*, char_class_type> __classnames[] =
288       {
289           {"d", ctype_base::digit},
290           {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291           {"s", ctype_base::space},
292           {"alnum", ctype_base::alnum},
293           {"alpha", ctype_base::alpha},
294           {"blank", ctype_base::blank},
295           {"cntrl", ctype_base::cntrl},
296           {"digit", ctype_base::digit},
297           {"graph", ctype_base::graph},
298           {"lower", ctype_base::lower},
299           {"print", ctype_base::print},
300           {"punct", ctype_base::punct},
301           {"space", ctype_base::space},
302           {"upper", ctype_base::upper},
303           {"xdigit", ctype_base::xdigit},
304       };
305 
306       string __s;
307       for (; __first != __last; ++__first)
308           __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 
310       for (const auto& __it : __classnames)
311           if (__s == __it.first)
312             {
313               if (__icase
314                     && ((__it.second
315                          & (ctype_base::lower | ctype_base::upper)) != 0))
316                 return ctype_base::alpha;
317               return __it.second;
318             }
319       return 0;
320     }
321 
322   template<typename _Ch_type>
323     bool
324     regex_traits<_Ch_type>::
isctype(_Ch_type __c,char_class_type __f) const325     isctype(_Ch_type __c, char_class_type __f) const
326     {
327       typedef std::ctype<char_type> __ctype_type;
328       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 
330       return __fctyp.is(__f._M_base, __c)
331           // [[:w:]]
332           || ((__f._M_extended & _RegexMask::_S_under)
333               && __c == __fctyp.widen('_'));
334     }
335 
336   template<typename _Ch_type>
337     int
338     regex_traits<_Ch_type>::
value(_Ch_type __ch,int __radix) const339     value(_Ch_type __ch, int __radix) const
340     {
341       std::basic_istringstream<char_type> __is(string_type(1, __ch));
342       long __v;
343       if (__radix == 8)
344           __is >> std::oct;
345       else if (__radix == 16)
346           __is >> std::hex;
347       __is >> __v;
348       return __is.fail() ? -1 : __v;
349     }
350 
351   template<typename _Bi_iter, typename _Alloc>
352   template<typename _Out_iter>
353     _Out_iter
354     match_results<_Bi_iter, _Alloc>::
format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const355     format(_Out_iter __out,
356              const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357              const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358              match_flag_type __flags) const
359     {
360       __glibcxx_assert( ready() );
361       regex_traits<char_type> __traits;
362       typedef std::ctype<char_type> __ctype_type;
363       const __ctype_type&
364           __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 
366       auto __output = [&](size_t __idx)
367           {
368             auto& __sub = (*this)[__idx];
369             if (__sub.matched)
370               __out = std::copy(__sub.first, __sub.second, __out);
371           };
372 
373       if (__flags & regex_constants::format_sed)
374           {
375             bool __escaping = false;
376             for (; __fmt_first != __fmt_last; __fmt_first++)
377               {
378                 if (__escaping)
379                     {
380                       __escaping = false;
381                       if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382                         __output(__traits.value(*__fmt_first, 10));
383                       else
384                         *__out++ = *__fmt_first;
385                       continue;
386                     }
387                 if (*__fmt_first == '\\')
388                     {
389                       __escaping = true;
390                       continue;
391                     }
392                 if (*__fmt_first == '&')
393                     {
394                       __output(0);
395                       continue;
396                     }
397                 *__out++ = *__fmt_first;
398               }
399             if (__escaping)
400               *__out++ = '\\';
401           }
402       else
403           {
404             while (1)
405               {
406                 auto __next = std::find(__fmt_first, __fmt_last, '$');
407                 if (__next == __fmt_last)
408                     break;
409 
410                 __out = std::copy(__fmt_first, __next, __out);
411 
412                 auto __eat = [&](char __ch) -> bool
413                     {
414                       if (*__next == __ch)
415                         {
416                           ++__next;
417                           return true;
418                         }
419                       return false;
420                     };
421 
422                 if (++__next == __fmt_last)
423                     *__out++ = '$';
424                 else if (__eat('$'))
425                     *__out++ = '$';
426                 else if (__eat('&'))
427                     __output(0);
428                 else if (__eat('`'))
429                     {
430                       auto& __sub = _M_prefix();
431                       if (__sub.matched)
432                         __out = std::copy(__sub.first, __sub.second, __out);
433                     }
434                 else if (__eat('\''))
435                     {
436                       auto& __sub = _M_suffix();
437                       if (__sub.matched)
438                         __out = std::copy(__sub.first, __sub.second, __out);
439                     }
440                 else if (__fctyp.is(__ctype_type::digit, *__next))
441                     {
442                       long __num = __traits.value(*__next, 10);
443                       if (++__next != __fmt_last
444                           && __fctyp.is(__ctype_type::digit, *__next))
445                         {
446                           __num *= 10;
447                           __num += __traits.value(*__next++, 10);
448                         }
449                       if (0 <= __num && __num < this->size())
450                         __output(__num);
451                     }
452                 else
453                     *__out++ = '$';
454                 __fmt_first = __next;
455               }
456             __out = std::copy(__fmt_first, __fmt_last, __out);
457           }
458       return __out;
459     }
460 
461   template<typename _Out_iter, typename _Bi_iter,
462              typename _Rx_traits, typename _Ch_type>
463     _Out_iter
__regex_replace(_Out_iter __out,_Bi_iter __first,_Bi_iter __last,const basic_regex<_Ch_type,_Rx_traits> & __e,const _Ch_type * __fmt,size_t __len,regex_constants::match_flag_type __flags)464     __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465                         const basic_regex<_Ch_type, _Rx_traits>& __e,
466                         const _Ch_type* __fmt, size_t __len,
467                         regex_constants::match_flag_type __flags)
468     {
469       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470       _IterT __i(__first, __last, __e, __flags);
471       _IterT __end;
472       if (__i == __end)
473           {
474             if (!(__flags & regex_constants::format_no_copy))
475               __out = std::copy(__first, __last, __out);
476           }
477       else
478           {
479             sub_match<_Bi_iter> __last;
480             for (; __i != __end; ++__i)
481               {
482                 if (!(__flags & regex_constants::format_no_copy))
483                     __out = std::copy(__i->prefix().first, __i->prefix().second,
484                                           __out);
485                 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
486                 __last = __i->suffix();
487                 if (__flags & regex_constants::format_first_only)
488                     break;
489               }
490             if (!(__flags & regex_constants::format_no_copy))
491               __out = std::copy(__last.first, __last.second, __out);
492           }
493       return __out;
494     }
495 
496   template<typename _Bi_iter,
497              typename _Ch_type,
498              typename _Rx_traits>
499     bool
500     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_iterator & __rhs) const501     operator==(const regex_iterator& __rhs) const noexcept
502     {
503       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
504           return true;
505       return _M_pregex == __rhs._M_pregex
506             && _M_begin == __rhs._M_begin
507             && _M_end == __rhs._M_end
508             && _M_flags == __rhs._M_flags
509             && _M_match[0] == __rhs._M_match[0];
510     }
511 
512   template<typename _Bi_iter,
513              typename _Ch_type,
514              typename _Rx_traits>
515     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
516     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()517     operator++()
518     {
519       // In all cases in which the call to regex_search returns true,
520       // match.prefix().first shall be equal to the previous value of
521       // match[0].second, and for each index i in the half-open range
522       // [0, match.size()) for which match[i].matched is true,
523       // match[i].position() shall return distance(begin, match[i].first).
524       // [28.12.1.4.5]
525       if (_M_match[0].matched)
526           {
527             auto __start = _M_match[0].second;
528             auto __prefix_first = _M_match[0].second;
529             if (_M_match[0].first == _M_match[0].second)
530               {
531                 if (__start == _M_end)
532                     {
533                       _M_pregex = nullptr;
534                       return *this;
535                     }
536                 else
537                     {
538                       if (regex_search(__start, _M_end, _M_match, *_M_pregex,
539                                            _M_flags
540                                            | regex_constants::match_not_null
541                                            | regex_constants::match_continuous))
542                         {
543                           __glibcxx_assert(_M_match[0].matched);
544                           auto& __prefix = _M_match._M_prefix();
545                           __prefix.first = __prefix_first;
546                           __prefix.matched = __prefix.first != __prefix.second;
547                           // [28.12.1.4.5]
548                           _M_match._M_begin = _M_begin;
549                           return *this;
550                         }
551                       else
552                         ++__start;
553                     }
554               }
555             _M_flags |= regex_constants::match_prev_avail;
556             if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
557               {
558                 __glibcxx_assert(_M_match[0].matched);
559                 auto& __prefix = _M_match._M_prefix();
560                 __prefix.first = __prefix_first;
561                 __prefix.matched = __prefix.first != __prefix.second;
562                 // [28.12.1.4.5]
563                 _M_match._M_begin = _M_begin;
564               }
565             else
566               _M_pregex = nullptr;
567           }
568       return *this;
569     }
570 
571   template<typename _Bi_iter,
572              typename _Ch_type,
573              typename _Rx_traits>
574     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
575     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator =(const regex_token_iterator & __rhs)576     operator=(const regex_token_iterator& __rhs)
577     {
578       _M_position = __rhs._M_position;
579       _M_subs = __rhs._M_subs;
580       _M_n = __rhs._M_n;
581       _M_suffix = __rhs._M_suffix;
582       _M_has_m1 = __rhs._M_has_m1;
583       _M_normalize_result();
584       return *this;
585     }
586 
587   template<typename _Bi_iter,
588              typename _Ch_type,
589              typename _Rx_traits>
590     bool
591     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_token_iterator & __rhs) const592     operator==(const regex_token_iterator& __rhs) const
593     {
594       if (_M_end_of_seq() && __rhs._M_end_of_seq())
595           return true;
596       if (_M_suffix.matched && __rhs._M_suffix.matched
597             && _M_suffix == __rhs._M_suffix)
598           return true;
599       if (_M_end_of_seq() || _M_suffix.matched
600             || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
601           return false;
602       return _M_position == __rhs._M_position
603           && _M_n == __rhs._M_n
604           && _M_subs == __rhs._M_subs;
605     }
606 
607   template<typename _Bi_iter,
608              typename _Ch_type,
609              typename _Rx_traits>
610     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
611     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()612     operator++()
613     {
614       _Position __prev = _M_position;
615       if (_M_suffix.matched)
616           *this = regex_token_iterator();
617       else if (_M_n + 1 < _M_subs.size())
618           {
619             _M_n++;
620             _M_result = &_M_current_match();
621           }
622       else
623           {
624             _M_n = 0;
625             ++_M_position;
626             if (_M_position != _Position())
627               _M_result = &_M_current_match();
628             else if (_M_has_m1 && __prev->suffix().length() != 0)
629               {
630                 _M_suffix.matched = true;
631                 _M_suffix.first = __prev->suffix().first;
632                 _M_suffix.second = __prev->suffix().second;
633                 _M_result = &_M_suffix;
634               }
635             else
636               *this = regex_token_iterator();
637           }
638       return *this;
639     }
640 
641   template<typename _Bi_iter,
642              typename _Ch_type,
643              typename _Rx_traits>
644     void
645     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
_M_init(_Bi_iter __a,_Bi_iter __b)646     _M_init(_Bi_iter __a, _Bi_iter __b)
647     {
648       _M_has_m1 = false;
649       for (auto __it : _M_subs)
650           if (__it == -1)
651             {
652               _M_has_m1 = true;
653               break;
654             }
655       if (_M_position != _Position())
656           _M_result = &_M_current_match();
657       else if (_M_has_m1)
658           {
659             _M_suffix.matched = true;
660             _M_suffix.first = __a;
661             _M_suffix.second = __b;
662             _M_result = &_M_suffix;
663           }
664       else
665           _M_result = nullptr;
666     }
667 
668   /// @endcond
669 
670 _GLIBCXX_END_NAMESPACE_VERSION
671 } // namespace
672