libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2025 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37  /// @cond undocumented
38 
39  // Result of merging regex_match and regex_search.
40  //
41  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42  // the other one if possible, for test purpose).
43  //
44  // That __match_mode is true means regex_match, else regex_search.
45  template<typename _BiIter, typename _Alloc,
46  typename _CharT, typename _TraitsT>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
53  _RegexExecutorPolicy __policy,
54  bool __match_mode)
55  {
56  if (__re._M_automaton == nullptr)
57  return false;
58 
59  typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60  __m._M_begin = __s;
61  __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63  bool __ret;
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __res, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __res, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
112  __m._M_establish_failed_match(__e);
113  }
114  return __ret;
115  }
116  /// @endcond
117 } // namespace __detail
118 
119  template<typename _Ch_type>
120  template<typename _Fwd_iter>
121  typename regex_traits<_Ch_type>::string_type
123  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
124  {
125  typedef std::ctype<char_type> __ctype_type;
126  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
127 
128  static const char* __collatenames[] =
129  {
130  "NUL",
131  "SOH",
132  "STX",
133  "ETX",
134  "EOT",
135  "ENQ",
136  "ACK",
137  "alert",
138  "backspace",
139  "tab",
140  "newline",
141  "vertical-tab",
142  "form-feed",
143  "carriage-return",
144  "SO",
145  "SI",
146  "DLE",
147  "DC1",
148  "DC2",
149  "DC3",
150  "DC4",
151  "NAK",
152  "SYN",
153  "ETB",
154  "CAN",
155  "EM",
156  "SUB",
157  "ESC",
158  "IS4",
159  "IS3",
160  "IS2",
161  "IS1",
162  "space",
163  "exclamation-mark",
164  "quotation-mark",
165  "number-sign",
166  "dollar-sign",
167  "percent-sign",
168  "ampersand",
169  "apostrophe",
170  "left-parenthesis",
171  "right-parenthesis",
172  "asterisk",
173  "plus-sign",
174  "comma",
175  "hyphen",
176  "period",
177  "slash",
178  "zero",
179  "one",
180  "two",
181  "three",
182  "four",
183  "five",
184  "six",
185  "seven",
186  "eight",
187  "nine",
188  "colon",
189  "semicolon",
190  "less-than-sign",
191  "equals-sign",
192  "greater-than-sign",
193  "question-mark",
194  "commercial-at",
195  "A",
196  "B",
197  "C",
198  "D",
199  "E",
200  "F",
201  "G",
202  "H",
203  "I",
204  "J",
205  "K",
206  "L",
207  "M",
208  "N",
209  "O",
210  "P",
211  "Q",
212  "R",
213  "S",
214  "T",
215  "U",
216  "V",
217  "W",
218  "X",
219  "Y",
220  "Z",
221  "left-square-bracket",
222  "backslash",
223  "right-square-bracket",
224  "circumflex",
225  "underscore",
226  "grave-accent",
227  "a",
228  "b",
229  "c",
230  "d",
231  "e",
232  "f",
233  "g",
234  "h",
235  "i",
236  "j",
237  "k",
238  "l",
239  "m",
240  "n",
241  "o",
242  "p",
243  "q",
244  "r",
245  "s",
246  "t",
247  "u",
248  "v",
249  "w",
250  "x",
251  "y",
252  "z",
253  "left-curly-bracket",
254  "vertical-line",
255  "right-curly-bracket",
256  "tilde",
257  "DEL",
258  };
259 
260  string __s;
261  for (; __first != __last; ++__first)
262  __s += __fctyp.narrow(*__first, 0);
263 
264  for (const auto& __it : __collatenames)
265  if (__s == __it)
266  return string_type(1, __fctyp.widen(
267  static_cast<char>(&__it - __collatenames)));
268 
269  // TODO Add digraph support:
270  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
271 
272  return string_type();
273  }
274 
275  template<typename _Ch_type>
276  template<typename _Fwd_iter>
277  typename regex_traits<_Ch_type>::char_class_type
279  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
280  {
281  typedef std::ctype<char_type> __ctype_type;
282  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
283 
284  // Mappings from class name to class mask.
285  static const pair<const char*, char_class_type> __classnames[] =
286  {
287  {"d", ctype_base::digit},
288  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
289  {"s", ctype_base::space},
290  {"alnum", ctype_base::alnum},
291  {"alpha", ctype_base::alpha},
292  {"blank", ctype_base::blank},
293  {"cntrl", ctype_base::cntrl},
294  {"digit", ctype_base::digit},
295  {"graph", ctype_base::graph},
296  {"lower", ctype_base::lower},
297  {"print", ctype_base::print},
298  {"punct", ctype_base::punct},
299  {"space", ctype_base::space},
300  {"upper", ctype_base::upper},
301  {"xdigit", ctype_base::xdigit},
302  };
303 
304  string __s;
305  for (; __first != __last; ++__first)
306  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
307 
308  for (const auto& __it : __classnames)
309  if (__s == __it.first)
310  {
311  if (__icase
312  && (__it.second._M_base == ctype_base::lower
313  || __it.second._M_base == ctype_base::upper))
314  return ctype_base::alpha;
315  return __it.second;
316  }
317  return 0;
318  }
319 
320  template<typename _Ch_type>
321  bool
323  isctype(_Ch_type __c, char_class_type __f) const
324  {
325  typedef std::ctype<char_type> __ctype_type;
326  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
327 
328  return __fctyp.is(__f._M_base, __c)
329  // [[:w:]]
330  || ((__f._M_extended & _RegexMask::_S_under)
331  && __c == __fctyp.widen('_'));
332  }
333 
334 #pragma GCC diagnostic push
335 #pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
336  template<typename _Ch_type>
337  int
339  value(_Ch_type __ch, int __radix) const
340  {
341  if constexpr (sizeof(_Ch_type) > 1)
342  {
343  const auto& __ctyp = std::use_facet<ctype<_Ch_type>>(_M_locale);
344  const char __c = __ctyp.narrow(__ch, '\0');
345  return regex_traits<char>{}.value(__c, __radix);
346  }
347  else
348  {
349  const char __c = static_cast<char>(__ch);
350  const char __max_digit = __radix == 8 ? '7' : '9';
351  if ('0' <= __c && __c <= __max_digit)
352  return __c - '0';
353  if (__radix < 16)
354  return -1;
355  switch (__c)
356  {
357  case 'a':
358  case 'A':
359  return 10;
360  case 'b':
361  case 'B':
362  return 11;
363  case 'c':
364  case 'C':
365  return 12;
366  case 'd':
367  case 'D':
368  return 13;
369  case 'e':
370  case 'E':
371  return 14;
372  case 'f':
373  case 'F':
374  return 15;
375  default:
376  return -1;
377  }
378  }
379  }
380 #pragma GCC diagnostic pop
381 
382  template<typename _Bi_iter, typename _Alloc>
383  template<typename _Out_iter>
384  _Out_iter
386  format(_Out_iter __out,
387  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
388  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
389  match_flag_type __flags) const
390  {
391  __glibcxx_assert( ready() );
392  regex_traits<char_type> __traits;
393  typedef std::ctype<char_type> __ctype_type;
394  const __ctype_type&
395  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
396 
397  auto __output = [&](size_t __idx)
398  {
399  auto& __sub = (*this)[__idx];
400  if (__sub.matched)
401  __out = std::copy(__sub.first, __sub.second, __out);
402  };
403 
404  if (__flags & regex_constants::format_sed)
405  {
406  bool __escaping = false;
407  for (; __fmt_first != __fmt_last; __fmt_first++)
408  {
409  if (__escaping)
410  {
411  __escaping = false;
412  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
413  __output(__traits.value(*__fmt_first, 10));
414  else
415  *__out++ = *__fmt_first;
416  continue;
417  }
418  if (*__fmt_first == '\\')
419  {
420  __escaping = true;
421  continue;
422  }
423  if (*__fmt_first == '&')
424  {
425  __output(0);
426  continue;
427  }
428  *__out++ = *__fmt_first;
429  }
430  if (__escaping)
431  *__out++ = '\\';
432  }
433  else
434  {
435  while (1)
436  {
437  auto __next = std::find(__fmt_first, __fmt_last, '$');
438  if (__next == __fmt_last)
439  break;
440 
441  __out = std::copy(__fmt_first, __next, __out);
442 
443  auto __eat = [&](char __ch) -> bool
444  {
445  if (*__next == __ch)
446  {
447  ++__next;
448  return true;
449  }
450  return false;
451  };
452 
453  if (++__next == __fmt_last)
454  *__out++ = '$';
455  else if (__eat('$'))
456  *__out++ = '$';
457  else if (__eat('&'))
458  __output(0);
459  else if (__eat('`'))
460  {
461  auto& __sub = _M_prefix();
462  if (__sub.matched)
463  __out = std::copy(__sub.first, __sub.second, __out);
464  }
465  else if (__eat('\''))
466  {
467  auto& __sub = _M_suffix();
468  if (__sub.matched)
469  __out = std::copy(__sub.first, __sub.second, __out);
470  }
471  else if (__fctyp.is(__ctype_type::digit, *__next))
472  {
473  long __num = __traits.value(*__next, 10);
474  if (++__next != __fmt_last
475  && __fctyp.is(__ctype_type::digit, *__next))
476  {
477  __num *= 10;
478  __num += __traits.value(*__next++, 10);
479  }
480  if (0 <= __num && size_t(__num) < this->size())
481  __output(__num);
482  }
483  else
484  *__out++ = '$';
485  __fmt_first = __next;
486  }
487  __out = std::copy(__fmt_first, __fmt_last, __out);
488  }
489  return __out;
490  }
491 
492  template<typename _Out_iter, typename _Bi_iter,
493  typename _Rx_traits, typename _Ch_type>
494  _Out_iter
495  __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
497  const _Ch_type* __fmt, size_t __len,
499  {
501  _IterT __i(__first, __last, __e, __flags);
502  _IterT __end;
503  if (__i == __end)
504  {
505  if (!(__flags & regex_constants::format_no_copy))
506  __out = std::copy(__first, __last, __out);
507  }
508  else
509  {
510  sub_match<_Bi_iter> __last;
511  for (; __i != __end; ++__i)
512  {
513  if (!(__flags & regex_constants::format_no_copy))
514  __out = std::copy(__i->prefix().first, __i->prefix().second,
515  __out);
516  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
517  __last = __i->suffix();
519  break;
520  }
521  if (!(__flags & regex_constants::format_no_copy))
522  __out = std::copy(__last.first, __last.second, __out);
523  }
524  return __out;
525  }
526 
527  template<typename _Bi_iter,
528  typename _Ch_type,
529  typename _Rx_traits>
530  bool
532  operator==(const regex_iterator& __rhs) const noexcept
533  {
534  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
535  return true;
536  return _M_pregex == __rhs._M_pregex
537  && _M_begin == __rhs._M_begin
538  && _M_end == __rhs._M_end
539  && _M_flags == __rhs._M_flags
540  && _M_match[0] == __rhs._M_match[0];
541  }
542 
543  template<typename _Bi_iter,
544  typename _Ch_type,
545  typename _Rx_traits>
549  {
550  // In all cases in which the call to regex_search returns true,
551  // match.prefix().first shall be equal to the previous value of
552  // match[0].second, and for each index i in the half-open range
553  // [0, match.size()) for which match[i].matched is true,
554  // match[i].position() shall return distance(begin, match[i].first).
555  // [28.12.1.4.5]
556  if (_M_match[0].matched)
557  {
558  auto __start = _M_match[0].second;
559  auto __prefix_first = _M_match[0].second;
560  if (_M_match[0].first == _M_match[0].second)
561  {
562  if (__start == _M_end)
563  {
564  _M_pregex = nullptr;
565  return *this;
566  }
567  else
568  {
569  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
570  _M_flags
573  {
574  __glibcxx_assert(_M_match[0].matched);
575  auto& __prefix = _M_match._M_prefix();
576  __prefix.first = __prefix_first;
577  __prefix.matched = __prefix.first != __prefix.second;
578  // [28.12.1.4.5]
579  _M_match._M_begin = _M_begin;
580  return *this;
581  }
582  else
583  ++__start;
584  }
585  }
587  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
588  {
589  __glibcxx_assert(_M_match[0].matched);
590  auto& __prefix = _M_match._M_prefix();
591  __prefix.first = __prefix_first;
592  __prefix.matched = __prefix.first != __prefix.second;
593  // [28.12.1.4.5]
594  _M_match._M_begin = _M_begin;
595  }
596  else
597  _M_pregex = nullptr;
598  }
599  return *this;
600  }
601 
602  template<typename _Bi_iter,
603  typename _Ch_type,
604  typename _Rx_traits>
608  {
609  _M_position = __rhs._M_position;
610  _M_subs = __rhs._M_subs;
611  _M_n = __rhs._M_n;
612  _M_suffix = __rhs._M_suffix;
613  _M_has_m1 = __rhs._M_has_m1;
614  _M_normalize_result();
615  return *this;
616  }
617 
618  template<typename _Bi_iter,
619  typename _Ch_type,
620  typename _Rx_traits>
621  bool
624  {
625  if (_M_end_of_seq() && __rhs._M_end_of_seq())
626  return true;
627  if (_M_suffix.matched && __rhs._M_suffix.matched
628  && _M_suffix == __rhs._M_suffix)
629  return true;
630  if (_M_end_of_seq() || _M_suffix.matched
631  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
632  return false;
633  return _M_position == __rhs._M_position
634  && _M_n == __rhs._M_n
635  && _M_subs == __rhs._M_subs;
636  }
637 
638  template<typename _Bi_iter,
639  typename _Ch_type,
640  typename _Rx_traits>
644  {
645  _Position __prev = _M_position;
646  if (_M_suffix.matched)
647  *this = regex_token_iterator();
648  else if (_M_n + 1 < _M_subs.size())
649  {
650  _M_n++;
651  _M_result = &_M_current_match();
652  }
653  else
654  {
655  _M_n = 0;
656  ++_M_position;
657  if (_M_position != _Position())
658  _M_result = &_M_current_match();
659  else if (_M_has_m1 && __prev->suffix().length() != 0)
660  {
661  _M_suffix.matched = true;
662  _M_suffix.first = __prev->suffix().first;
663  _M_suffix.second = __prev->suffix().second;
664  _M_result = &_M_suffix;
665  }
666  else
667  *this = regex_token_iterator();
668  }
669  return *this;
670  }
671 
672  template<typename _Bi_iter,
673  typename _Ch_type,
674  typename _Rx_traits>
675  void
677  _M_init(_Bi_iter __a, _Bi_iter __b)
678  {
679  _M_has_m1 = false;
680  for (auto __it : _M_subs)
681  if (__it == -1)
682  {
683  _M_has_m1 = true;
684  break;
685  }
686  if (_M_position != _Position())
687  _M_result = &_M_current_match();
688  else if (_M_has_m1)
689  {
690  _M_suffix.matched = true;
691  _M_suffix.first = __a;
692  _M_suffix.second = __b;
693  _M_result = &_M_suffix;
694  }
695  else
696  _M_result = nullptr;
697  }
698 
699 _GLIBCXX_END_NAMESPACE_VERSION
700 } // namespace
constexpr match_flag_type match_continuous
constexpr match_flag_type match_prev_avail
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:339
Definition: simd.h:306
A regular expression.
Definition: regex.h:43
constexpr match_flag_type format_no_copy
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:643
Struct holding two objects of arbitrary type.
Describes aspects of a regular expression.
Definition: regex.h:99
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type format_sed
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:323
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2440
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:279
Primary class template ctype facet.This template class defines classification and conversion function...
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:123
constexpr syntax_option_type __polynomial
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:607
ISO C++ entities toplevel namespace is std.
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:623
constexpr match_flag_type match_not_null
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:411
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:274
Managing sequences of characters and character-like objects.
Definition: cow_string.h:108
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:548
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition: regex.tcc:532
constexpr match_flag_type format_first_only