libstdc++
regex.tcc
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2026 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 /// @cond undocumented
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 // the other one if possible, for test purpose).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
//
// Summary of the visible body:
//  - returns false at once when __re has no automaton;
//  - otherwise records __s as the start of __m, sizes __m from the
//    sub-expression count of the automaton, and runs an executor in
//    match or search mode;
//  - on success fills in unmatched sub-matches plus prefix and suffix;
//  - on failure calls __m._M_establish_failed_match(__e).
// Returns the result reported by the executor.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
// NOTE(review): listing line 52 is absent from this extraction.  The name
// __flags used below is not declared on any visible line, so it is
// presumably the parameter declared here -- confirm against upstream.
53 _RegexExecutorPolicy __policy,
54 bool __match_mode)
55 {
// A regex without an automaton cannot match anything.
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60 __m._M_begin = __s;
61 __m._M_resize(__re._M_automaton->_M_sub_count());
62
63 bool __ret;
// __use_dfs stays true unless the regex carries the __polynomial flag, or
// the policy is _S_alternate and the automaton has no back-reference.
64 bool __use_dfs = true;
65 if ((__re.flags() & regex_constants::__polynomial)
66 || (__policy == _RegexExecutorPolicy::_S_alternate
67 && !__re._M_automaton->_M_has_backref))
68 __use_dfs = false;
69
// NOTE(review): listing line 70 is absent; it presumably names the type of
// __executor, which the next line constructs -- confirm against upstream.
71 __executor(__s, __e, __res, __re, __flags, __use_dfs);
72 if (__match_mode)
73 __ret = __executor._M_match();
74 else
75 __ret = __executor._M_search();
76
77 if (__ret)
78 {
// Sub-matches that did not participate are made empty ranges at __e.
79 for (auto& __it : __res)
80 if (!__it.matched)
81 __it.first = __it.second = __e;
82 auto& __pre = __m._M_prefix();
83 auto& __suf = __m._M_suffix();
84 if (__match_mode)
85 {
// Match mode: prefix and suffix are set to empty, unmatched ranges
// located at __s and __e respectively.
86 __pre.matched = false;
87 __pre.first = __s;
88 __pre.second = __s;
89 __suf.matched = false;
90 __suf.first = __e;
91 __suf.second = __e;
92 }
93 else
94 {
// Search mode: prefix is [__s, start of match 0) and suffix is
// [end of match 0, __e); each is flagged matched only when non-empty.
95 __pre.first = __s;
96 __pre.second = __res[0].first;
97 __pre.matched = (__pre.first != __pre.second);
98 __suf.first = __res[0].second;
99 __suf.second = __e;
100 __suf.matched = (__suf.first != __suf.second);
101 }
102 }
103 else
104 {
105 __m._M_establish_failed_match(__e);
106 }
107 return __ret;
108 }
109
// Translates a collating-element name in place.
//
// __name is compared against every entry of the table below.  On the first
// equal entry, __name is replaced by a one-character string whose character
// value is the index of that entry in the table.  When no entry is equal,
// __name is cleared, so an empty string signals an unknown name.
//
// The position of an entry is therefore significant: reordering the table
// changes the character produced.  The entries appear to follow ASCII code
// order (NUL first, DEL last) -- confirm before relying on it.
110 inline void
111 __lookup_collatename(string& __name) noexcept
112 {
113 static const char* const __collatenames[] =
114 {
115 "NUL",
116 "SOH",
117 "STX",
118 "ETX",
119 "EOT",
120 "ENQ",
121 "ACK",
122 "alert",
123 "backspace",
124 "tab",
125 "newline",
126 "vertical-tab",
127 "form-feed",
128 "carriage-return",
129 "SO",
130 "SI",
131 "DLE",
132 "DC1",
133 "DC2",
134 "DC3",
135 "DC4",
136 "NAK",
137 "SYN",
138 "ETB",
139 "CAN",
140 "EM",
141 "SUB",
142 "ESC",
143 "IS4",
144 "IS3",
145 "IS2",
146 "IS1",
147 "space",
148 "exclamation-mark",
149 "quotation-mark",
150 "number-sign",
151 "dollar-sign",
152 "percent-sign",
153 "ampersand",
154 "apostrophe",
155 "left-parenthesis",
156 "right-parenthesis",
157 "asterisk",
158 "plus-sign",
159 "comma",
160 "hyphen",
161 "period",
162 "slash",
163 "zero",
164 "one",
165 "two",
166 "three",
167 "four",
168 "five",
169 "six",
170 "seven",
171 "eight",
172 "nine",
173 "colon",
174 "semicolon",
175 "less-than-sign",
176 "equals-sign",
177 "greater-than-sign",
178 "question-mark",
179 "commercial-at",
180 "A",
181 "B",
182 "C",
183 "D",
184 "E",
185 "F",
186 "G",
187 "H",
188 "I",
189 "J",
190 "K",
191 "L",
192 "M",
193 "N",
194 "O",
195 "P",
196 "Q",
197 "R",
198 "S",
199 "T",
200 "U",
201 "V",
202 "W",
203 "X",
204 "Y",
205 "Z",
206 "left-square-bracket",
207 "backslash",
208 "right-square-bracket",
209 "circumflex",
210 "underscore",
211 "grave-accent",
212 "a",
213 "b",
214 "c",
215 "d",
216 "e",
217 "f",
218 "g",
219 "h",
220 "i",
221 "j",
222 "k",
223 "l",
224 "m",
225 "n",
226 "o",
227 "p",
228 "q",
229 "r",
230 "s",
231 "t",
232 "u",
233 "v",
234 "w",
235 "x",
236 "y",
237 "z",
238 "left-curly-bracket",
239 "vertical-line",
240 "right-curly-bracket",
241 "tilde",
242 "DEL",
243 };
244
// Linear scan; the pointer difference below is the index of the entry.
245 for (const auto& __it : __collatenames)
246 if (__name == __it)
247 {
248 __name.assign(1, static_cast<char>(&__it - __collatenames));
249 return;
250 }
251
// No entry matched: report that with an empty string.
252 __name.clear();
253 }
254
255 /// @endcond
256} // namespace __detail
257
258#pragma GCC diagnostic push
259#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
260
// Looks up the collating element named by the range [__first, __last).
//
// Both visible branches hand a narrow string to
// __detail::__lookup_collatename.  The first branch returns that string
// as is.  The second branch first narrows every input character through
// the ctype facet of _M_locale, and afterwards returns either an empty
// string_type (lookup cleared the string) or a single widened character.
//
// NOTE(review): listing lines 264 and 270 are absent from this extraction.
// Line 264 presumably carries the class qualifier of this member and line
// 270 the condition selecting between the two branches -- confirm against
// upstream.
261 template<typename _Ch_type>
262 template<typename _Fwd_iter>
263 typename regex_traits<_Ch_type>::string_type
265 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
266 {
267 // TODO Add digraph support:
268 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
269
271 {
// Here the range is used to construct a narrow string directly.
272 string __s(__first, __last);
273 __detail::__lookup_collatename(__s);
274 return __s;
275 }
276 else
277 {
278 typedef std::ctype<char_type> __ctype_type;
279 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
280
// Narrow each character, passing 0 as the fallback argument of narrow.
281 string __s;
282 for (; __first != __last; ++__first)
283 __s += __fctyp.narrow(*__first, 0);
284 __detail::__lookup_collatename(__s);
285 if (__s.empty())
286 return string_type();
287 else
288 return string_type(1, __fctyp.widen(__s[0]));
289 }
290 }
291
// Maps the character-class name in [__first, __last) to a char_class_type.
//
// The name is read one character at a time; each character is lower-cased
// and narrowed through the ctype facet of _M_locale before comparison, so
// the comparison ignores case.  Names handled by the switch below: alnum,
// alpha, blank, cntrl, d, digit, graph, lower, print, punct, s, space,
// upper, w and xdigit.  With __icase set, lower and upper both yield
// ctype_base::alpha.  Any other name yields a value-initialized result.
//
// Both __first and __last are captured by reference by the helper lambdas,
// and __first is advanced as characters are consumed.
//
// NOTE(review): listing line 295 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
292 template<typename _Ch_type>
293 template<typename _Fwd_iter>
294 typename regex_traits<_Ch_type>::char_class_type
296 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
297 {
// Cheap rejection of over-long names when the distance can be computed.
298 if constexpr (__is_any_random_access_iter<_Fwd_iter>::value)
299 if ((__last - __first) > 6) [[__unlikely__]]
300 return {}; // "xdigit" is the longest classname
301
302 typedef std::ctype<char_type> __ctype_type;
303 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
304
// Consumes and returns the next character, lower-cased and narrowed
// (0 is the fallback argument of narrow).  Returns the NUL character
// once the input is exhausted.
305 auto __read_ch = [&]() -> char {
306 if (__first == __last)
307 return '\0';
308 char __c = __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 ++__first;
310 return __c;
311 };
312
// Consumes input while it equals the non-empty C string __s.  Succeeds
// only when every character of __s was matched and the input ended at
// the same point.  Always reads at least one character.
313 auto __match = [&](const char* __s) -> bool {
314 do
315 if (__read_ch() != *__s)
316 return false;
317 while (*++__s);
318 return __first == __last;
319 };
320
// Dispatch on the first character, then verify the remainder.
321 switch(__read_ch())
322 {
323 case 'a':
324 if (__read_ch() == 'l')
325 switch (__read_ch())
326 {
327 case 'n':
328 if (__match("um")) // "alnum"
329 return ctype_base::alnum;
330 break;
331 case 'p':
332 if (__match("ha")) // "alpha"
333 return ctype_base::alpha;
334 break;
335 }
336 break;
337 case 'b':
338 if (__match("lank")) // "blank"
339 return ctype_base::blank;
340 break;
341 case 'c':
342 if (__match("ntrl")) // "cntrl"
343 return ctype_base::cntrl;
344 break;
345 case 'd':
346 if (__first == __last || __match("igit")) // "d" or "digit"
347 return ctype_base::digit;
348 break;
349 case 'g':
350 if (__match("raph")) // "graph"
351 return ctype_base::graph;
352 break;
353 case 'l':
354 if (__match("ower")) // "lower"
355 return __icase ? ctype_base::alpha : ctype_base::lower;
356 break;
357 case 'p':
358 switch (__read_ch())
359 {
360 case 'r':
361 if (__match("int")) // "print"
362 return ctype_base::print;
363 break;
364 case 'u':
365 if (__match("nct")) // "punct"
366 return ctype_base::punct;
367 break;
368 }
369 break;
370 case 's':
371 if (__first == __last || __match("pace")) // "s" or "space"
372 return ctype_base::space;
373 break;
374 case 'u':
375 if (__match("pper")) // "upper"
376 return __icase ? ctype_base::alpha : ctype_base::upper;
377 break;
378 case 'w':
// The single-letter class combines alnum with the extra _S_under bit.
379 if (__first == __last) // "w"
380 return {ctype_base::alnum, char_class_type::_S_under};
381 break;
382 case 'x':
383 if (__match("digit")) // "xdigit"
384 return ctype_base::xdigit;
385 break;
386 }
387
// Unknown class name.
388 return {};
389 }
390
// Tests whether __c belongs to the class described by __f.
//
// True when the ctype facet of _M_locale classifies __c under
// __f._M_base, or when __f has the _S_under bit set in _M_extended and
// __c equals the widened underscore character.
//
// NOTE(review): listing line 393 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
391 template<typename _Ch_type>
392 bool
394 isctype(_Ch_type __c, char_class_type __f) const
395 {
396 typedef std::ctype<char_type> __ctype_type;
397 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
398
399 return __fctyp.is(__f._M_base, __c)
400 // [[:w:]]
401 || ((__f._M_extended & _RegexMask::_S_under)
402 && __c == __fctyp.widen('_'));
403 }
404
// Converts the digit character __ch to its numeric value in base __radix.
//
// Returns the value of the digit, or -1 when __ch is not accepted.
// Character types wider than one byte are narrowed through the ctype
// facet of _M_locale and delegated to regex_traits<char>.
//
// As written, only radix 8 limits the decimal digits (to 0 through 7);
// every other radix accepts 0 through 9.  Letters a-f and A-F are accepted
// only when __radix is at least 16.
//
// NOTE(review): listing line 407 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
405 template<typename _Ch_type>
406 int
408 value(_Ch_type __ch, int __radix) const
409 {
410 if constexpr (sizeof(_Ch_type) > 1)
411 {
// Wide path: narrow with the NUL character as fallback, then reuse the
// char implementation.
412 const auto& __ctyp = std::use_facet<ctype<_Ch_type>>(_M_locale);
413 const char __c = __ctyp.narrow(__ch, '\0');
414 return regex_traits<char>{}.value(__c, __radix);
415 }
416 else
417 {
418 const char __c = static_cast<char>(__ch);
419 const char __max_digit = __radix == 8 ? '7' : '9';
420 if ('0' <= __c && __c <= __max_digit)
421 return __c - '0';
// Anything below base 16 has no letter digits.
422 if (__radix < 16)
423 return -1;
424 switch (__c)
425 {
426 case 'a':
427 case 'A':
428 return 10;
429 case 'b':
430 case 'B':
431 return 11;
432 case 'c':
433 case 'C':
434 return 12;
435 case 'd':
436 case 'D':
437 return 13;
438 case 'e':
439 case 'E':
440 return 14;
441 case 'f':
442 case 'F':
443 return 15;
444 default:
445 return -1;
446 }
447 }
448 }
449#pragma GCC diagnostic pop
450
// Expands the format string [__fmt_first, __fmt_last) into __out using the
// sub-matches of this object, and returns the advanced output iterator.
//
// With regex_constants::format_sed set in __flags:
//   - a backslash followed by a digit writes that numbered sub-match;
//   - a backslash followed by any other character writes that character;
//   - an ampersand writes sub-match 0;
//   - a trailing lone backslash is written literally.
// Otherwise the dollar sign introduces an escape:
//   - two dollar signs write one dollar sign;
//   - dollar + ampersand writes sub-match 0;
//   - dollar + grave accent writes the prefix, dollar + apostrophe the suffix;
//   - dollar + one or two digits writes that sub-match when the number is
//     below size(), and writes nothing otherwise;
//   - any other use of the dollar sign writes a literal dollar sign.
// Sub-matches, prefix and suffix are only copied when flagged as matched.
//
// NOTE(review): listing lines 454 and 461 are absent from this extraction.
// Line 454 presumably carries the class qualifier; __traits, used below,
// is not declared on any visible line and is presumably declared on line
// 461 -- confirm against upstream.
451 template<typename _Bi_iter, typename _Alloc>
452 template<typename _Out_iter>
453 _Out_iter
455 format(_Out_iter __out,
456 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
457 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
458 match_flag_type __flags) const
459 {
460 __glibcxx_assert( ready() );
462 typedef std::ctype<char_type> __ctype_type;
463 const __ctype_type&
464 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
465
// Copies sub-match __idx to the output when it participated in the match.
466 auto __output = [&](size_t __idx)
467 {
468 auto& __sub = (*this)[__idx];
469 if (__sub.matched)
470 __out = std::copy(__sub.first, __sub.second, __out);
471 };
472
473 if (__flags & regex_constants::format_sed)
474 {
// __escaping records that the previous character was a backslash.
475 bool __escaping = false;
476 for (; __fmt_first != __fmt_last; __fmt_first++)
477 {
478 if (__escaping)
479 {
480 __escaping = false;
481 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
482 __output(__traits.value(*__fmt_first, 10));
483 else
484 *__out++ = *__fmt_first;
485 continue;
486 }
487 if (*__fmt_first == '\\')
488 {
489 __escaping = true;
490 continue;
491 }
492 if (*__fmt_first == '&')
493 {
494 __output(0);
495 continue;
496 }
497 *__out++ = *__fmt_first;
498 }
// The format ended right after a backslash: emit it unchanged.
499 if (__escaping)
500 *__out++ = '\\';
501 }
502 else
503 {
504 while (1)
505 {
// Copy the literal text up to the next dollar sign.
506 auto __next = std::find(__fmt_first, __fmt_last, '$');
507 if (__next == __fmt_last)
508 break;
509
510 __out = std::copy(__fmt_first, __next, __out);
511
// Consumes the character at __next when it equals __ch.
512 auto __eat = [&](char __ch) -> bool
513 {
514 if (*__next == __ch)
515 {
516 ++__next;
517 return true;
518 }
519 return false;
520 };
521
// The increment skips the dollar sign itself; the first test handles a
// dollar sign that is the last character of the format.
522 if (++__next == __fmt_last)
523 *__out++ = '$';
524 else if (__eat('$'))
525 *__out++ = '$';
526 else if (__eat('&'))
527 __output(0);
528 else if (__eat('`'))
529 {
530 auto& __sub = _M_prefix();
531 if (__sub.matched)
532 __out = std::copy(__sub.first, __sub.second, __out);
533 }
534 else if (__eat('\''))
535 {
536 auto& __sub = _M_suffix();
537 if (__sub.matched)
538 __out = std::copy(__sub.first, __sub.second, __out);
539 }
540 else if (__fctyp.is(__ctype_type::digit, *__next))
541 {
// Read one digit, then an optional second digit.
542 long __num = __traits.value(*__next, 10);
543 if (++__next != __fmt_last
544 && __fctyp.is(__ctype_type::digit, *__next))
545 {
546 __num *= 10;
547 __num += __traits.value(*__next++, 10);
548 }
// Out-of-range sub-match numbers produce no output.
549 if (0 <= __num && size_t(__num) < this->size())
550 __output(__num);
551 }
552 else
553 *__out++ = '$';
554 __fmt_first = __next;
555 }
// Copy whatever literal text follows the last escape.
556 __out = std::copy(__fmt_first, __fmt_last, __out);
557 }
558 return __out;
559 }
560
// Writes to __out a copy of [__first, __last) in which matches are
// replaced by the expansion of the format string [__fmt, __fmt + __len),
// and returns the advanced output iterator.
//
// When there is no match at all the input is copied unchanged.  Otherwise
// each match contributes its prefix followed by its formatted replacement,
// and the suffix of the last processed match is copied at the end.  All
// copies of unmatched input are skipped when regex_constants::format_no_copy
// is set in __flags; the formatted replacements are still written.
//
// NOTE(review): listing lines 565, 567, 569 and 587 are absent from this
// extraction.  The names __e, __flags and _IterT are not declared on any
// visible line and are presumably introduced on lines 565-569.  The break
// inside the loop is presumably guarded by a condition on line 587
// (possibly format_first_only) -- confirm against upstream.
561 template<typename _Out_iter, typename _Bi_iter,
562 typename _Rx_traits, typename _Ch_type>
563 _Out_iter
564 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
566 const _Ch_type* __fmt, size_t __len,
568 {
570 _IterT __i(__first, __last, __e, __flags);
571 _IterT __end;
572 if (__i == __end)
573 {
// No match anywhere in the input.
574 if (!(__flags & regex_constants::format_no_copy))
575 __out = std::copy(__first, __last, __out);
576 }
577 else
578 {
// This local sub_match shadows the iterator parameter named __last for
// the rest of this scope; it holds the suffix of the latest match.
579 sub_match<_Bi_iter> __last;
580 for (; __i != __end; ++__i)
581 {
582 if (!(__flags & regex_constants::format_no_copy))
583 __out = std::copy(__i->prefix().first, __i->prefix().second,
584 __out);
585 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
586 __last = __i->suffix();
588 break;
589 }
// Trailing unmatched text after the last processed match.
590 if (!(__flags & regex_constants::format_no_copy))
591 __out = std::copy(__last.first, __last.second, __out);
592 }
593 return __out;
594 }
595
// Equality of two regex iterators.
//
// Two iterators whose _M_pregex is null compare equal regardless of their
// other members.  Otherwise all of _M_pregex, _M_begin, _M_end, _M_flags
// and sub-match 0 of _M_match must compare equal.
//
// NOTE(review): listing line 600 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
596 template<typename _Bi_iter,
597 typename _Ch_type,
598 typename _Rx_traits>
599 bool
601 operator==(const regex_iterator& __rhs) const noexcept
602 {
603 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
604 return true;
605 return _M_pregex == __rhs._M_pregex
606 && _M_begin == __rhs._M_begin
607 && _M_end == __rhs._M_end
608 && _M_flags == __rhs._M_flags
609 && _M_match[0] == __rhs._M_match[0];
610 }
611
// Advances the iterator to the next match and returns *this.
//
// Nothing happens unless sub-match 0 of the current result is matched.
// The next search starts at the end of the current match.  After a
// zero-length match the code first tries a search with extra flags at the
// same position and, if that fails, moves the start forward by one before
// the ordinary search.  When no further match exists _M_pregex is set to
// null, which operator== treats as the end state.  After every successful
// search the prefix is rewritten to begin at the end of the previous match
// and _M_match._M_begin is reset to _M_begin.
//
// NOTE(review): listing lines 615-617, 640-641 and 655 are absent from
// this extraction.  Lines 615-617 presumably hold the return type and
// qualified name of this operator; lines 640-641 presumably hold the extra
// flags of the first regex_search call; line 655 presumably adjusts
// _M_flags before the second search -- confirm against upstream.
612 template<typename _Bi_iter,
613 typename _Ch_type,
614 typename _Rx_traits>
618 {
619 // In all cases in which the call to regex_search returns true,
620 // match.prefix().first shall be equal to the previous value of
621 // match[0].second, and for each index i in the half-open range
622 // [0, match.size()) for which match[i].matched is true,
623 // match[i].position() shall return distance(begin, match[i].first).
624 // [28.12.1.4.5]
625 if (_M_match[0].matched)
626 {
627 auto __start = _M_match[0].second;
628 auto __prefix_first = _M_match[0].second;
// Special handling when the current match is empty.
629 if (_M_match[0].first == _M_match[0].second)
630 {
631 if (__start == _M_end)
632 {
// An empty match at the very end: no further matches are possible.
633 _M_pregex = nullptr;
634 return *this;
635 }
636 else
637 {
638 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
639 _M_flags
642 {
643 __glibcxx_assert(_M_match[0].matched);
644 auto& __prefix = _M_match._M_prefix();
645 __prefix.first = __prefix_first;
646 __prefix.matched = __prefix.first != __prefix.second;
647 // [28.12.1.4.5]
648 _M_match._M_begin = _M_begin;
649 return *this;
650 }
651 else
// Step past the position of the empty match before searching again.
652 ++__start;
653 }
654 }
656 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
657 {
658 __glibcxx_assert(_M_match[0].matched);
659 auto& __prefix = _M_match._M_prefix();
660 __prefix.first = __prefix_first;
661 __prefix.matched = __prefix.first != __prefix.second;
662 // [28.12.1.4.5]
663 _M_match._M_begin = _M_begin;
664 }
665 else
// Search failed: switch to the end state.
666 _M_pregex = nullptr;
667 }
668 return *this;
669 }
670
// Copy assignment: copies _M_position, _M_subs, _M_n, _M_suffix and
// _M_has_m1 from __rhs, then calls _M_normalize_result() and returns *this.
// _M_result is not copied here; presumably _M_normalize_result()
// re-derives it for this object -- confirm against its definition.
//
// NOTE(review): listing lines 674-676 are absent from this extraction;
// they presumably hold the return type, qualified name and parameter list
// of this operator -- confirm against upstream.
671 template<typename _Bi_iter,
672 typename _Ch_type,
673 typename _Rx_traits>
677 {
678 _M_position = __rhs._M_position;
679 _M_subs = __rhs._M_subs;
680 _M_n = __rhs._M_n;
681 _M_suffix = __rhs._M_suffix;
682 _M_has_m1 = __rhs._M_has_m1;
683 _M_normalize_result();
684 return *this;
685 }
686
// Equality of two regex token iterators, decided in this order:
//   1. both are at end of sequence: equal;
//   2. both hold a matched suffix and the suffixes compare equal: equal;
//   3. either one is at end of sequence or holds a matched suffix: unequal;
//   4. otherwise equal when _M_position, _M_n and _M_subs all compare equal.
//
// NOTE(review): listing line 691 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
687 template<typename _Bi_iter,
688 typename _Ch_type,
689 typename _Rx_traits>
690 bool
692 operator==(const regex_token_iterator& __rhs) const
693 {
694 if (_M_end_of_seq() && __rhs._M_end_of_seq())
695 return true;
696 if (_M_suffix.matched && __rhs._M_suffix.matched
697 && _M_suffix == __rhs._M_suffix)
698 return true;
699 if (_M_end_of_seq() || _M_suffix.matched
700 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
701 return false;
702 return _M_position == __rhs._M_position
703 && _M_n == __rhs._M_n
704 && _M_subs == __rhs._M_subs;
705 }
706
// Advances the token iterator and returns *this.
//
//   - If the iterator currently yields the stored suffix, it becomes a
//     default-constructed iterator.
//   - Else if further entries of _M_subs remain for the current match,
//     _M_n is advanced to the next entry.
//   - Else _M_n is reset and _M_position is advanced to the next match.
//     When there is no next match, the suffix of the previous match is
//     yielded once, provided _M_has_m1 is set and that suffix is not
//     empty; otherwise the iterator becomes default-constructed.
//
// NOTE(review): listing lines 710-712 are absent from this extraction;
// they presumably hold the return type and qualified name of this
// operator -- confirm against upstream.
707 template<typename _Bi_iter,
708 typename _Ch_type,
709 typename _Rx_traits>
713 {
// Saved so the suffix of the last match is still reachable after
// _M_position has been advanced.
714 _Position __prev = _M_position;
715 if (_M_suffix.matched)
716 *this = regex_token_iterator();
717 else if (_M_n + 1 < _M_subs.size())
718 {
719 _M_n++;
720 _M_result = &_M_current_match();
721 }
722 else
723 {
724 _M_n = 0;
725 ++_M_position;
726 if (_M_position != _Position())
727 _M_result = &_M_current_match();
728 else if (_M_has_m1 && __prev->suffix().length() != 0)
729 {
730 _M_suffix.matched = true;
731 _M_suffix.first = __prev->suffix().first;
732 _M_suffix.second = __prev->suffix().second;
733 _M_result = &_M_suffix;
734 }
735 else
736 *this = regex_token_iterator();
737 }
738 return *this;
739 }
740
// Establishes the initial state for the input range [__a, __b).
//
// _M_has_m1 is set when any entry of _M_subs equals -1.  _M_result is then
// pointed at the current match when _M_position is not a default-constructed
// _Position.  Failing that, when _M_has_m1 is set, the whole range is
// stored as a matched _M_suffix and _M_result points at it.  Otherwise
// _M_result is null.
//
// NOTE(review): listing line 745 is absent from this extraction; it
// presumably carries the class qualifier of this member -- confirm against
// upstream.
741 template<typename _Bi_iter,
742 typename _Ch_type,
743 typename _Rx_traits>
744 void
746 _M_init(_Bi_iter __a, _Bi_iter __b)
747 {
748 _M_has_m1 = false;
749 for (auto __it : _M_subs)
750 if (__it == -1)
751 {
752 _M_has_m1 = true;
753 break;
754 }
755 if (_M_position != _Position())
756 _M_result = &_M_current_match();
757 else if (_M_has_m1)
758 {
// No match at all, but -1 was requested: yield the entire input once.
759 _M_suffix.matched = true;
760 _M_suffix.first = __a;
761 _M_suffix.second = __b;
762 _M_result = &_M_suffix;
763 }
764 else
765 _M_result = nullptr;
766 }
767
768_GLIBCXX_END_NAMESPACE_VERSION
769} // namespace
const _Facet & use_facet(const locale &__loc)
Return a facet.
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition regex.h:2440
The top-level namespace for ISO C++ entities is std.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Implementation details not part of the namespace std interface.
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
constexpr bool empty() const noexcept
Primary class template ctype facet.
A regular expression.
Definition regex.h:441
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Takes a regex and an input string and does the matching.
Describes aspects of a regular expression.
Definition regex.h:100
regex_traits()
Constructs a default traits object.
Definition regex.h:179
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition regex.tcc:408
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition regex.tcc:265
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition regex.h:411
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition regex.tcc:394
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition regex.tcc:296
regex_iterator()=default
Provides a singular iterator, useful for indicating one-past-the-end of a range.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition regex.tcc:601
regex_iterator & operator++()
Increments a regex_iterator.
Definition regex.tcc:617
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition regex.tcc:692
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition regex.tcc:676
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition regex.tcc:712
regex_token_iterator()
Default constructs a regex_token_iterator.
Definition regex.h:2909