libstdc++
regex.tcc
Go to the documentation of this file.
// class template regex -*- C++ -*-

// Copyright (C) 2013-2026 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
24
/**
 *  @file bits/regex.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 /// @cond undocumented
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 // the other one if possible, for test purpose).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
53 _RegexExecutorPolicy __policy,
54 bool __match_mode)
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60 __m._M_begin = __s;
61 __m._M_resize(__re._M_automaton->_M_sub_count());
62
63 bool __ret;
64 bool __use_dfs = true;
65 if ((__re.flags() & regex_constants::__polynomial)
66 || (__policy == _RegexExecutorPolicy::_S_alternate
67 && !__re._M_automaton->_M_has_backref))
68 __use_dfs = false;
69
71 __executor(__s, __e, __res, __re, __flags, __use_dfs);
72 if (__match_mode)
73 __ret = __executor._M_match();
74 else
75 __ret = __executor._M_search();
76
77 if (__ret)
78 {
79 for (auto& __it : __res)
80 if (!__it.matched)
81 __it.first = __it.second = __e;
82 auto& __pre = __m._M_prefix();
83 auto& __suf = __m._M_suffix();
84 if (__match_mode)
85 {
86 __pre.matched = false;
87 __pre.first = __s;
88 __pre.second = __s;
89 __suf.matched = false;
90 __suf.first = __e;
91 __suf.second = __e;
92 }
93 else
94 {
95 __pre.first = __s;
96 __pre.second = __res[0].first;
97 __pre.matched = (__pre.first != __pre.second);
98 __suf.first = __res[0].second;
99 __suf.second = __e;
100 __suf.matched = (__suf.first != __suf.second);
101 }
102 }
103 else
104 {
105 __m._M_establish_failed_match(__e);
106 }
107 return __ret;
108 }
109 /// @endcond
110} // namespace __detail
111
112 template<typename _Ch_type>
113 template<typename _Fwd_iter>
114 typename regex_traits<_Ch_type>::string_type
116 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
117 {
118 typedef std::ctype<char_type> __ctype_type;
119 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
120
121 static const char* const __collatenames[] =
122 {
123 "NUL",
124 "SOH",
125 "STX",
126 "ETX",
127 "EOT",
128 "ENQ",
129 "ACK",
130 "alert",
131 "backspace",
132 "tab",
133 "newline",
134 "vertical-tab",
135 "form-feed",
136 "carriage-return",
137 "SO",
138 "SI",
139 "DLE",
140 "DC1",
141 "DC2",
142 "DC3",
143 "DC4",
144 "NAK",
145 "SYN",
146 "ETB",
147 "CAN",
148 "EM",
149 "SUB",
150 "ESC",
151 "IS4",
152 "IS3",
153 "IS2",
154 "IS1",
155 "space",
156 "exclamation-mark",
157 "quotation-mark",
158 "number-sign",
159 "dollar-sign",
160 "percent-sign",
161 "ampersand",
162 "apostrophe",
163 "left-parenthesis",
164 "right-parenthesis",
165 "asterisk",
166 "plus-sign",
167 "comma",
168 "hyphen",
169 "period",
170 "slash",
171 "zero",
172 "one",
173 "two",
174 "three",
175 "four",
176 "five",
177 "six",
178 "seven",
179 "eight",
180 "nine",
181 "colon",
182 "semicolon",
183 "less-than-sign",
184 "equals-sign",
185 "greater-than-sign",
186 "question-mark",
187 "commercial-at",
188 "A",
189 "B",
190 "C",
191 "D",
192 "E",
193 "F",
194 "G",
195 "H",
196 "I",
197 "J",
198 "K",
199 "L",
200 "M",
201 "N",
202 "O",
203 "P",
204 "Q",
205 "R",
206 "S",
207 "T",
208 "U",
209 "V",
210 "W",
211 "X",
212 "Y",
213 "Z",
214 "left-square-bracket",
215 "backslash",
216 "right-square-bracket",
217 "circumflex",
218 "underscore",
219 "grave-accent",
220 "a",
221 "b",
222 "c",
223 "d",
224 "e",
225 "f",
226 "g",
227 "h",
228 "i",
229 "j",
230 "k",
231 "l",
232 "m",
233 "n",
234 "o",
235 "p",
236 "q",
237 "r",
238 "s",
239 "t",
240 "u",
241 "v",
242 "w",
243 "x",
244 "y",
245 "z",
246 "left-curly-bracket",
247 "vertical-line",
248 "right-curly-bracket",
249 "tilde",
250 "DEL",
251 };
252
253 string __s;
254 for (; __first != __last; ++__first)
255 __s += __fctyp.narrow(*__first, 0);
256
257 for (const auto& __it : __collatenames)
258 if (__s == __it)
259 return string_type(1, __fctyp.widen(
260 static_cast<char>(&__it - __collatenames)));
261
262 // TODO Add digraph support:
263 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
264
265 return string_type();
266 }
267
268 template<typename _Ch_type>
269 template<typename _Fwd_iter>
270 typename regex_traits<_Ch_type>::char_class_type
272 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
273 {
274 typedef std::ctype<char_type> __ctype_type;
275 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
276
277 // Mappings from class name to class mask.
278 static const pair<const char*, char_class_type> __classnames[] =
279 {
280 {"d", ctype_base::digit},
281 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
282 {"s", ctype_base::space},
283 {"alnum", ctype_base::alnum},
284 {"alpha", ctype_base::alpha},
285 {"blank", ctype_base::blank},
286 {"cntrl", ctype_base::cntrl},
287 {"digit", ctype_base::digit},
288 {"graph", ctype_base::graph},
289 {"lower", ctype_base::lower},
290 {"print", ctype_base::print},
291 {"punct", ctype_base::punct},
292 {"space", ctype_base::space},
293 {"upper", ctype_base::upper},
294 {"xdigit", ctype_base::xdigit},
295 };
296
297 string __s;
298 for (; __first != __last; ++__first)
299 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
300
301 for (const auto& __it : __classnames)
302 if (__s == __it.first)
303 {
304 if (__icase
305 && ((__it.second
306 & (ctype_base::lower | ctype_base::upper)) != 0))
307 return ctype_base::alpha;
308 return __it.second;
309 }
310 return 0;
311 }
312
313 template<typename _Ch_type>
314 bool
316 isctype(_Ch_type __c, char_class_type __f) const
317 {
318 typedef std::ctype<char_type> __ctype_type;
319 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
320
321 return __fctyp.is(__f._M_base, __c)
322 // [[:w:]]
323 || ((__f._M_extended & _RegexMask::_S_under)
324 && __c == __fctyp.widen('_'));
325 }
326
327#pragma GCC diagnostic push
328#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
329 template<typename _Ch_type>
330 int
332 value(_Ch_type __ch, int __radix) const
333 {
334 if constexpr (sizeof(_Ch_type) > 1)
335 {
336 const auto& __ctyp = std::use_facet<ctype<_Ch_type>>(_M_locale);
337 const char __c = __ctyp.narrow(__ch, '\0');
338 return regex_traits<char>{}.value(__c, __radix);
339 }
340 else
341 {
342 const char __c = static_cast<char>(__ch);
343 const char __max_digit = __radix == 8 ? '7' : '9';
344 if ('0' <= __c && __c <= __max_digit)
345 return __c - '0';
346 if (__radix < 16)
347 return -1;
348 switch (__c)
349 {
350 case 'a':
351 case 'A':
352 return 10;
353 case 'b':
354 case 'B':
355 return 11;
356 case 'c':
357 case 'C':
358 return 12;
359 case 'd':
360 case 'D':
361 return 13;
362 case 'e':
363 case 'E':
364 return 14;
365 case 'f':
366 case 'F':
367 return 15;
368 default:
369 return -1;
370 }
371 }
372 }
373#pragma GCC diagnostic pop
374
375 template<typename _Bi_iter, typename _Alloc>
376 template<typename _Out_iter>
377 _Out_iter
379 format(_Out_iter __out,
380 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
381 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
382 match_flag_type __flags) const
383 {
384 __glibcxx_assert( ready() );
386 typedef std::ctype<char_type> __ctype_type;
387 const __ctype_type&
388 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
389
390 auto __output = [&](size_t __idx)
391 {
392 auto& __sub = (*this)[__idx];
393 if (__sub.matched)
394 __out = std::copy(__sub.first, __sub.second, __out);
395 };
396
397 if (__flags & regex_constants::format_sed)
398 {
399 bool __escaping = false;
400 for (; __fmt_first != __fmt_last; __fmt_first++)
401 {
402 if (__escaping)
403 {
404 __escaping = false;
405 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
406 __output(__traits.value(*__fmt_first, 10));
407 else
408 *__out++ = *__fmt_first;
409 continue;
410 }
411 if (*__fmt_first == '\\')
412 {
413 __escaping = true;
414 continue;
415 }
416 if (*__fmt_first == '&')
417 {
418 __output(0);
419 continue;
420 }
421 *__out++ = *__fmt_first;
422 }
423 if (__escaping)
424 *__out++ = '\\';
425 }
426 else
427 {
428 while (1)
429 {
430 auto __next = std::find(__fmt_first, __fmt_last, '$');
431 if (__next == __fmt_last)
432 break;
433
434 __out = std::copy(__fmt_first, __next, __out);
435
436 auto __eat = [&](char __ch) -> bool
437 {
438 if (*__next == __ch)
439 {
440 ++__next;
441 return true;
442 }
443 return false;
444 };
445
446 if (++__next == __fmt_last)
447 *__out++ = '$';
448 else if (__eat('$'))
449 *__out++ = '$';
450 else if (__eat('&'))
451 __output(0);
452 else if (__eat('`'))
453 {
454 auto& __sub = _M_prefix();
455 if (__sub.matched)
456 __out = std::copy(__sub.first, __sub.second, __out);
457 }
458 else if (__eat('\''))
459 {
460 auto& __sub = _M_suffix();
461 if (__sub.matched)
462 __out = std::copy(__sub.first, __sub.second, __out);
463 }
464 else if (__fctyp.is(__ctype_type::digit, *__next))
465 {
466 long __num = __traits.value(*__next, 10);
467 if (++__next != __fmt_last
468 && __fctyp.is(__ctype_type::digit, *__next))
469 {
470 __num *= 10;
471 __num += __traits.value(*__next++, 10);
472 }
473 if (0 <= __num && size_t(__num) < this->size())
474 __output(__num);
475 }
476 else
477 *__out++ = '$';
478 __fmt_first = __next;
479 }
480 __out = std::copy(__fmt_first, __fmt_last, __out);
481 }
482 return __out;
483 }
484
485 template<typename _Out_iter, typename _Bi_iter,
486 typename _Rx_traits, typename _Ch_type>
487 _Out_iter
488 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
490 const _Ch_type* __fmt, size_t __len,
492 {
494 _IterT __i(__first, __last, __e, __flags);
495 _IterT __end;
496 if (__i == __end)
497 {
498 if (!(__flags & regex_constants::format_no_copy))
499 __out = std::copy(__first, __last, __out);
500 }
501 else
502 {
503 sub_match<_Bi_iter> __last;
504 for (; __i != __end; ++__i)
505 {
506 if (!(__flags & regex_constants::format_no_copy))
507 __out = std::copy(__i->prefix().first, __i->prefix().second,
508 __out);
509 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
510 __last = __i->suffix();
512 break;
513 }
514 if (!(__flags & regex_constants::format_no_copy))
515 __out = std::copy(__last.first, __last.second, __out);
516 }
517 return __out;
518 }
519
520 template<typename _Bi_iter,
521 typename _Ch_type,
522 typename _Rx_traits>
523 bool
525 operator==(const regex_iterator& __rhs) const noexcept
526 {
527 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
528 return true;
529 return _M_pregex == __rhs._M_pregex
530 && _M_begin == __rhs._M_begin
531 && _M_end == __rhs._M_end
532 && _M_flags == __rhs._M_flags
533 && _M_match[0] == __rhs._M_match[0];
534 }
535
536 template<typename _Bi_iter,
537 typename _Ch_type,
538 typename _Rx_traits>
542 {
543 // In all cases in which the call to regex_search returns true,
544 // match.prefix().first shall be equal to the previous value of
545 // match[0].second, and for each index i in the half-open range
546 // [0, match.size()) for which match[i].matched is true,
547 // match[i].position() shall return distance(begin, match[i].first).
548 // [28.12.1.4.5]
549 if (_M_match[0].matched)
550 {
551 auto __start = _M_match[0].second;
552 auto __prefix_first = _M_match[0].second;
553 if (_M_match[0].first == _M_match[0].second)
554 {
555 if (__start == _M_end)
556 {
557 _M_pregex = nullptr;
558 return *this;
559 }
560 else
561 {
562 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
563 _M_flags
566 {
567 __glibcxx_assert(_M_match[0].matched);
568 auto& __prefix = _M_match._M_prefix();
569 __prefix.first = __prefix_first;
570 __prefix.matched = __prefix.first != __prefix.second;
571 // [28.12.1.4.5]
572 _M_match._M_begin = _M_begin;
573 return *this;
574 }
575 else
576 ++__start;
577 }
578 }
580 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
581 {
582 __glibcxx_assert(_M_match[0].matched);
583 auto& __prefix = _M_match._M_prefix();
584 __prefix.first = __prefix_first;
585 __prefix.matched = __prefix.first != __prefix.second;
586 // [28.12.1.4.5]
587 _M_match._M_begin = _M_begin;
588 }
589 else
590 _M_pregex = nullptr;
591 }
592 return *this;
593 }
594
595 template<typename _Bi_iter,
596 typename _Ch_type,
597 typename _Rx_traits>
601 {
602 _M_position = __rhs._M_position;
603 _M_subs = __rhs._M_subs;
604 _M_n = __rhs._M_n;
605 _M_suffix = __rhs._M_suffix;
606 _M_has_m1 = __rhs._M_has_m1;
607 _M_normalize_result();
608 return *this;
609 }
610
611 template<typename _Bi_iter,
612 typename _Ch_type,
613 typename _Rx_traits>
614 bool
616 operator==(const regex_token_iterator& __rhs) const
617 {
618 if (_M_end_of_seq() && __rhs._M_end_of_seq())
619 return true;
620 if (_M_suffix.matched && __rhs._M_suffix.matched
621 && _M_suffix == __rhs._M_suffix)
622 return true;
623 if (_M_end_of_seq() || _M_suffix.matched
624 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
625 return false;
626 return _M_position == __rhs._M_position
627 && _M_n == __rhs._M_n
628 && _M_subs == __rhs._M_subs;
629 }
630
631 template<typename _Bi_iter,
632 typename _Ch_type,
633 typename _Rx_traits>
637 {
638 _Position __prev = _M_position;
639 if (_M_suffix.matched)
640 *this = regex_token_iterator();
641 else if (_M_n + 1 < _M_subs.size())
642 {
643 _M_n++;
644 _M_result = &_M_current_match();
645 }
646 else
647 {
648 _M_n = 0;
649 ++_M_position;
650 if (_M_position != _Position())
651 _M_result = &_M_current_match();
652 else if (_M_has_m1 && __prev->suffix().length() != 0)
653 {
654 _M_suffix.matched = true;
655 _M_suffix.first = __prev->suffix().first;
656 _M_suffix.second = __prev->suffix().second;
657 _M_result = &_M_suffix;
658 }
659 else
660 *this = regex_token_iterator();
661 }
662 return *this;
663 }
664
665 template<typename _Bi_iter,
666 typename _Ch_type,
667 typename _Rx_traits>
668 void
670 _M_init(_Bi_iter __a, _Bi_iter __b)
671 {
672 _M_has_m1 = false;
673 for (auto __it : _M_subs)
674 if (__it == -1)
675 {
676 _M_has_m1 = true;
677 break;
678 }
679 if (_M_position != _Position())
680 _M_result = &_M_current_match();
681 else if (_M_has_m1)
682 {
683 _M_suffix.matched = true;
684 _M_suffix.first = __a;
685 _M_suffix.second = __b;
686 _M_result = &_M_suffix;
687 }
688 else
689 _M_result = nullptr;
690 }
691
692_GLIBCXX_END_NAMESPACE_VERSION
693} // namespace
const _Facet & use_facet(const locale &__loc)
Return a facet.
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition regex.h:2440
ISO C++ entities toplevel namespace is std.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Implementation details not part of the namespace std interface.
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
Primary class template ctype facet.
A regular expression.
Definition regex.h:441
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Takes a regex and an input string and does the matching.
Describes aspects of a regular expression.
Definition regex.h:100
regex_traits()
Constructs a default traits object.
Definition regex.h:179
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition regex.tcc:332
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition regex.tcc:116
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition regex.h:411
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition regex.tcc:316
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition regex.tcc:272
regex_iterator()=default
Provides a singular iterator, useful for indicating one-past-the-end of a range.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition regex.tcc:525
regex_iterator & operator++()
Increments a regex_iterator.
Definition regex.tcc:541
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition regex.tcc:616
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition regex.tcc:600
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition regex.tcc:636
regex_token_iterator()
Default constructs a regex_token_iterator.
Definition regex.h:2909
Struct holding two objects of arbitrary type.
Definition stl_pair.h:304