libstdc++
regex.tcc
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2025 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 /// @cond undocumented
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 // the other one if possible, for test purpose).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
53 _RegexExecutorPolicy __policy,
54 bool __match_mode)
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60 __m._M_begin = __s;
61 __m._M_resize(__re._M_automaton->_M_sub_count());
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
69 __executor(__s, __e, __res, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
78 __executor(__s, __e, __res, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_establish_failed_match(__e);
113 }
114 return __ret;
115 }
116 /// @endcond
117} // namespace __detail
118
119 template<typename _Ch_type>
120 template<typename _Fwd_iter>
121 typename regex_traits<_Ch_type>::string_type
123 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
124 {
125 typedef std::ctype<char_type> __ctype_type;
126 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
127
128 static const char* __collatenames[] =
129 {
130 "NUL",
131 "SOH",
132 "STX",
133 "ETX",
134 "EOT",
135 "ENQ",
136 "ACK",
137 "alert",
138 "backspace",
139 "tab",
140 "newline",
141 "vertical-tab",
142 "form-feed",
143 "carriage-return",
144 "SO",
145 "SI",
146 "DLE",
147 "DC1",
148 "DC2",
149 "DC3",
150 "DC4",
151 "NAK",
152 "SYN",
153 "ETB",
154 "CAN",
155 "EM",
156 "SUB",
157 "ESC",
158 "IS4",
159 "IS3",
160 "IS2",
161 "IS1",
162 "space",
163 "exclamation-mark",
164 "quotation-mark",
165 "number-sign",
166 "dollar-sign",
167 "percent-sign",
168 "ampersand",
169 "apostrophe",
170 "left-parenthesis",
171 "right-parenthesis",
172 "asterisk",
173 "plus-sign",
174 "comma",
175 "hyphen",
176 "period",
177 "slash",
178 "zero",
179 "one",
180 "two",
181 "three",
182 "four",
183 "five",
184 "six",
185 "seven",
186 "eight",
187 "nine",
188 "colon",
189 "semicolon",
190 "less-than-sign",
191 "equals-sign",
192 "greater-than-sign",
193 "question-mark",
194 "commercial-at",
195 "A",
196 "B",
197 "C",
198 "D",
199 "E",
200 "F",
201 "G",
202 "H",
203 "I",
204 "J",
205 "K",
206 "L",
207 "M",
208 "N",
209 "O",
210 "P",
211 "Q",
212 "R",
213 "S",
214 "T",
215 "U",
216 "V",
217 "W",
218 "X",
219 "Y",
220 "Z",
221 "left-square-bracket",
222 "backslash",
223 "right-square-bracket",
224 "circumflex",
225 "underscore",
226 "grave-accent",
227 "a",
228 "b",
229 "c",
230 "d",
231 "e",
232 "f",
233 "g",
234 "h",
235 "i",
236 "j",
237 "k",
238 "l",
239 "m",
240 "n",
241 "o",
242 "p",
243 "q",
244 "r",
245 "s",
246 "t",
247 "u",
248 "v",
249 "w",
250 "x",
251 "y",
252 "z",
253 "left-curly-bracket",
254 "vertical-line",
255 "right-curly-bracket",
256 "tilde",
257 "DEL",
258 };
259
260 string __s;
261 for (; __first != __last; ++__first)
262 __s += __fctyp.narrow(*__first, 0);
263
264 for (const auto& __it : __collatenames)
265 if (__s == __it)
266 return string_type(1, __fctyp.widen(
267 static_cast<char>(&__it - __collatenames)));
268
269 // TODO Add digraph support:
270 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
271
272 return string_type();
273 }
274
275 template<typename _Ch_type>
276 template<typename _Fwd_iter>
277 typename regex_traits<_Ch_type>::char_class_type
279 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
280 {
281 typedef std::ctype<char_type> __ctype_type;
282 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
283
284 // Mappings from class name to class mask.
285 static const pair<const char*, char_class_type> __classnames[] =
286 {
287 {"d", ctype_base::digit},
288 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
289 {"s", ctype_base::space},
290 {"alnum", ctype_base::alnum},
291 {"alpha", ctype_base::alpha},
292 {"blank", ctype_base::blank},
293 {"cntrl", ctype_base::cntrl},
294 {"digit", ctype_base::digit},
295 {"graph", ctype_base::graph},
296 {"lower", ctype_base::lower},
297 {"print", ctype_base::print},
298 {"punct", ctype_base::punct},
299 {"space", ctype_base::space},
300 {"upper", ctype_base::upper},
301 {"xdigit", ctype_base::xdigit},
302 };
303
304 string __s;
305 for (; __first != __last; ++__first)
306 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
307
308 for (const auto& __it : __classnames)
309 if (__s == __it.first)
310 {
311 if (__icase
312 && ((__it.second
313 & (ctype_base::lower | ctype_base::upper)) != 0))
314 return ctype_base::alpha;
315 return __it.second;
316 }
317 return 0;
318 }
319
320 template<typename _Ch_type>
321 bool
323 isctype(_Ch_type __c, char_class_type __f) const
324 {
325 typedef std::ctype<char_type> __ctype_type;
326 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
327
328 return __fctyp.is(__f._M_base, __c)
329 // [[:w:]]
330 || ((__f._M_extended & _RegexMask::_S_under)
331 && __c == __fctyp.widen('_'));
332 }
333
334#pragma GCC diagnostic push
335#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
336 template<typename _Ch_type>
337 int
339 value(_Ch_type __ch, int __radix) const
340 {
341 if constexpr (sizeof(_Ch_type) > 1)
342 {
343 const auto& __ctyp = std::use_facet<ctype<_Ch_type>>(_M_locale);
344 const char __c = __ctyp.narrow(__ch, '\0');
345 return regex_traits<char>{}.value(__c, __radix);
346 }
347 else
348 {
349 const char __c = static_cast<char>(__ch);
350 const char __max_digit = __radix == 8 ? '7' : '9';
351 if ('0' <= __c && __c <= __max_digit)
352 return __c - '0';
353 if (__radix < 16)
354 return -1;
355 switch (__c)
356 {
357 case 'a':
358 case 'A':
359 return 10;
360 case 'b':
361 case 'B':
362 return 11;
363 case 'c':
364 case 'C':
365 return 12;
366 case 'd':
367 case 'D':
368 return 13;
369 case 'e':
370 case 'E':
371 return 14;
372 case 'f':
373 case 'F':
374 return 15;
375 default:
376 return -1;
377 }
378 }
379 }
380#pragma GCC diagnostic pop
381
382 template<typename _Bi_iter, typename _Alloc>
383 template<typename _Out_iter>
384 _Out_iter
386 format(_Out_iter __out,
387 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
388 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
389 match_flag_type __flags) const
390 {
391 __glibcxx_assert( ready() );
393 typedef std::ctype<char_type> __ctype_type;
394 const __ctype_type&
395 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
396
397 auto __output = [&](size_t __idx)
398 {
399 auto& __sub = (*this)[__idx];
400 if (__sub.matched)
401 __out = std::copy(__sub.first, __sub.second, __out);
402 };
403
404 if (__flags & regex_constants::format_sed)
405 {
406 bool __escaping = false;
407 for (; __fmt_first != __fmt_last; __fmt_first++)
408 {
409 if (__escaping)
410 {
411 __escaping = false;
412 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
413 __output(__traits.value(*__fmt_first, 10));
414 else
415 *__out++ = *__fmt_first;
416 continue;
417 }
418 if (*__fmt_first == '\\')
419 {
420 __escaping = true;
421 continue;
422 }
423 if (*__fmt_first == '&')
424 {
425 __output(0);
426 continue;
427 }
428 *__out++ = *__fmt_first;
429 }
430 if (__escaping)
431 *__out++ = '\\';
432 }
433 else
434 {
435 while (1)
436 {
437 auto __next = std::find(__fmt_first, __fmt_last, '$');
438 if (__next == __fmt_last)
439 break;
440
441 __out = std::copy(__fmt_first, __next, __out);
442
443 auto __eat = [&](char __ch) -> bool
444 {
445 if (*__next == __ch)
446 {
447 ++__next;
448 return true;
449 }
450 return false;
451 };
452
453 if (++__next == __fmt_last)
454 *__out++ = '$';
455 else if (__eat('$'))
456 *__out++ = '$';
457 else if (__eat('&'))
458 __output(0);
459 else if (__eat('`'))
460 {
461 auto& __sub = _M_prefix();
462 if (__sub.matched)
463 __out = std::copy(__sub.first, __sub.second, __out);
464 }
465 else if (__eat('\''))
466 {
467 auto& __sub = _M_suffix();
468 if (__sub.matched)
469 __out = std::copy(__sub.first, __sub.second, __out);
470 }
471 else if (__fctyp.is(__ctype_type::digit, *__next))
472 {
473 long __num = __traits.value(*__next, 10);
474 if (++__next != __fmt_last
475 && __fctyp.is(__ctype_type::digit, *__next))
476 {
477 __num *= 10;
478 __num += __traits.value(*__next++, 10);
479 }
480 if (0 <= __num && size_t(__num) < this->size())
481 __output(__num);
482 }
483 else
484 *__out++ = '$';
485 __fmt_first = __next;
486 }
487 __out = std::copy(__fmt_first, __fmt_last, __out);
488 }
489 return __out;
490 }
491
492 template<typename _Out_iter, typename _Bi_iter,
493 typename _Rx_traits, typename _Ch_type>
494 _Out_iter
495 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
497 const _Ch_type* __fmt, size_t __len,
499 {
501 _IterT __i(__first, __last, __e, __flags);
502 _IterT __end;
503 if (__i == __end)
504 {
505 if (!(__flags & regex_constants::format_no_copy))
506 __out = std::copy(__first, __last, __out);
507 }
508 else
509 {
510 sub_match<_Bi_iter> __last;
511 for (; __i != __end; ++__i)
512 {
513 if (!(__flags & regex_constants::format_no_copy))
514 __out = std::copy(__i->prefix().first, __i->prefix().second,
515 __out);
516 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
517 __last = __i->suffix();
519 break;
520 }
521 if (!(__flags & regex_constants::format_no_copy))
522 __out = std::copy(__last.first, __last.second, __out);
523 }
524 return __out;
525 }
526
527 template<typename _Bi_iter,
528 typename _Ch_type,
529 typename _Rx_traits>
530 bool
532 operator==(const regex_iterator& __rhs) const noexcept
533 {
534 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
535 return true;
536 return _M_pregex == __rhs._M_pregex
537 && _M_begin == __rhs._M_begin
538 && _M_end == __rhs._M_end
539 && _M_flags == __rhs._M_flags
540 && _M_match[0] == __rhs._M_match[0];
541 }
542
543 template<typename _Bi_iter,
544 typename _Ch_type,
545 typename _Rx_traits>
549 {
550 // In all cases in which the call to regex_search returns true,
551 // match.prefix().first shall be equal to the previous value of
552 // match[0].second, and for each index i in the half-open range
553 // [0, match.size()) for which match[i].matched is true,
554 // match[i].position() shall return distance(begin, match[i].first).
555 // [28.12.1.4.5]
556 if (_M_match[0].matched)
557 {
558 auto __start = _M_match[0].second;
559 auto __prefix_first = _M_match[0].second;
560 if (_M_match[0].first == _M_match[0].second)
561 {
562 if (__start == _M_end)
563 {
564 _M_pregex = nullptr;
565 return *this;
566 }
567 else
568 {
569 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
570 _M_flags
573 {
574 __glibcxx_assert(_M_match[0].matched);
575 auto& __prefix = _M_match._M_prefix();
576 __prefix.first = __prefix_first;
577 __prefix.matched = __prefix.first != __prefix.second;
578 // [28.12.1.4.5]
579 _M_match._M_begin = _M_begin;
580 return *this;
581 }
582 else
583 ++__start;
584 }
585 }
587 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
588 {
589 __glibcxx_assert(_M_match[0].matched);
590 auto& __prefix = _M_match._M_prefix();
591 __prefix.first = __prefix_first;
592 __prefix.matched = __prefix.first != __prefix.second;
593 // [28.12.1.4.5]
594 _M_match._M_begin = _M_begin;
595 }
596 else
597 _M_pregex = nullptr;
598 }
599 return *this;
600 }
601
602 template<typename _Bi_iter,
603 typename _Ch_type,
604 typename _Rx_traits>
608 {
609 _M_position = __rhs._M_position;
610 _M_subs = __rhs._M_subs;
611 _M_n = __rhs._M_n;
612 _M_suffix = __rhs._M_suffix;
613 _M_has_m1 = __rhs._M_has_m1;
614 _M_normalize_result();
615 return *this;
616 }
617
618 template<typename _Bi_iter,
619 typename _Ch_type,
620 typename _Rx_traits>
621 bool
623 operator==(const regex_token_iterator& __rhs) const
624 {
625 if (_M_end_of_seq() && __rhs._M_end_of_seq())
626 return true;
627 if (_M_suffix.matched && __rhs._M_suffix.matched
628 && _M_suffix == __rhs._M_suffix)
629 return true;
630 if (_M_end_of_seq() || _M_suffix.matched
631 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
632 return false;
633 return _M_position == __rhs._M_position
634 && _M_n == __rhs._M_n
635 && _M_subs == __rhs._M_subs;
636 }
637
638 template<typename _Bi_iter,
639 typename _Ch_type,
640 typename _Rx_traits>
644 {
645 _Position __prev = _M_position;
646 if (_M_suffix.matched)
647 *this = regex_token_iterator();
648 else if (_M_n + 1 < _M_subs.size())
649 {
650 _M_n++;
651 _M_result = &_M_current_match();
652 }
653 else
654 {
655 _M_n = 0;
656 ++_M_position;
657 if (_M_position != _Position())
658 _M_result = &_M_current_match();
659 else if (_M_has_m1 && __prev->suffix().length() != 0)
660 {
661 _M_suffix.matched = true;
662 _M_suffix.first = __prev->suffix().first;
663 _M_suffix.second = __prev->suffix().second;
664 _M_result = &_M_suffix;
665 }
666 else
667 *this = regex_token_iterator();
668 }
669 return *this;
670 }
671
672 template<typename _Bi_iter,
673 typename _Ch_type,
674 typename _Rx_traits>
675 void
677 _M_init(_Bi_iter __a, _Bi_iter __b)
678 {
679 _M_has_m1 = false;
680 for (auto __it : _M_subs)
681 if (__it == -1)
682 {
683 _M_has_m1 = true;
684 break;
685 }
686 if (_M_position != _Position())
687 _M_result = &_M_current_match();
688 else if (_M_has_m1)
689 {
690 _M_suffix.matched = true;
691 _M_suffix.first = __a;
692 _M_suffix.second = __b;
693 _M_result = &_M_suffix;
694 }
695 else
696 _M_result = nullptr;
697 }
698
699_GLIBCXX_END_NAMESPACE_VERSION
700} // namespace
const _Facet & use_facet(const locale &__loc)
Return a facet.
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition regex.h:2440
ISO C++ entities toplevel namespace is std.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Implementation details not part of the namespace std interface.
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
Primary class template ctype facet.
A regular expression.
Definition regex.h:441
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Takes a regex and an input string and does the matching.
Describes aspects of a regular expression.
Definition regex.h:100
regex_traits()
Constructs a default traits object.
Definition regex.h:179
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition regex.tcc:339
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition regex.tcc:123
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition regex.h:411
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition regex.tcc:323
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition regex.tcc:279
regex_iterator()=default
Provides a singular iterator, useful for indicating one-past-the-end of a range.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition regex.tcc:532
regex_iterator & operator++()
Increments a regex_iterator.
Definition regex.tcc:548
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition regex.tcc:623
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition regex.tcc:607
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition regex.tcc:643
regex_token_iterator()
Default constructs a regex_token_iterator.
Definition regex.h:2909
Struct holding two objects of arbitrary type.
Definition stl_pair.h:304