libstdc++
regex_executor.h
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2026 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex_executor.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31// FIXME convert comments to doxygen format.
32
33namespace std _GLIBCXX_VISIBILITY(default)
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37namespace __detail
38{
39 /**
40 * @addtogroup regex-detail
41 * @{
42 */
43
44 template<typename _BiIter, bool _Trivial = is_trivially_copyable<_BiIter>::value>
45 struct _ExecutorFrame;
46
47 /**
48 * @brief Takes a regex and an input string and does the matching.
49 *
50 * The %_Executor class has two modes: DFS mode and BFS mode, controlled
51 * by the function parameter %__search_mode.
52 */
53 template<typename _BiIter, typename _Alloc, typename _TraitsT>
54 class _Executor
55 {
56 enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
57 enum class _Match_mode : unsigned char { _Exact, _Prefix };
58
59 public:
60 typedef typename iterator_traits<_BiIter>::value_type _CharT;
61 typedef basic_regex<_CharT, _TraitsT> _RegexT;
62 typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
64 typedef typename _TraitsT::char_class_type _ClassT;
65 typedef _NFA<_TraitsT> _NFAT;
66
67 public:
68 _Executor(_BiIter __begin,
69 _BiIter __end,
70 _ResultsVec& __results,
71 const _RegexT& __re,
72 _FlagT __flags,
73 bool __use_dfs)
74 : _M_cur_results(__results.get_allocator()),
75 _M_begin(__begin),
76 _M_end(__end),
77 _M_re(__re),
78 _M_nfa(*__re._M_automaton),
79 _M_results(__results),
80 _M_rep_count(_M_nfa.size()),
81 _M_start(_M_nfa._M_start()),
82 _M_visited_states(nullptr),
83 _M_flags(__flags),
84 _M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS)
85 {
86 using namespace regex_constants;
87 if (__flags & match_prev_avail) // ignore not_bol and not_bow
88 _M_flags &= ~(match_not_bol | match_not_bow);
89 if (_M_search_mode == _Search_mode::_BFS)
90 _M_visited_states = new bool[_M_nfa.size()];
91 }
92
93 ~_Executor()
94 { delete[] _M_visited_states; }
95
96 // Set matched when string exactly matches the pattern.
97 bool
98 _M_match()
99 {
100 _M_current = _M_begin;
101 return _M_main(_Match_mode::_Exact);
102 }
103
104 // Set matched when some prefix of the string matches the pattern.
105 bool
106 _M_search_from_first()
107 {
108 _M_current = _M_begin;
109 return _M_main(_Match_mode::_Prefix);
110 }
111
112 bool
113 _M_search();
114
115 private:
116 void
117 _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
118
119 void
120 _M_handle_repeat(_Match_mode, _StateIdT);
121
122 void
123 _M_handle_subexpr_begin(_Match_mode, _StateIdT);
124
125 void
126 _M_handle_subexpr_end(_Match_mode, _StateIdT);
127
128 void
129 _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
130
131 void
132 _M_handle_line_end_assertion(_Match_mode, _StateIdT);
133
134 void
135 _M_handle_word_boundary(_Match_mode, _StateIdT);
136
137 void
138 _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
139
140 void
141 _M_handle_match(_Match_mode, _StateIdT);
142
143 void
144 _M_handle_backref(_Match_mode, _StateIdT);
145
146 void
147 _M_handle_accept(_Match_mode, _StateIdT);
148
149 void
150 _M_handle_alternative(_Match_mode, _StateIdT);
151
152 void
153 _M_node(_Match_mode, _StateIdT);
154
155 void
156 _M_dfs(_Match_mode __match_mode, _StateIdT __start);
157
158 bool
159 _M_main(_Match_mode __match_mode)
160 {
161 if (_M_search_mode == _Search_mode::_DFS)
162 return _M_main_dfs(__match_mode);
163 else
164 return _M_main_bfs(__match_mode);
165 }
166
167 bool
168 _M_main_dfs(_Match_mode __match_mode);
169
170 bool
171 _M_main_bfs(_Match_mode __match_mode);
172
173 bool
174 _M_is_word(_CharT __ch) const
175 {
176 static const _CharT __s[2] = { 'w' };
177 return _M_re._M_automaton->_M_traits.isctype
178 (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
179 }
180
181 bool
182 _M_at_begin() const
183 {
184 if (_M_current == _M_begin)
185 {
186 // match_not_bol means ^ does not match [_M_begin,_M_begin)
187 if (_M_flags & regex_constants::match_not_bol)
188 return false;
189 // match_prev_avail means _M_begin is not the start of the input.
191 {
192 // For ECMAScript multiline matches, check if the previous
193 // character is a line terminator.
194 if (_M_match_multiline())
195 return _M_is_line_terminator(*std::prev(_M_current));
196 else
197 return false;
198 }
199 else // ^ matches at _M_begin
200 return true;
201 }
202 else if (_M_match_multiline())
203 return _M_is_line_terminator(*std::prev(_M_current));
204 else
205 return false;
206 }
207
208 bool
209 _M_at_end() const
210 {
211 if (_M_current == _M_end)
212 return !(_M_flags & regex_constants::match_not_eol);
213 else if (_M_match_multiline())
214 return _M_is_line_terminator(*_M_current);
215 else
216 return false;
217 }
218
219 bool
220 _M_word_boundary() const;
221
222 bool
223 _M_lookahead(_StateIdT __next);
224
225 bool
226 _M_is_line_terminator(_CharT __c) const
227 {
228 const auto& __traits = _M_re._M_automaton->_M_traits;
229 const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
230 const char __n{ __ct.narrow(__c, ' ') };
231 if (__n == '\n')
232 return true;
233 if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
234 {
235 if (__n == '\r')
236 return true;
237 // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
238 }
239 return false;
240 }
241
242 bool
243 _M_match_multiline() const noexcept
244 {
245 constexpr auto __m
247 return (_M_re._M_automaton->_M_options() & __m) == __m;
248 }
249
250 bool
251 _M_visited(_StateIdT __i)
252 {
253 if (_M_visited_states)
254 {
255 if (_M_visited_states[__i])
256 return true;
257 _M_visited_states[__i] = true;
258 }
259 return false;
260 }
261
262 _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
263
264 public:
265 _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
266 _ResultsVec _M_cur_results;
267 _BiIter _M_current;
268 _BiIter _M_begin;
269 const _BiIter _M_end;
270 const _RegexT& _M_re;
271 const _NFAT& _M_nfa;
272 _ResultsVec& _M_results;
273 _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
274 // To record current solution.
275 _StateIdT _M_start;
276 _BiIter _M_sol_pos;
277 // (BFS only) Saves states that need to be considered for the next character.
278 _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
279 // (BFS only) Indicates which states are already visited.
280 bool* _M_visited_states;
281 _FlagT _M_flags;
282 const _Search_mode _M_search_mode;
283 // Do we have a solution so far?
284 bool _M_has_sol;
285 };
286
287 ///@} regex-detail
288} // namespace __detail
289_GLIBCXX_END_NAMESPACE_VERSION
290} // namespace std
291
const _Facet & use_facet(const locale &__loc)
Return a facet.
ISO C++ entities toplevel namespace is std.
Implementation details not part of the namespace std interface.
ISO C++ 2011 namespace for options and flags used with std::regex.
constexpr match_flag_type match_not_bol
constexpr syntax_option_type ECMAScript
constexpr syntax_option_type __multiline
Extension: Equivalent to regex_constants::multiline for C++11 and C++14.
constexpr match_flag_type match_not_eol
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
Traits class for iterators.
A regular expression.
Definition regex.h:441
constexpr allocator_type get_allocator() const noexcept
Get a copy of the memory allocation object.
Definition stl_vector.h:317