libstdc++
regex_executor.h
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2026 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex_executor.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31// FIXME convert comments to doxygen format.
32
33namespace std _GLIBCXX_VISIBILITY(default)
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37namespace __detail
38{
39_GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
40 /**
41 * @addtogroup regex-detail
42 * @{
43 */
44
45 template<typename _BiIter, bool _Trivial = is_trivially_copyable<_BiIter>::value>
46 struct _ExecutorFrame;
47
48 /**
49 * @brief Takes a regex and an input string and does the matching.
50 *
51 * The %_Executor class has two modes: DFS mode and BFS mode, controlled
52 * by the template parameter %__dfs_mode.
53 */
54 template<typename _BiIter, typename _Alloc, typename _TraitsT,
55 bool __dfs_mode>
56 class _Executor
57 {
58 using __search_mode = integral_constant<bool, __dfs_mode>;
59 using __dfs = true_type;
60 using __bfs = false_type;
61
62 enum class _Match_mode : unsigned char { _Exact, _Prefix };
63
64 public:
65 typedef typename iterator_traits<_BiIter>::value_type _CharT;
66 typedef basic_regex<_CharT, _TraitsT> _RegexT;
67 typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
69 typedef typename _TraitsT::char_class_type _ClassT;
70 typedef _NFA<_TraitsT> _NFAT;
71
72 public:
73 _Executor(_BiIter __begin,
74 _BiIter __end,
75 _ResultsVec& __results,
76 const _RegexT& __re,
77 _FlagT __flags)
78 : _M_cur_results(__results.get_allocator()),
79 _M_begin(__begin),
80 _M_end(__end),
81 _M_re(__re),
82 _M_nfa(*__re._M_automaton),
83 _M_results(__results),
84 _M_rep_count(_M_nfa.size()),
85 _M_states(_M_nfa._M_start(), _M_nfa.size()),
86 _M_flags(__flags)
87 {
88 using namespace regex_constants;
89 if (__flags & match_prev_avail) // ignore not_bol and not_bow
90 _M_flags &= ~(match_not_bol | match_not_bow);
91 }
92
93 // Set matched when string exactly matches the pattern.
94 bool
95 _M_match()
96 {
97 _M_current = _M_begin;
98 return _M_main(_Match_mode::_Exact);
99 }
100
101 // Set matched when some prefix of the string matches the pattern.
102 bool
103 _M_search_from_first()
104 {
105 _M_current = _M_begin;
106 return _M_main(_Match_mode::_Prefix);
107 }
108
109 bool
110 _M_search();
111
112 private:
113 void
114 _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
115
116 void
117 _M_handle_repeat(_Match_mode, _StateIdT);
118
119 void
120 _M_handle_subexpr_begin(_Match_mode, _StateIdT);
121
122 void
123 _M_handle_subexpr_end(_Match_mode, _StateIdT);
124
125 void
126 _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
127
128 void
129 _M_handle_line_end_assertion(_Match_mode, _StateIdT);
130
131 void
132 _M_handle_word_boundary(_Match_mode, _StateIdT);
133
134 void
135 _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
136
137 void
138 _M_handle_match(_Match_mode, _StateIdT);
139
140 void
141 _M_handle_backref(_Match_mode, _StateIdT);
142
143 void
144 _M_handle_accept(_Match_mode, _StateIdT);
145
146 void
147 _M_handle_alternative(_Match_mode, _StateIdT);
148
149 void
150 _M_node(_Match_mode, _StateIdT);
151
152 void
153 _M_dfs(_Match_mode __match_mode, _StateIdT __start);
154
155 bool
156 _M_main(_Match_mode __match_mode)
157 { return _M_main_dispatch(__match_mode, __search_mode{}); }
158
159 bool
160 _M_main_dispatch(_Match_mode __match_mode, __dfs);
161
162 bool
163 _M_main_dispatch(_Match_mode __match_mode, __bfs);
164
165 bool
166 _M_is_word(_CharT __ch) const
167 {
168 static const _CharT __s[2] = { 'w' };
169 return _M_re._M_automaton->_M_traits.isctype
170 (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
171 }
172
173 bool
174 _M_at_begin() const
175 {
176 if (_M_current == _M_begin)
177 {
178 // match_not_bol means ^ does not match [_M_begin,_M_begin)
179 if (_M_flags & regex_constants::match_not_bol)
180 return false;
181 // match_prev_avail means _M_begin is not the start of the input.
183 {
184 // For ECMAScript multiline matches, check if the previous
185 // character is a line terminator.
186 if (_M_match_multiline())
187 return _M_is_line_terminator(*std::prev(_M_current));
188 else
189 return false;
190 }
191 else // ^ matches at _M_begin
192 return true;
193 }
194 else if (_M_match_multiline())
195 return _M_is_line_terminator(*std::prev(_M_current));
196 else
197 return false;
198 }
199
200 bool
201 _M_at_end() const
202 {
203 if (_M_current == _M_end)
204 return !(_M_flags & regex_constants::match_not_eol);
205 else if (_M_match_multiline())
206 return _M_is_line_terminator(*_M_current);
207 else
208 return false;
209 }
210
211 bool
212 _M_word_boundary() const;
213
214 bool
215 _M_lookahead(_StateIdT __next);
216
217 bool
218 _M_is_line_terminator(_CharT __c) const
219 {
220 const auto& __traits = _M_re._M_automaton->_M_traits;
221 const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
222 const char __n{ __ct.narrow(__c, ' ') };
223 if (__n == '\n')
224 return true;
225 if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
226 {
227 if (__n == '\r')
228 return true;
229 // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
230 }
231 return false;
232 }
233
234 bool
235 _M_match_multiline() const noexcept
236 {
237 constexpr auto __m
239 return (_M_re._M_automaton->_M_options() & __m) == __m;
240 }
241
242 // Holds additional information used in BFS-mode.
243 template<typename _SearchMode, typename _ResultsVec>
244 struct _State_info;
245
246 template<typename _ResultsVec>
247 struct _State_info<__bfs, _ResultsVec>
248 {
249 explicit
250 _State_info(_StateIdT __start, size_t __n)
251 : _M_visited_states(new bool[__n]()), _M_start(__start)
252 { }
253
254 ~_State_info() { delete[] _M_visited_states; }
255
256 _State_info(const _State_info&) = delete;
257 _State_info& operator=(const _State_info&) = delete;
258
259 bool _M_visited(_StateIdT __i)
260 {
261 if (_M_visited_states[__i])
262 return true;
263 _M_visited_states[__i] = true;
264 return false;
265 }
266
267 void _M_queue(_StateIdT __i, const _ResultsVec& __res)
268 { _M_match_queue.emplace_back(__i, __res); }
269
270 // Dummy implementations for BFS mode.
271 _BiIter* _M_get_sol_pos() { return nullptr; }
272
273 // Saves states that need to be considered for the next character.
274 _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
275 // Indicates which states are already visited.
276 bool* _M_visited_states;
277 // To record current solution.
278 _StateIdT _M_start;
279 };
280
281 template<typename _ResultsVec>
282 struct _State_info<__dfs, _ResultsVec>
283 {
284 explicit
285 _State_info(_StateIdT __start, size_t) : _M_start(__start)
286 { }
287
288 // Dummy implementations for DFS mode.
289 bool _M_visited(_StateIdT) const { return false; }
290 void _M_queue(_StateIdT, const _ResultsVec&) { }
291
292 _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
293
294 // To record current solution.
295 _StateIdT _M_start;
296 _BiIter _M_sol_pos;
297 };
298
299 public:
300 _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
301 _ResultsVec _M_cur_results;
302 _BiIter _M_current;
303 _BiIter _M_begin;
304 const _BiIter _M_end;
305 const _RegexT& _M_re;
306 const _NFAT& _M_nfa;
307 _ResultsVec& _M_results;
308 _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
309 _State_info<__search_mode, _ResultsVec> _M_states;
310 _FlagT _M_flags;
311 // Do we have a solution so far?
312 bool _M_has_sol;
313 };
314
315 ///@} regex-detail
316_GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
317} // namespace __detail
318_GLIBCXX_END_NAMESPACE_VERSION
319} // namespace std
320
__bool_constant< true > true_type
The type used as a compile-time boolean with true value.
Definition type_traits:119
__bool_constant< false > false_type
The type used as a compile-time boolean with false value.
Definition type_traits:122
const _Facet & use_facet(const locale &__loc)
Return a facet.
ISO C++ entities toplevel namespace is std.
Implementation details not part of the namespace std interface.
ISO C++ 2011 namespace for options and flags used with std::regex.
constexpr match_flag_type match_not_bol
constexpr syntax_option_type ECMAScript
constexpr syntax_option_type __multiline
Extension: Equivalent to regex_constants::multiline for C++11 and C++14.
constexpr match_flag_type match_not_eol
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
integral_constant
Definition type_traits:96
Traits class for iterators.
A regular expression.
Definition regex.h:443
constexpr allocator_type get_allocator() const noexcept
Get a copy of the memory allocation object.
Definition stl_vector.h:317