1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_creator.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_creator which fills in
17 * the data members of a regex_data object.
18 */
19
20#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
21#define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
22
23#ifdef BOOST_MSVC
24#pragma warning(push)
25#pragma warning(disable: 4103)
26#endif
27#ifdef BOOST_HAS_ABI_HEADERS
28# include BOOST_ABI_PREFIX
29#endif
30#ifdef BOOST_MSVC
31#pragma warning(pop)
32#endif
33
34#ifdef BOOST_MSVC
35# pragma warning(push)
36# pragma warning(disable: 4800)
37#endif
38
39namespace boost{
40
41namespace BOOST_REGEX_DETAIL_NS{
42
43template <class charT>
44struct digraph : public std::pair<charT, charT>
45{
46 digraph() : std::pair<charT, charT>(0, 0){}
47 digraph(charT c1) : std::pair<charT, charT>(c1, 0){}
48 digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2)
49 {}
50 digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){}
51 template <class Seq>
52 digraph(const Seq& s) : std::pair<charT, charT>()
53 {
54 BOOST_ASSERT(s.size() <= 2);
55 BOOST_ASSERT(s.size());
56 this->first = s[0];
57 this->second = (s.size() > 1) ? s[1] : 0;
58 }
59};
60
61template <class charT, class traits>
62class basic_char_set
63{
64public:
65 typedef digraph<charT> digraph_type;
66 typedef typename traits::string_type string_type;
67 typedef typename traits::char_class_type m_type;
68
69 basic_char_set()
70 {
71 m_negate = false;
72 m_has_digraphs = false;
73 m_classes = 0;
74 m_negated_classes = 0;
75 m_empty = true;
76 }
77
78 void add_single(const digraph_type& s)
79 {
80 m_singles.insert(m_singles.end(), s);
81 if(s.second)
82 m_has_digraphs = true;
83 m_empty = false;
84 }
85 void add_range(const digraph_type& first, const digraph_type& end)
86 {
87 m_ranges.insert(m_ranges.end(), first);
88 m_ranges.insert(m_ranges.end(), end);
89 if(first.second)
90 {
91 m_has_digraphs = true;
92 add_single(s: first);
93 }
94 if(end.second)
95 {
96 m_has_digraphs = true;
97 add_single(s: end);
98 }
99 m_empty = false;
100 }
101 void add_class(m_type m)
102 {
103 m_classes |= m;
104 m_empty = false;
105 }
106 void add_negated_class(m_type m)
107 {
108 m_negated_classes |= m;
109 m_empty = false;
110 }
111 void add_equivalent(const digraph_type& s)
112 {
113 m_equivalents.insert(m_equivalents.end(), s);
114 if(s.second)
115 {
116 m_has_digraphs = true;
117 add_single(s);
118 }
119 m_empty = false;
120 }
121 void negate()
122 {
123 m_negate = true;
124 //m_empty = false;
125 }
126
127 //
128 // accessor functions:
129 //
130 bool has_digraphs()const
131 {
132 return m_has_digraphs;
133 }
134 bool is_negated()const
135 {
136 return m_negate;
137 }
138 typedef typename std::vector<digraph_type>::const_iterator list_iterator;
139 list_iterator singles_begin()const
140 {
141 return m_singles.begin();
142 }
143 list_iterator singles_end()const
144 {
145 return m_singles.end();
146 }
147 list_iterator ranges_begin()const
148 {
149 return m_ranges.begin();
150 }
151 list_iterator ranges_end()const
152 {
153 return m_ranges.end();
154 }
155 list_iterator equivalents_begin()const
156 {
157 return m_equivalents.begin();
158 }
159 list_iterator equivalents_end()const
160 {
161 return m_equivalents.end();
162 }
163 m_type classes()const
164 {
165 return m_classes;
166 }
167 m_type negated_classes()const
168 {
169 return m_negated_classes;
170 }
171 bool empty()const
172 {
173 return m_empty;
174 }
175private:
176 std::vector<digraph_type> m_singles; // a list of single characters to match
177 std::vector<digraph_type> m_ranges; // a list of end points of our ranges
178 bool m_negate; // true if the set is to be negated
179 bool m_has_digraphs; // true if we have digraphs present
180 m_type m_classes; // character classes to match
181 m_type m_negated_classes; // negated character classes to match
182 bool m_empty; // whether we've added anything yet
183 std::vector<digraph_type> m_equivalents; // a list of equivalence classes
184};
185
186template <class charT, class traits>
187class basic_regex_creator
188{
189public:
190 basic_regex_creator(regex_data<charT, traits>* data);
191 std::ptrdiff_t getoffset(void* addr)
192 {
193 return getoffset(addr, m_pdata->m_data.data());
194 }
195 std::ptrdiff_t getoffset(const void* addr, const void* base)
196 {
197 return static_cast<const char*>(addr) - static_cast<const char*>(base);
198 }
199 re_syntax_base* getaddress(std::ptrdiff_t off)
200 {
201 return getaddress(off, m_pdata->m_data.data());
202 }
203 re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
204 {
205 return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
206 }
207 void init(unsigned l_flags)
208 {
209 m_pdata->m_flags = l_flags;
210 m_icase = l_flags & regex_constants::icase;
211 }
212 regbase::flag_type flags()
213 {
214 return m_pdata->m_flags;
215 }
216 void flags(regbase::flag_type f)
217 {
218 m_pdata->m_flags = f;
219 if(m_icase != static_cast<bool>(f & regbase::icase))
220 {
221 m_icase = static_cast<bool>(f & regbase::icase);
222 }
223 }
224 re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
225 re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
226 re_literal* append_literal(charT c);
227 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
228 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
229 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
230 void finalize(const charT* p1, const charT* p2);
231protected:
232 regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
233 const ::boost::regex_traits_wrapper<traits>&
234 m_traits; // convenience reference to traits class
235 re_syntax_base* m_last_state; // the last state we added
236 bool m_icase; // true for case insensitive matches
237 unsigned m_repeater_id; // the state_id of the next repeater
238 bool m_has_backrefs; // true if there are actually any backrefs
239 unsigned m_backrefs; // bitmask of permitted backrefs
240 boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
241 bool m_has_recursions; // set when we have recursive expresisons to fixup
242 std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
243 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
244 typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
245 typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
246 typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
247 typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
248private:
249 basic_regex_creator& operator=(const basic_regex_creator&);
250 basic_regex_creator(const basic_regex_creator&);
251
252 void fixup_pointers(re_syntax_base* state);
253 void fixup_recursions(re_syntax_base* state);
254 void create_startmaps(re_syntax_base* state);
255 int calculate_backstep(re_syntax_base* state);
256 void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
257 unsigned get_restart_type(re_syntax_base* state);
258 void set_all_masks(unsigned char* bits, unsigned char);
259 bool is_bad_repeat(re_syntax_base* pt);
260 void set_bad_repeat(re_syntax_base* pt);
261 syntax_element_type get_repeat_type(re_syntax_base* state);
262 void probe_leading_repeat(re_syntax_base* state);
263};
264
265template <class charT, class traits>
266basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
267 : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
268{
269 m_pdata->m_data.clear();
270 m_pdata->m_status = ::boost::regex_constants::error_ok;
271 static const charT w = 'w';
272 static const charT s = 's';
273 static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
274 static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
275 static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
276 m_word_mask = m_traits.lookup_classname(&w, &w +1);
277 m_mask_space = m_traits.lookup_classname(&s, &s +1);
278 m_lower_mask = m_traits.lookup_classname(l, l + 5);
279 m_upper_mask = m_traits.lookup_classname(u, u + 5);
280 m_alpha_mask = m_traits.lookup_classname(a, a + 5);
281 m_pdata->m_word_mask = m_word_mask;
282 BOOST_ASSERT(m_word_mask != 0);
283 BOOST_ASSERT(m_mask_space != 0);
284 BOOST_ASSERT(m_lower_mask != 0);
285 BOOST_ASSERT(m_upper_mask != 0);
286 BOOST_ASSERT(m_alpha_mask != 0);
287}
288
289template <class charT, class traits>
290re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
291{
292 // if the state is a backref then make a note of it:
293 if(t == syntax_element_backref)
294 this->m_has_backrefs = true;
295 // append a new state, start by aligning our last one:
296 m_pdata->m_data.align();
297 // set the offset to the next state in our last one:
298 if(m_last_state)
299 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
300 // now actually extent our data:
301 m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
302 // fill in boilerplate options in the new state:
303 m_last_state->next.i = 0;
304 m_last_state->type = t;
305 return m_last_state;
306}
307
308template <class charT, class traits>
309re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
310{
311 // append a new state, start by aligning our last one:
312 m_pdata->m_data.align();
313 // set the offset to the next state in our last one:
314 if(m_last_state)
315 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
316 // remember the last state position:
317 std::ptrdiff_t off = getoffset(m_last_state) + s;
318 // now actually insert our data:
319 re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
320 // fill in boilerplate options in the new state:
321 new_state->next.i = s;
322 new_state->type = t;
323 m_last_state = getaddress(off);
324 return new_state;
325}
326
327template <class charT, class traits>
328re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
329{
330 re_literal* result;
331 // start by seeing if we have an existing re_literal we can extend:
332 if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
333 {
334 // no existing re_literal, create a new one:
335 result = static_cast<re_literal*>(append_state(t: syntax_element_literal, s: sizeof(re_literal) + sizeof(charT)));
336 result->length = 1;
337 *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
338 }
339 else
340 {
341 // we have an existing re_literal, extend it:
342 std::ptrdiff_t off = getoffset(m_last_state);
343 m_pdata->m_data.extend(sizeof(charT));
344 m_last_state = result = static_cast<re_literal*>(getaddress(off));
345 charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
346 characters[result->length] = m_traits.translate(c, m_icase);
347 result->length += 1;
348 }
349 return result;
350}
351
352template <class charT, class traits>
353inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
354 const basic_char_set<charT, traits>& char_set)
355{
356 typedef mpl::bool_< (sizeof(charT) == 1) > truth_type;
357 return char_set.has_digraphs()
358 ? append_set(char_set, static_cast<mpl::false_*>(0))
359 : append_set(char_set, static_cast<truth_type*>(0));
360}
361
362template <class charT, class traits>
363re_syntax_base* basic_regex_creator<charT, traits>::append_set(
364 const basic_char_set<charT, traits>& char_set, mpl::false_*)
365{
366 typedef typename traits::string_type string_type;
367 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
368 typedef typename traits::char_class_type m_type;
369
370 re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(t: syntax_element_long_set, s: sizeof(re_set_long<m_type>)));
371 //
372 // fill in the basics:
373 //
374 result->csingles = static_cast<unsigned int>(::boost::BOOST_REGEX_DETAIL_NS::distance(char_set.singles_begin(), char_set.singles_end()));
375 result->cranges = static_cast<unsigned int>(::boost::BOOST_REGEX_DETAIL_NS::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
376 result->cequivalents = static_cast<unsigned int>(::boost::BOOST_REGEX_DETAIL_NS::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
377 result->cclasses = char_set.classes();
378 result->cnclasses = char_set.negated_classes();
379 if(flags() & regbase::icase)
380 {
381 // adjust classes as needed:
382 if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
383 result->cclasses |= m_alpha_mask;
384 if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
385 result->cnclasses |= m_alpha_mask;
386 }
387
388 result->isnot = char_set.is_negated();
389 result->singleton = !char_set.has_digraphs();
390 //
391 // remember where the state is for later:
392 //
393 std::ptrdiff_t offset = getoffset(result);
394 //
395 // now extend with all the singles:
396 //
397 item_iterator first, last;
398 first = char_set.singles_begin();
399 last = char_set.singles_end();
400 while(first != last)
401 {
402 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
403 p[0] = m_traits.translate(first->first, m_icase);
404 if(first->second)
405 {
406 p[1] = m_traits.translate(first->second, m_icase);
407 p[2] = 0;
408 }
409 else
410 p[1] = 0;
411 ++first;
412 }
413 //
414 // now extend with all the ranges:
415 //
416 first = char_set.ranges_begin();
417 last = char_set.ranges_end();
418 while(first != last)
419 {
420 // first grab the endpoints of the range:
421 digraph<charT> c1 = *first;
422 c1.first = this->m_traits.translate(c1.first, this->m_icase);
423 c1.second = this->m_traits.translate(c1.second, this->m_icase);
424 ++first;
425 digraph<charT> c2 = *first;
426 c2.first = this->m_traits.translate(c2.first, this->m_icase);
427 c2.second = this->m_traits.translate(c2.second, this->m_icase);
428 ++first;
429 string_type s1, s2;
430 // different actions now depending upon whether collation is turned on:
431 if(flags() & regex_constants::collate)
432 {
433 // we need to transform our range into sort keys:
434 charT a1[3] = { c1.first, c1.second, charT(0), };
435 charT a2[3] = { c2.first, c2.second, charT(0), };
436 s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
437 s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
438 if(s1.size() == 0)
439 s1 = string_type(1, charT(0));
440 if(s2.size() == 0)
441 s2 = string_type(1, charT(0));
442 }
443 else
444 {
445 if(c1.second)
446 {
447 s1.insert(s1.end(), c1.first);
448 s1.insert(s1.end(), c1.second);
449 }
450 else
451 s1 = string_type(1, c1.first);
452 if(c2.second)
453 {
454 s2.insert(s2.end(), c2.first);
455 s2.insert(s2.end(), c2.second);
456 }
457 else
458 s2.insert(s2.end(), c2.first);
459 }
460 if(s1 > s2)
461 {
462 // Oops error:
463 return 0;
464 }
465 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
466 BOOST_REGEX_DETAIL_NS::copy(s1.begin(), s1.end(), p);
467 p[s1.size()] = charT(0);
468 p += s1.size() + 1;
469 BOOST_REGEX_DETAIL_NS::copy(s2.begin(), s2.end(), p);
470 p[s2.size()] = charT(0);
471 }
472 //
473 // now process the equivalence classes:
474 //
475 first = char_set.equivalents_begin();
476 last = char_set.equivalents_end();
477 while(first != last)
478 {
479 string_type s;
480 if(first->second)
481 {
482 charT cs[3] = { first->first, first->second, charT(0), };
483 s = m_traits.transform_primary(cs, cs+2);
484 }
485 else
486 s = m_traits.transform_primary(&first->first, &first->first+1);
487 if(s.empty())
488 return 0; // invalid or unsupported equivalence class
489 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
490 BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p);
491 p[s.size()] = charT(0);
492 ++first;
493 }
494 //
495 // finally reset the address of our last state:
496 //
497 m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
498 return result;
499}
500
501template<class T>
502inline bool char_less(T t1, T t2)
503{
504 return t1 < t2;
505}
506inline bool char_less(char t1, char t2)
507{
508 return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
509}
510inline bool char_less(signed char t1, signed char t2)
511{
512 return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
513}
514
515template <class charT, class traits>
516re_syntax_base* basic_regex_creator<charT, traits>::append_set(
517 const basic_char_set<charT, traits>& char_set, mpl::true_*)
518{
519 typedef typename traits::string_type string_type;
520 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
521
522 re_set* result = static_cast<re_set*>(append_state(t: syntax_element_set, s: sizeof(re_set)));
523 bool negate = char_set.is_negated();
524 std::memset(s: result->_map, c: 0, n: sizeof(result->_map));
525 //
526 // handle singles first:
527 //
528 item_iterator first, last;
529 first = char_set.singles_begin();
530 last = char_set.singles_end();
531 while(first != last)
532 {
533 for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
534 {
535 if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
536 == this->m_traits.translate(first->first, this->m_icase))
537 result->_map[i] = true;
538 }
539 ++first;
540 }
541 //
542 // OK now handle ranges:
543 //
544 first = char_set.ranges_begin();
545 last = char_set.ranges_end();
546 while(first != last)
547 {
548 // first grab the endpoints of the range:
549 charT c1 = this->m_traits.translate(first->first, this->m_icase);
550 ++first;
551 charT c2 = this->m_traits.translate(first->first, this->m_icase);
552 ++first;
553 // different actions now depending upon whether collation is turned on:
554 if(flags() & regex_constants::collate)
555 {
556 // we need to transform our range into sort keys:
557 charT c3[2] = { c1, charT(0), };
558 string_type s1 = this->m_traits.transform(c3, c3+1);
559 c3[0] = c2;
560 string_type s2 = this->m_traits.transform(c3, c3+1);
561 if(s1 > s2)
562 {
563 // Oops error:
564 return 0;
565 }
566 BOOST_ASSERT(c3[1] == charT(0));
567 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
568 {
569 c3[0] = static_cast<charT>(i);
570 string_type s3 = this->m_traits.transform(c3, c3 +1);
571 if((s1 <= s3) && (s3 <= s2))
572 result->_map[i] = true;
573 }
574 }
575 else
576 {
577 if(char_less(c2, c1))
578 {
579 // Oops error:
580 return 0;
581 }
582 // everything in range matches:
583 std::memset(s: result->_map + static_cast<unsigned char>(c1), c: true, n: 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
584 }
585 }
586 //
587 // and now the classes:
588 //
589 typedef typename traits::char_class_type m_type;
590 m_type m = char_set.classes();
591 if(flags() & regbase::icase)
592 {
593 // adjust m as needed:
594 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
595 m |= m_alpha_mask;
596 }
597 if(m != 0)
598 {
599 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
600 {
601 if(this->m_traits.isctype(static_cast<charT>(i), m))
602 result->_map[i] = true;
603 }
604 }
605 //
606 // and now the negated classes:
607 //
608 m = char_set.negated_classes();
609 if(flags() & regbase::icase)
610 {
611 // adjust m as needed:
612 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
613 m |= m_alpha_mask;
614 }
615 if(m != 0)
616 {
617 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
618 {
619 if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
620 result->_map[i] = true;
621 }
622 }
623 //
624 // now process the equivalence classes:
625 //
626 first = char_set.equivalents_begin();
627 last = char_set.equivalents_end();
628 while(first != last)
629 {
630 string_type s;
631 BOOST_ASSERT(static_cast<charT>(0) == first->second);
632 s = m_traits.transform_primary(&first->first, &first->first+1);
633 if(s.empty())
634 return 0; // invalid or unsupported equivalence class
635 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
636 {
637 charT c[2] = { (static_cast<charT>(i)), charT(0), };
638 string_type s2 = this->m_traits.transform_primary(c, c+1);
639 if(s == s2)
640 result->_map[i] = true;
641 }
642 ++first;
643 }
644 if(negate)
645 {
646 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
647 {
648 result->_map[i] = !(result->_map[i]);
649 }
650 }
651 return result;
652}
653
654template <class charT, class traits>
655void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
656{
657 if(this->m_pdata->m_status)
658 return;
659 // we've added all the states we need, now finish things off.
660 // start by adding a terminating state:
661 append_state(t: syntax_element_match);
662 // extend storage to store original expression:
663 std::ptrdiff_t len = p2 - p1;
664 m_pdata->m_expression_len = len;
665 charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
666 m_pdata->m_expression = ps;
667 BOOST_REGEX_DETAIL_NS::copy(p1, p2, ps);
668 ps[p2 - p1] = 0;
669 // fill in our other data...
670 // successful parsing implies a zero status:
671 m_pdata->m_status = 0;
672 // get the first state of the machine:
673 m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
674 // fixup pointers in the machine:
675 fixup_pointers(state: m_pdata->m_first_state);
676 if(m_has_recursions)
677 {
678 m_pdata->m_has_recursions = true;
679 fixup_recursions(state: m_pdata->m_first_state);
680 if(this->m_pdata->m_status)
681 return;
682 }
683 else
684 m_pdata->m_has_recursions = false;
685 // create nested startmaps:
686 create_startmaps(state: m_pdata->m_first_state);
687 // create main startmap:
688 std::memset(s: m_pdata->m_startmap, c: 0, n: sizeof(m_pdata->m_startmap));
689 m_pdata->m_can_be_null = 0;
690
691 m_bad_repeats = 0;
692 if(m_has_recursions)
693 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
694 create_startmap(state: m_pdata->m_first_state, l_map: m_pdata->m_startmap, pnull: &(m_pdata->m_can_be_null), mask: mask_all);
695 // get the restart type:
696 m_pdata->m_restart_type = get_restart_type(state: m_pdata->m_first_state);
697 // optimise a leading repeat if there is one:
698 probe_leading_repeat(state: m_pdata->m_first_state);
699}
700
701template <class charT, class traits>
702void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
703{
704 while(state)
705 {
706 switch(state->type)
707 {
708 case syntax_element_recurse:
709 m_has_recursions = true;
710 if(state->next.i)
711 state->next.p = getaddress(state->next.i, state);
712 else
713 state->next.p = 0;
714 break;
715 case syntax_element_rep:
716 case syntax_element_dot_rep:
717 case syntax_element_char_rep:
718 case syntax_element_short_set_rep:
719 case syntax_element_long_set_rep:
720 // set the state_id of this repeat:
721 static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
722 BOOST_FALLTHROUGH;
723 case syntax_element_alt:
724 std::memset(s: static_cast<re_alt*>(state)->_map, c: 0, n: sizeof(static_cast<re_alt*>(state)->_map));
725 static_cast<re_alt*>(state)->can_be_null = 0;
726 BOOST_FALLTHROUGH;
727 case syntax_element_jump:
728 static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
729 BOOST_FALLTHROUGH;
730 default:
731 if(state->next.i)
732 state->next.p = getaddress(state->next.i, state);
733 else
734 state->next.p = 0;
735 }
736 state = state->next.p;
737 }
738}
739
740template <class charT, class traits>
741void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
742{
743 re_syntax_base* base = state;
744 while(state)
745 {
746 switch(state->type)
747 {
748 case syntax_element_assert_backref:
749 {
750 // just check that the index is valid:
751 int idx = static_cast<const re_brace*>(state)->index;
752 if(idx < 0)
753 {
754 idx = -idx-1;
755 if(idx >= 10000)
756 {
757 idx = m_pdata->get_id(idx);
758 if(idx <= 0)
759 {
760 // check of sub-expression that doesn't exist:
761 if(0 == this->m_pdata->m_status) // update the error code if not already set
762 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
763 //
764 // clear the expression, we should be empty:
765 //
766 this->m_pdata->m_expression = 0;
767 this->m_pdata->m_expression_len = 0;
768 //
769 // and throw if required:
770 //
771 if(0 == (this->flags() & regex_constants::no_except))
772 {
773 std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
774 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
775 e.raise();
776 }
777 }
778 }
779 }
780 }
781 break;
782 case syntax_element_recurse:
783 {
784 bool ok = false;
785 re_syntax_base* p = base;
786 std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
787 if(idx > 10000)
788 {
789 //
790 // There may be more than one capture group with this hash, just do what Perl
791 // does and recurse to the leftmost:
792 //
793 idx = m_pdata->get_id(static_cast<int>(idx));
794 }
795 while(p)
796 {
797 if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
798 {
799 //
800 // We've found the target of the recursion, set the jump target:
801 //
802 static_cast<re_jump*>(state)->alt.p = p;
803 ok = true;
804 //
805 // Now scan the target for nested repeats:
806 //
807 p = p->next.p;
808 int next_rep_id = 0;
809 while(p)
810 {
811 switch(p->type)
812 {
813 case syntax_element_rep:
814 case syntax_element_dot_rep:
815 case syntax_element_char_rep:
816 case syntax_element_short_set_rep:
817 case syntax_element_long_set_rep:
818 next_rep_id = static_cast<re_repeat*>(p)->state_id;
819 break;
820 case syntax_element_endmark:
821 if(static_cast<const re_brace*>(p)->index == idx)
822 next_rep_id = -1;
823 break;
824 default:
825 break;
826 }
827 if(next_rep_id)
828 break;
829 p = p->next.p;
830 }
831 if(next_rep_id > 0)
832 {
833 static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
834 }
835
836 break;
837 }
838 p = p->next.p;
839 }
840 if(!ok)
841 {
842 // recursion to sub-expression that doesn't exist:
843 if(0 == this->m_pdata->m_status) // update the error code if not already set
844 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
845 //
846 // clear the expression, we should be empty:
847 //
848 this->m_pdata->m_expression = 0;
849 this->m_pdata->m_expression_len = 0;
850 //
851 // and throw if required:
852 //
853 if(0 == (this->flags() & regex_constants::no_except))
854 {
855 std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
856 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
857 e.raise();
858 }
859 }
860 }
861 break;
862 default:
863 break;
864 }
865 state = state->next.p;
866 }
867}
868
869template <class charT, class traits>
870void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
871{
872 // non-recursive implementation:
873 // create the last map in the machine first, so that earlier maps
874 // can make use of the result...
875 //
876 // This was originally a recursive implementation, but that caused stack
877 // overflows with complex expressions on small stacks (think COM+).
878
879 // start by saving the case setting:
880 bool l_icase = m_icase;
881 std::vector<std::pair<bool, re_syntax_base*> > v;
882
883 while(state)
884 {
885 switch(state->type)
886 {
887 case syntax_element_toggle_case:
888 // we need to track case changes here:
889 m_icase = static_cast<re_case*>(state)->icase;
890 state = state->next.p;
891 continue;
892 case syntax_element_alt:
893 case syntax_element_rep:
894 case syntax_element_dot_rep:
895 case syntax_element_char_rep:
896 case syntax_element_short_set_rep:
897 case syntax_element_long_set_rep:
898 // just push the state onto our stack for now:
899 v.push_back(x: std::pair<bool, re_syntax_base*>(m_icase, state));
900 state = state->next.p;
901 break;
902 case syntax_element_backstep:
903 // we need to calculate how big the backstep is:
904 static_cast<re_brace*>(state)->index
905 = this->calculate_backstep(state->next.p);
906 if(static_cast<re_brace*>(state)->index < 0)
907 {
908 // Oops error:
909 if(0 == this->m_pdata->m_status) // update the error code if not already set
910 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
911 //
912 // clear the expression, we should be empty:
913 //
914 this->m_pdata->m_expression = 0;
915 this->m_pdata->m_expression_len = 0;
916 //
917 // and throw if required:
918 //
919 if(0 == (this->flags() & regex_constants::no_except))
920 {
921 std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
922 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
923 e.raise();
924 }
925 }
926 BOOST_FALLTHROUGH;
927 default:
928 state = state->next.p;
929 }
930 }
931
932 // now work through our list, building all the maps as we go:
933 while(v.size())
934 {
935 // Initialize m_recursion_checks if we need it:
936 if(m_has_recursions)
937 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
938
939 const std::pair<bool, re_syntax_base*>& p = v.back();
940 m_icase = p.first;
941 state = p.second;
942 v.pop_back();
943
944 // Build maps:
945 m_bad_repeats = 0;
946 create_startmap(state: state->next.p, l_map: static_cast<re_alt*>(state)->_map, pnull: &static_cast<re_alt*>(state)->can_be_null, mask: mask_take);
947 m_bad_repeats = 0;
948
949 if(m_has_recursions)
950 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
951 create_startmap(state: static_cast<re_alt*>(state)->alt.p, l_map: static_cast<re_alt*>(state)->_map, pnull: &static_cast<re_alt*>(state)->can_be_null, mask: mask_skip);
952 // adjust the type of the state to allow for faster matching:
953 state->type = this->get_repeat_type(state);
954 }
955 // restore case sensitivity:
956 m_icase = l_icase;
957}
958
959template <class charT, class traits>
960int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
961{
962 typedef typename traits::char_class_type m_type;
963 int result = 0;
964 while(state)
965 {
966 switch(state->type)
967 {
968 case syntax_element_startmark:
969 if((static_cast<re_brace*>(state)->index == -1)
970 || (static_cast<re_brace*>(state)->index == -2))
971 {
972 state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
973 continue;
974 }
975 else if(static_cast<re_brace*>(state)->index == -3)
976 {
977 state = state->next.p->next.p;
978 continue;
979 }
980 break;
981 case syntax_element_endmark:
982 if((static_cast<re_brace*>(state)->index == -1)
983 || (static_cast<re_brace*>(state)->index == -2))
984 return result;
985 break;
986 case syntax_element_literal:
987 result += static_cast<re_literal*>(state)->length;
988 break;
989 case syntax_element_wild:
990 case syntax_element_set:
991 result += 1;
992 break;
993 case syntax_element_dot_rep:
994 case syntax_element_char_rep:
995 case syntax_element_short_set_rep:
996 case syntax_element_backref:
997 case syntax_element_rep:
998 case syntax_element_combining:
999 case syntax_element_long_set_rep:
1000 case syntax_element_backstep:
1001 {
1002 re_repeat* rep = static_cast<re_repeat *>(state);
1003 // adjust the type of the state to allow for faster matching:
1004 state->type = this->get_repeat_type(state);
1005 if((state->type == syntax_element_dot_rep)
1006 || (state->type == syntax_element_char_rep)
1007 || (state->type == syntax_element_short_set_rep))
1008 {
1009 if(rep->max != rep->min)
1010 return -1;
1011 result += static_cast<int>(rep->min);
1012 state = rep->alt.p;
1013 continue;
1014 }
1015 else if(state->type == syntax_element_long_set_rep)
1016 {
1017 BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
1018 if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
1019 return -1;
1020 if(rep->max != rep->min)
1021 return -1;
1022 result += static_cast<int>(rep->min);
1023 state = rep->alt.p;
1024 continue;
1025 }
1026 }
1027 return -1;
1028 case syntax_element_long_set:
1029 if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
1030 return -1;
1031 result += 1;
1032 break;
1033 case syntax_element_jump:
1034 state = static_cast<re_jump*>(state)->alt.p;
1035 continue;
1036 case syntax_element_alt:
1037 {
1038 int r1 = calculate_backstep(state: state->next.p);
1039 int r2 = calculate_backstep(state: static_cast<re_alt*>(state)->alt.p);
1040 if((r1 < 0) || (r1 != r2))
1041 return -1;
1042 return result + r1;
1043 }
1044 default:
1045 break;
1046 }
1047 state = state->next.p;
1048 }
1049 return -1;
1050}
1051
1052template <class charT, class traits>
1053void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
1054{
1055 int not_last_jump = 1;
1056 re_syntax_base* recursion_start = 0;
1057 int recursion_sub = 0;
1058 re_syntax_base* recursion_restart = 0;
1059
1060 // track case sensitivity:
1061 bool l_icase = m_icase;
1062
1063 while(state)
1064 {
1065 switch(state->type)
1066 {
1067 case syntax_element_toggle_case:
1068 l_icase = static_cast<re_case*>(state)->icase;
1069 state = state->next.p;
1070 break;
1071 case syntax_element_literal:
1072 {
1073 // don't set anything in *pnull, set each element in l_map
1074 // that could match the first character in the literal:
1075 if(l_map)
1076 {
1077 l_map[0] |= mask_init;
1078 charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
1079 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1080 {
1081 if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
1082 l_map[i] |= mask;
1083 }
1084 }
1085 return;
1086 }
1087 case syntax_element_end_line:
1088 {
1089 // next character must be a line separator (if there is one):
1090 if(l_map)
1091 {
1092 l_map[0] |= mask_init;
1093 l_map[static_cast<unsigned>('\n')] |= mask;
1094 l_map[static_cast<unsigned>('\r')] |= mask;
1095 l_map[static_cast<unsigned>('\f')] |= mask;
1096 l_map[0x85] |= mask;
1097 }
1098 // now figure out if we can match a NULL string at this point:
1099 if(pnull)
1100 create_startmap(state: state->next.p, l_map: 0, pnull, mask);
1101 return;
1102 }
1103 case syntax_element_recurse:
1104 {
1105 if(state->type == syntax_element_startmark)
1106 recursion_sub = static_cast<re_brace*>(state)->index;
1107 else
1108 recursion_sub = 0;
1109 if(m_recursion_checks[recursion_sub])
1110 {
1111 // Infinite recursion!!
1112 if(0 == this->m_pdata->m_status) // update the error code if not already set
1113 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
1114 //
1115 // clear the expression, we should be empty:
1116 //
1117 this->m_pdata->m_expression = 0;
1118 this->m_pdata->m_expression_len = 0;
1119 //
1120 // and throw if required:
1121 //
1122 if(0 == (this->flags() & regex_constants::no_except))
1123 {
1124 std::string message = "Encountered an infinite recursion.";
1125 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
1126 e.raise();
1127 }
1128 }
1129 else if(recursion_start == 0)
1130 {
1131 recursion_start = state;
1132 recursion_restart = state->next.p;
1133 state = static_cast<re_jump*>(state)->alt.p;
1134 m_recursion_checks[recursion_sub] = true;
1135 break;
1136 }
1137 m_recursion_checks[recursion_sub] = true;
1138 // can't handle nested recursion here...
1139 BOOST_FALLTHROUGH;
1140 }
1141 case syntax_element_backref:
1142 // can be null, and any character can match:
1143 if(pnull)
1144 *pnull |= mask;
1145 BOOST_FALLTHROUGH;
1146 case syntax_element_wild:
1147 {
1148 // can't be null, any character can match:
1149 set_all_masks(bits: l_map, mask);
1150 return;
1151 }
1152 case syntax_element_accept:
1153 case syntax_element_match:
1154 {
1155 // must be null, any character can match:
1156 set_all_masks(bits: l_map, mask);
1157 if(pnull)
1158 *pnull |= mask;
1159 return;
1160 }
1161 case syntax_element_word_start:
1162 {
1163 // recurse, then AND with all the word characters:
1164 create_startmap(state: state->next.p, l_map, pnull, mask);
1165 if(l_map)
1166 {
1167 l_map[0] |= mask_init;
1168 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1169 {
1170 if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
1171 l_map[i] &= static_cast<unsigned char>(~mask);
1172 }
1173 }
1174 return;
1175 }
1176 case syntax_element_word_end:
1177 {
1178 // recurse, then AND with all the word characters:
1179 create_startmap(state: state->next.p, l_map, pnull, mask);
1180 if(l_map)
1181 {
1182 l_map[0] |= mask_init;
1183 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1184 {
1185 if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
1186 l_map[i] &= static_cast<unsigned char>(~mask);
1187 }
1188 }
1189 return;
1190 }
1191 case syntax_element_buffer_end:
1192 {
1193 // we *must be null* :
1194 if(pnull)
1195 *pnull |= mask;
1196 return;
1197 }
1198 case syntax_element_long_set:
1199 if(l_map)
1200 {
1201 typedef typename traits::char_class_type m_type;
1202 if(static_cast<re_set_long<m_type>*>(state)->singleton)
1203 {
1204 l_map[0] |= mask_init;
1205 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1206 {
1207 charT c = static_cast<charT>(i);
1208 if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
1209 l_map[i] |= mask;
1210 }
1211 }
1212 else
1213 set_all_masks(bits: l_map, mask);
1214 }
1215 return;
1216 case syntax_element_set:
1217 if(l_map)
1218 {
1219 l_map[0] |= mask_init;
1220 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1221 {
1222 if(static_cast<re_set*>(state)->_map[
1223 static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
1224 l_map[i] |= mask;
1225 }
1226 }
1227 return;
1228 case syntax_element_jump:
1229 // take the jump:
1230 state = static_cast<re_alt*>(state)->alt.p;
1231 not_last_jump = -1;
1232 break;
1233 case syntax_element_alt:
1234 case syntax_element_rep:
1235 case syntax_element_dot_rep:
1236 case syntax_element_char_rep:
1237 case syntax_element_short_set_rep:
1238 case syntax_element_long_set_rep:
1239 {
1240 re_alt* rep = static_cast<re_alt*>(state);
1241 if(rep->_map[0] & mask_init)
1242 {
1243 if(l_map)
1244 {
1245 // copy previous results:
1246 l_map[0] |= mask_init;
1247 for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
1248 {
1249 if(rep->_map[i] & mask_any)
1250 l_map[i] |= mask;
1251 }
1252 }
1253 if(pnull)
1254 {
1255 if(rep->can_be_null & mask_any)
1256 *pnull |= mask;
1257 }
1258 }
1259 else
1260 {
1261 // we haven't created a startmap for this alternative yet
1262 // so take the union of the two options:
1263 if(is_bad_repeat(pt: state))
1264 {
1265 set_all_masks(bits: l_map, mask);
1266 if(pnull)
1267 *pnull |= mask;
1268 return;
1269 }
1270 set_bad_repeat(state);
1271 create_startmap(state: state->next.p, l_map, pnull, mask);
1272 if((state->type == syntax_element_alt)
1273 || (static_cast<re_repeat*>(state)->min == 0)
1274 || (not_last_jump == 0))
1275 create_startmap(state: rep->alt.p, l_map, pnull, mask);
1276 }
1277 }
1278 return;
1279 case syntax_element_soft_buffer_end:
1280 // match newline or null:
1281 if(l_map)
1282 {
1283 l_map[0] |= mask_init;
1284 l_map[static_cast<unsigned>('\n')] |= mask;
1285 l_map[static_cast<unsigned>('\r')] |= mask;
1286 }
1287 if(pnull)
1288 *pnull |= mask;
1289 return;
1290 case syntax_element_endmark:
1291 // need to handle independent subs as a special case:
1292 if(static_cast<re_brace*>(state)->index < 0)
1293 {
1294 // can be null, any character can match:
1295 set_all_masks(bits: l_map, mask);
1296 if(pnull)
1297 *pnull |= mask;
1298 return;
1299 }
1300 else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
1301 {
1302 // recursion termination:
1303 recursion_start = 0;
1304 state = recursion_restart;
1305 break;
1306 }
1307
1308 //
1309 // Normally we just go to the next state... but if this sub-expression is
1310 // the target of a recursion, then we might be ending a recursion, in which
1311 // case we should check whatever follows that recursion, as well as whatever
1312 // follows this state:
1313 //
1314 if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
1315 {
1316 bool ok = false;
1317 re_syntax_base* p = m_pdata->m_first_state;
1318 while(p)
1319 {
1320 if(p->type == syntax_element_recurse)
1321 {
1322 re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
1323 if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
1324 {
1325 ok = true;
1326 break;
1327 }
1328 }
1329 p = p->next.p;
1330 }
1331 if(ok)
1332 {
1333 create_startmap(state: p->next.p, l_map, pnull, mask);
1334 }
1335 }
1336 state = state->next.p;
1337 break;
1338
1339 case syntax_element_commit:
1340 set_all_masks(bits: l_map, mask);
1341 // Continue scanning so we can figure out whether we can be null:
1342 state = state->next.p;
1343 break;
1344 case syntax_element_startmark:
1345 // need to handle independent subs as a special case:
1346 if(static_cast<re_brace*>(state)->index == -3)
1347 {
1348 state = state->next.p->next.p;
1349 break;
1350 }
1351 BOOST_FALLTHROUGH;
1352 default:
1353 state = state->next.p;
1354 }
1355 ++not_last_jump;
1356 }
1357}
1358
1359template <class charT, class traits>
1360unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
1361{
1362 //
1363 // find out how the machine starts, so we can optimise the search:
1364 //
1365 while(state)
1366 {
1367 switch(state->type)
1368 {
1369 case syntax_element_startmark:
1370 case syntax_element_endmark:
1371 state = state->next.p;
1372 continue;
1373 case syntax_element_start_line:
1374 return regbase::restart_line;
1375 case syntax_element_word_start:
1376 return regbase::restart_word;
1377 case syntax_element_buffer_start:
1378 return regbase::restart_buf;
1379 case syntax_element_restart_continue:
1380 return regbase::restart_continue;
1381 default:
1382 state = 0;
1383 continue;
1384 }
1385 }
1386 return regbase::restart_any;
1387}
1388
1389template <class charT, class traits>
1390void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
1391{
1392 //
1393 // set mask in all of bits elements,
1394 // if bits[0] has mask_init not set then we can
1395 // optimise this to a call to memset:
1396 //
1397 if(bits)
1398 {
1399 if(bits[0] == 0)
1400 (std::memset)(s: bits, c: mask, n: 1u << CHAR_BIT);
1401 else
1402 {
1403 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
1404 bits[i] |= mask;
1405 }
1406 bits[0] |= mask_init;
1407 }
1408}
1409
1410template <class charT, class traits>
1411bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
1412{
1413 switch(pt->type)
1414 {
1415 case syntax_element_rep:
1416 case syntax_element_dot_rep:
1417 case syntax_element_char_rep:
1418 case syntax_element_short_set_rep:
1419 case syntax_element_long_set_rep:
1420 {
1421 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1422 if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
1423 return true; // run out of bits, assume we can't traverse this one.
1424 static const boost::uintmax_t one = 1uL;
1425 return m_bad_repeats & (one << state_id);
1426 }
1427 default:
1428 return false;
1429 }
1430}
1431
1432template <class charT, class traits>
1433void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
1434{
1435 switch(pt->type)
1436 {
1437 case syntax_element_rep:
1438 case syntax_element_dot_rep:
1439 case syntax_element_char_rep:
1440 case syntax_element_short_set_rep:
1441 case syntax_element_long_set_rep:
1442 {
1443 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1444 static const boost::uintmax_t one = 1uL;
1445 if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
1446 m_bad_repeats |= (one << state_id);
1447 }
1448 break;
1449 default:
1450 break;
1451 }
1452}
1453
1454template <class charT, class traits>
1455syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
1456{
1457 typedef typename traits::char_class_type m_type;
1458 if(state->type == syntax_element_rep)
1459 {
1460 // check to see if we are repeating a single state:
1461 if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
1462 {
1463 switch(state->next.p->type)
1464 {
1465 case BOOST_REGEX_DETAIL_NS::syntax_element_wild:
1466 return BOOST_REGEX_DETAIL_NS::syntax_element_dot_rep;
1467 case BOOST_REGEX_DETAIL_NS::syntax_element_literal:
1468 return BOOST_REGEX_DETAIL_NS::syntax_element_char_rep;
1469 case BOOST_REGEX_DETAIL_NS::syntax_element_set:
1470 return BOOST_REGEX_DETAIL_NS::syntax_element_short_set_rep;
1471 case BOOST_REGEX_DETAIL_NS::syntax_element_long_set:
1472 if(static_cast<BOOST_REGEX_DETAIL_NS::re_set_long<m_type>*>(state->next.p)->singleton)
1473 return BOOST_REGEX_DETAIL_NS::syntax_element_long_set_rep;
1474 break;
1475 default:
1476 break;
1477 }
1478 }
1479 }
1480 return state->type;
1481}
1482
1483template <class charT, class traits>
1484void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
1485{
1486 // enumerate our states, and see if we have a leading repeat
1487 // for which failed search restarts can be optimised;
1488 do
1489 {
1490 switch(state->type)
1491 {
1492 case syntax_element_startmark:
1493 if(static_cast<re_brace*>(state)->index >= 0)
1494 {
1495 state = state->next.p;
1496 continue;
1497 }
1498 if((static_cast<re_brace*>(state)->index == -1)
1499 || (static_cast<re_brace*>(state)->index == -2))
1500 {
1501 // skip past the zero width assertion:
1502 state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
1503 continue;
1504 }
1505 if(static_cast<re_brace*>(state)->index == -3)
1506 {
1507 // Have to skip the leading jump state:
1508 state = state->next.p->next.p;
1509 continue;
1510 }
1511 return;
1512 case syntax_element_endmark:
1513 case syntax_element_start_line:
1514 case syntax_element_end_line:
1515 case syntax_element_word_boundary:
1516 case syntax_element_within_word:
1517 case syntax_element_word_start:
1518 case syntax_element_word_end:
1519 case syntax_element_buffer_start:
1520 case syntax_element_buffer_end:
1521 case syntax_element_restart_continue:
1522 state = state->next.p;
1523 break;
1524 case syntax_element_dot_rep:
1525 case syntax_element_char_rep:
1526 case syntax_element_short_set_rep:
1527 case syntax_element_long_set_rep:
1528 if(this->m_has_backrefs == 0)
1529 static_cast<re_repeat*>(state)->leading = true;
1530 BOOST_FALLTHROUGH;
1531 default:
1532 return;
1533 }
1534 }while(state);
1535}
1536
1537
1538} // namespace BOOST_REGEX_DETAIL_NS
1539
1540} // namespace boost
1541
1542#ifdef BOOST_MSVC
1543# pragma warning(pop)
1544#endif
1545
1546#ifdef BOOST_MSVC
1547#pragma warning(push)
1548#pragma warning(disable: 4103)
1549#endif
1550#ifdef BOOST_HAS_ABI_HEADERS
1551# include BOOST_ABI_SUFFIX
1552#endif
1553#ifdef BOOST_MSVC
1554#pragma warning(pop)
1555#endif
1556
1557#endif
1558
1559

source code of boost/boost/regex/v4/basic_regex_creator.hpp