1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
 *   FILE         basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
17 */
18
19#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
22#ifdef BOOST_MSVC
23#pragma warning(push)
24#pragma warning(disable: 4103)
25#endif
26#ifdef BOOST_HAS_ABI_HEADERS
27# include BOOST_ABI_PREFIX
28#endif
29#ifdef BOOST_MSVC
30#pragma warning(pop)
31#endif
32
33namespace boost{
34namespace BOOST_REGEX_DETAIL_NS{
35
36#ifdef BOOST_MSVC
37#pragma warning(push)
38#pragma warning(disable:4244 4800)
39#endif
40
//
// basic_regex_parser:
// Parser for the textual form of a regular expression.  It derives from
// basic_regex_creator and is driven through parse(); the parse_* members
// each deal with one syntactic construct, and errors are reported via fail().
//
template <class charT, class traits>
class basic_regex_parser : public basic_regex_creator<charT, traits>
{
public:
   // data is passed straight on to the basic_regex_creator base class:
   basic_regex_parser(regex_data<charT, traits>* data);
   // parse the characters in [p1, p2) according to flags:
   void parse(const charT* p1, const charT* p2, unsigned flags);
   // report an error, using the traits class's error_string() for the message:
   void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
   // report an error with an explicit message; when start_pos == position the
   // message is augmented with the text surrounding position:
   void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
   // convenience overload: as above with start_pos == position:
   void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
   {
      fail(error_code, position, message, position);
   }

   // calls the selected parser (m_parser_proc) until it returns false
   // or m_position reaches m_end:
   bool parse_all();
   // the three top level parsers that parse() chooses between:
   bool parse_basic();
   bool parse_extended();
   bool parse_literal();
   // handlers for individual constructs:
   bool parse_open_paren();
   bool parse_basic_escape();
   bool parse_extended_escape();
   bool parse_match_any();
   // by default repeats zero or more times with no upper bound:
   bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
   bool parse_repeat_range(bool isbasic);
   bool parse_alt();
   bool parse_set();
   bool parse_backref();
   void parse_set_literal(basic_char_set<charT, traits>& char_set);
   bool parse_inner_set(basic_char_set<charT, traits>& char_set);
   bool parse_QE();
   bool parse_perl_extension();
   bool parse_perl_verb();
   bool match_verb(const char*);
   bool add_emacs_code(bool negate);
   bool unwind_alts(std::ptrdiff_t last_paren_start);
   digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
   charT unescape_character();
   regex_constants::syntax_option_type parse_options();

private:
   // signature shared by parse_basic, parse_extended and parse_literal:
   typedef bool (basic_regex_parser::*parser_proc_type)();
   typedef typename traits::string_type string_type;
   typedef typename traits::char_class_type char_class_type;
   parser_proc_type m_parser_proc; // the main parser to use
   const charT* m_base; // the start of the string being parsed
   const charT* m_end; // the end of the string being parsed
   const charT* m_position; // our current parser position
   unsigned m_mark_count; // how many sub-expressions we have
   int m_mark_reset; // used to indicate that we're inside a (?|...) block.
   unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
   std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
   std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
   bool m_has_case_change; // true if somewhere in the current block the case has changed
#if defined(BOOST_MSVC) && defined(_M_IX86)
   // This is an ugly warning suppression workaround (for warnings *inside* std::vector
   // that can not otherwise be suppressed)...
   BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
   std::vector<long> m_alt_jumps; // list of alternative in the current scope.
#else
   std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
#endif

   // declared private and not defined in this part of the file:
   // the parser is not intended to be copied or assigned.
   basic_regex_parser& operator=(const basic_regex_parser&);
   basic_regex_parser(const basic_regex_parser&);
};
105
106template <class charT, class traits>
107basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
108 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
109{
110}
111
112template <class charT, class traits>
113void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
114{
115 // pass l_flags on to base class:
116 this->init(l_flags);
117 // set up pointers:
118 m_position = m_base = p1;
119 m_end = p2;
120 // empty strings are errors:
121 if((p1 == p2) &&
122 (
123 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
124 || (l_flags & regbase::no_empty_expressions)
125 )
126 )
127 {
128 fail(regex_constants::error_empty, 0);
129 return;
130 }
131 // select which parser to use:
132 switch(l_flags & regbase::main_option_type)
133 {
134 case regbase::perl_syntax_group:
135 {
136 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
137 //
138 // Add a leading paren with index zero to give recursions a target:
139 //
140 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
141 br->index = 0;
142 br->icase = this->flags() & regbase::icase;
143 break;
144 }
145 case regbase::basic_syntax_group:
146 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
147 break;
148 case regbase::literal:
149 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
150 break;
151 default:
152 // Ooops, someone has managed to set more than one of the main option flags,
153 // so this must be an error:
154 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
155 return;
156 }
157
158 // parse all our characters:
159 bool result = parse_all();
160 //
161 // Unwind our alternatives:
162 //
163 unwind_alts(last_paren_start: -1);
164 // reset l_flags as a global scope (?imsx) may have altered them:
165 this->flags(l_flags);
166 // if we haven't gobbled up all the characters then we must
167 // have had an unexpected ')' :
168 if(!result)
169 {
170 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
171 return;
172 }
173 // if an error has been set then give up now:
174 if(this->m_pdata->m_status)
175 return;
176 // fill in our sub-expression count:
177 this->m_pdata->m_mark_count = 1 + m_mark_count;
178 this->finalize(p1, p2);
179}
180
181template <class charT, class traits>
182void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
183{
184 // get the error message:
185 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
186 fail(error_code, position, message);
187}
188
189template <class charT, class traits>
190void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
191{
192 if(0 == this->m_pdata->m_status) // update the error code if not already set
193 this->m_pdata->m_status = error_code;
194 m_position = m_end; // don't bother parsing anything else
195
196#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
197 //
198 // Augment error message with the regular expression text:
199 //
200 if(start_pos == position)
201 start_pos = (std::max)(a: static_cast<std::ptrdiff_t>(0), b: position - static_cast<std::ptrdiff_t>(10));
202 std::ptrdiff_t end_pos = (std::min)(a: position + static_cast<std::ptrdiff_t>(10), b: static_cast<std::ptrdiff_t>(m_end - m_base));
203 if(error_code != regex_constants::error_empty)
204 {
205 if((start_pos != 0) || (end_pos != (m_end - m_base)))
206 message += " The error occurred while parsing the regular expression fragment: '";
207 else
208 message += " The error occurred while parsing the regular expression: '";
209 if(start_pos != end_pos)
210 {
211 message += std::string(m_base + start_pos, m_base + position);
212 message += ">>>HERE>>>";
213 message += std::string(m_base + position, m_base + end_pos);
214 }
215 message += "'.";
216 }
217#endif
218
219#ifndef BOOST_NO_EXCEPTIONS
220 if(0 == (this->flags() & regex_constants::no_except))
221 {
222 boost::regex_error e(message, error_code, position);
223 e.raise();
224 }
225#else
226 (void)position; // suppress warnings.
227#endif
228}
229
230template <class charT, class traits>
231bool basic_regex_parser<charT, traits>::parse_all()
232{
233 bool result = true;
234 while(result && (m_position != m_end))
235 {
236 result = (this->*m_parser_proc)();
237 }
238 return result;
239}
240
241#ifdef BOOST_MSVC
242#pragma warning(push)
243#pragma warning(disable:4702)
244#endif
245template <class charT, class traits>
246bool basic_regex_parser<charT, traits>::parse_basic()
247{
248 switch(this->m_traits.syntax_type(*m_position))
249 {
250 case regex_constants::syntax_escape:
251 return parse_basic_escape();
252 case regex_constants::syntax_dot:
253 return parse_match_any();
254 case regex_constants::syntax_caret:
255 ++m_position;
256 this->append_state(syntax_element_start_line);
257 break;
258 case regex_constants::syntax_dollar:
259 ++m_position;
260 this->append_state(syntax_element_end_line);
261 break;
262 case regex_constants::syntax_star:
263 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
264 return parse_literal();
265 else
266 {
267 ++m_position;
268 return parse_repeat();
269 }
270 case regex_constants::syntax_plus:
271 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
272 return parse_literal();
273 else
274 {
275 ++m_position;
276 return parse_repeat(low: 1);
277 }
278 case regex_constants::syntax_question:
279 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
280 return parse_literal();
281 else
282 {
283 ++m_position;
284 return parse_repeat(low: 0, high: 1);
285 }
286 case regex_constants::syntax_open_set:
287 return parse_set();
288 case regex_constants::syntax_newline:
289 if(this->flags() & regbase::newline_alt)
290 return parse_alt();
291 else
292 return parse_literal();
293 default:
294 return parse_literal();
295 }
296 return true;
297}
298
299template <class charT, class traits>
300bool basic_regex_parser<charT, traits>::parse_extended()
301{
302 bool result = true;
303 switch(this->m_traits.syntax_type(*m_position))
304 {
305 case regex_constants::syntax_open_mark:
306 return parse_open_paren();
307 case regex_constants::syntax_close_mark:
308 return false;
309 case regex_constants::syntax_escape:
310 return parse_extended_escape();
311 case regex_constants::syntax_dot:
312 return parse_match_any();
313 case regex_constants::syntax_caret:
314 ++m_position;
315 this->append_state(
316 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
317 break;
318 case regex_constants::syntax_dollar:
319 ++m_position;
320 this->append_state(
321 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
322 break;
323 case regex_constants::syntax_star:
324 if(m_position == this->m_base)
325 {
326 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
327 return false;
328 }
329 ++m_position;
330 return parse_repeat();
331 case regex_constants::syntax_question:
332 if(m_position == this->m_base)
333 {
334 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
335 return false;
336 }
337 ++m_position;
338 return parse_repeat(low: 0,high: 1);
339 case regex_constants::syntax_plus:
340 if(m_position == this->m_base)
341 {
342 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
343 return false;
344 }
345 ++m_position;
346 return parse_repeat(low: 1);
347 case regex_constants::syntax_open_brace:
348 ++m_position;
349 return parse_repeat_range(isbasic: false);
350 case regex_constants::syntax_close_brace:
351 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
352 {
353 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
354 return false;
355 }
356 result = parse_literal();
357 break;
358 case regex_constants::syntax_or:
359 return parse_alt();
360 case regex_constants::syntax_open_set:
361 return parse_set();
362 case regex_constants::syntax_newline:
363 if(this->flags() & regbase::newline_alt)
364 return parse_alt();
365 else
366 return parse_literal();
367 case regex_constants::syntax_hash:
368 //
369 // If we have a mod_x flag set, then skip until
370 // we get to a newline character:
371 //
372 if((this->flags()
373 & (regbase::no_perl_ex|regbase::mod_x))
374 == regbase::mod_x)
375 {
376 while((m_position != m_end) && !is_separator(*m_position++)){}
377 return true;
378 }
379 BOOST_FALLTHROUGH;
380 default:
381 result = parse_literal();
382 break;
383 }
384 return result;
385}
386#ifdef BOOST_MSVC
387#pragma warning(pop)
388#endif
389
390template <class charT, class traits>
391bool basic_regex_parser<charT, traits>::parse_literal()
392{
393 // append this as a literal provided it's not a space character
394 // or the perl option regbase::mod_x is not set:
395 if(
396 ((this->flags()
397 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
398 != regbase::mod_x)
399 || !this->m_traits.isctype(*m_position, this->m_mask_space))
400 this->append_literal(*m_position);
401 ++m_position;
402 return true;
403}
404
405template <class charT, class traits>
406bool basic_regex_parser<charT, traits>::parse_open_paren()
407{
408 //
409 // skip the '(' and error check:
410 //
411 if(++m_position == m_end)
412 {
413 fail(regex_constants::error_paren, m_position - m_base);
414 return false;
415 }
416 //
417 // begin by checking for a perl-style (?...) extension:
418 //
419 if(
420 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
421 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
422 )
423 {
424 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
425 return parse_perl_extension();
426 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
427 return parse_perl_verb();
428 }
429 //
430 // update our mark count, and append the required state:
431 //
432 unsigned markid = 0;
433 if(0 == (this->flags() & regbase::nosubs))
434 {
435 markid = ++m_mark_count;
436#ifndef BOOST_NO_STD_DISTANCE
437 if(this->flags() & regbase::save_subexpression_location)
438 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
439#else
440 if(this->flags() & regbase::save_subexpression_location)
441 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
442#endif
443 }
444 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
445 pb->index = markid;
446 pb->icase = this->flags() & regbase::icase;
447 std::ptrdiff_t last_paren_start = this->getoffset(pb);
448 // back up insertion point for alternations, and set new point:
449 std::ptrdiff_t last_alt_point = m_alt_insert_point;
450 this->m_pdata->m_data.align();
451 m_alt_insert_point = this->m_pdata->m_data.size();
452 //
453 // back up the current flags in case we have a nested (?imsx) group:
454 //
455 regex_constants::syntax_option_type opts = this->flags();
456 bool old_case_change = m_has_case_change;
457 m_has_case_change = false; // no changes to this scope as yet...
458 //
459 // Back up branch reset data in case we have a nested (?|...)
460 //
461 int mark_reset = m_mark_reset;
462 m_mark_reset = -1;
463 //
464 // now recursively add more states, this will terminate when we get to a
465 // matching ')' :
466 //
467 parse_all();
468 //
469 // Unwind pushed alternatives:
470 //
471 if(0 == unwind_alts(last_paren_start))
472 return false;
473 //
474 // restore flags:
475 //
476 if(m_has_case_change)
477 {
478 // the case has changed in one or more of the alternatives
479 // within the scoped (...) block: we have to add a state
480 // to reset the case sensitivity:
481 static_cast<re_case*>(
482 this->append_state(syntax_element_toggle_case, sizeof(re_case))
483 )->icase = opts & regbase::icase;
484 }
485 this->flags(opts);
486 m_has_case_change = old_case_change;
487 //
488 // restore branch reset:
489 //
490 m_mark_reset = mark_reset;
491 //
492 // we either have a ')' or we have run out of characters prematurely:
493 //
494 if(m_position == m_end)
495 {
496 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
497 return false;
498 }
499 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
500#ifndef BOOST_NO_STD_DISTANCE
501 if(markid && (this->flags() & regbase::save_subexpression_location))
502 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
503#else
504 if(markid && (this->flags() & regbase::save_subexpression_location))
505 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
506#endif
507 ++m_position;
508 //
509 // append closing parenthesis state:
510 //
511 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
512 pb->index = markid;
513 pb->icase = this->flags() & regbase::icase;
514 this->m_paren_start = last_paren_start;
515 //
516 // restore the alternate insertion point:
517 //
518 this->m_alt_insert_point = last_alt_point;
519 //
520 // allow backrefs to this mark:
521 //
522 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
523 this->m_backrefs |= 1u << (markid - 1);
524
525 return true;
526}
527
528template <class charT, class traits>
529bool basic_regex_parser<charT, traits>::parse_basic_escape()
530{
531 ++m_position;
532 bool result = true;
533 switch(this->m_traits.escape_syntax_type(*m_position))
534 {
535 case regex_constants::syntax_open_mark:
536 return parse_open_paren();
537 case regex_constants::syntax_close_mark:
538 return false;
539 case regex_constants::syntax_plus:
540 if(this->flags() & regex_constants::bk_plus_qm)
541 {
542 ++m_position;
543 return parse_repeat(low: 1);
544 }
545 else
546 return parse_literal();
547 case regex_constants::syntax_question:
548 if(this->flags() & regex_constants::bk_plus_qm)
549 {
550 ++m_position;
551 return parse_repeat(low: 0, high: 1);
552 }
553 else
554 return parse_literal();
555 case regex_constants::syntax_open_brace:
556 if(this->flags() & regbase::no_intervals)
557 return parse_literal();
558 ++m_position;
559 return parse_repeat_range(isbasic: true);
560 case regex_constants::syntax_close_brace:
561 if(this->flags() & regbase::no_intervals)
562 return parse_literal();
563 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
564 return false;
565 case regex_constants::syntax_or:
566 if(this->flags() & regbase::bk_vbar)
567 return parse_alt();
568 else
569 result = parse_literal();
570 break;
571 case regex_constants::syntax_digit:
572 return parse_backref();
573 case regex_constants::escape_type_start_buffer:
574 if(this->flags() & regbase::emacs_ex)
575 {
576 ++m_position;
577 this->append_state(syntax_element_buffer_start);
578 }
579 else
580 result = parse_literal();
581 break;
582 case regex_constants::escape_type_end_buffer:
583 if(this->flags() & regbase::emacs_ex)
584 {
585 ++m_position;
586 this->append_state(syntax_element_buffer_end);
587 }
588 else
589 result = parse_literal();
590 break;
591 case regex_constants::escape_type_word_assert:
592 if(this->flags() & regbase::emacs_ex)
593 {
594 ++m_position;
595 this->append_state(syntax_element_word_boundary);
596 }
597 else
598 result = parse_literal();
599 break;
600 case regex_constants::escape_type_not_word_assert:
601 if(this->flags() & regbase::emacs_ex)
602 {
603 ++m_position;
604 this->append_state(syntax_element_within_word);
605 }
606 else
607 result = parse_literal();
608 break;
609 case regex_constants::escape_type_left_word:
610 if(this->flags() & regbase::emacs_ex)
611 {
612 ++m_position;
613 this->append_state(syntax_element_word_start);
614 }
615 else
616 result = parse_literal();
617 break;
618 case regex_constants::escape_type_right_word:
619 if(this->flags() & regbase::emacs_ex)
620 {
621 ++m_position;
622 this->append_state(syntax_element_word_end);
623 }
624 else
625 result = parse_literal();
626 break;
627 default:
628 if(this->flags() & regbase::emacs_ex)
629 {
630 bool negate = true;
631 switch(*m_position)
632 {
633 case 'w':
634 negate = false;
635 BOOST_FALLTHROUGH;
636 case 'W':
637 {
638 basic_char_set<charT, traits> char_set;
639 if(negate)
640 char_set.negate();
641 char_set.add_class(this->m_word_mask);
642 if(0 == this->append_set(char_set))
643 {
644 fail(regex_constants::error_ctype, m_position - m_base);
645 return false;
646 }
647 ++m_position;
648 return true;
649 }
650 case 's':
651 negate = false;
652 BOOST_FALLTHROUGH;
653 case 'S':
654 return add_emacs_code(negate);
655 case 'c':
656 case 'C':
657 // not supported yet:
658 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
659 return false;
660 default:
661 break;
662 }
663 }
664 result = parse_literal();
665 break;
666 }
667 return result;
668}
669
670template <class charT, class traits>
671bool basic_regex_parser<charT, traits>::parse_extended_escape()
672{
673 ++m_position;
674 if(m_position == m_end)
675 {
676 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
677 return false;
678 }
679 bool negate = false; // in case this is a character class escape: \w \d etc
680 switch(this->m_traits.escape_syntax_type(*m_position))
681 {
682 case regex_constants::escape_type_not_class:
683 negate = true;
684 BOOST_FALLTHROUGH;
685 case regex_constants::escape_type_class:
686 {
687escape_type_class_jump:
688 typedef typename traits::char_class_type m_type;
689 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
690 if(m != 0)
691 {
692 basic_char_set<charT, traits> char_set;
693 if(negate)
694 char_set.negate();
695 char_set.add_class(m);
696 if(0 == this->append_set(char_set))
697 {
698 fail(regex_constants::error_ctype, m_position - m_base);
699 return false;
700 }
701 ++m_position;
702 return true;
703 }
704 //
705 // not a class, just a regular unknown escape:
706 //
707 this->append_literal(unescape_character());
708 break;
709 }
710 case regex_constants::syntax_digit:
711 return parse_backref();
712 case regex_constants::escape_type_left_word:
713 ++m_position;
714 this->append_state(syntax_element_word_start);
715 break;
716 case regex_constants::escape_type_right_word:
717 ++m_position;
718 this->append_state(syntax_element_word_end);
719 break;
720 case regex_constants::escape_type_start_buffer:
721 ++m_position;
722 this->append_state(syntax_element_buffer_start);
723 break;
724 case regex_constants::escape_type_end_buffer:
725 ++m_position;
726 this->append_state(syntax_element_buffer_end);
727 break;
728 case regex_constants::escape_type_word_assert:
729 ++m_position;
730 this->append_state(syntax_element_word_boundary);
731 break;
732 case regex_constants::escape_type_not_word_assert:
733 ++m_position;
734 this->append_state(syntax_element_within_word);
735 break;
736 case regex_constants::escape_type_Z:
737 ++m_position;
738 this->append_state(syntax_element_soft_buffer_end);
739 break;
740 case regex_constants::escape_type_Q:
741 return parse_QE();
742 case regex_constants::escape_type_C:
743 return parse_match_any();
744 case regex_constants::escape_type_X:
745 ++m_position;
746 this->append_state(syntax_element_combining);
747 break;
748 case regex_constants::escape_type_G:
749 ++m_position;
750 this->append_state(syntax_element_restart_continue);
751 break;
752 case regex_constants::escape_type_not_property:
753 negate = true;
754 BOOST_FALLTHROUGH;
755 case regex_constants::escape_type_property:
756 {
757 ++m_position;
758 char_class_type m;
759 if(m_position == m_end)
760 {
761 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
762 return false;
763 }
764 // maybe have \p{ddd}
765 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
766 {
767 const charT* base = m_position;
768 // skip forward until we find enclosing brace:
769 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
770 ++m_position;
771 if(m_position == m_end)
772 {
773 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
774 return false;
775 }
776 m = this->m_traits.lookup_classname(++base, m_position++);
777 }
778 else
779 {
780 m = this->m_traits.lookup_classname(m_position, m_position+1);
781 ++m_position;
782 }
783 if(m != 0)
784 {
785 basic_char_set<charT, traits> char_set;
786 if(negate)
787 char_set.negate();
788 char_set.add_class(m);
789 if(0 == this->append_set(char_set))
790 {
791 fail(regex_constants::error_ctype, m_position - m_base);
792 return false;
793 }
794 return true;
795 }
796 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
797 return false;
798 }
799 case regex_constants::escape_type_reset_start_mark:
800 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
801 {
802 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
803 pb->index = -5;
804 pb->icase = this->flags() & regbase::icase;
805 this->m_pdata->m_data.align();
806 ++m_position;
807 return true;
808 }
809 goto escape_type_class_jump;
810 case regex_constants::escape_type_line_ending:
811 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
812 {
813 const charT* e = get_escape_R_string<charT>();
814 const charT* old_position = m_position;
815 const charT* old_end = m_end;
816 const charT* old_base = m_base;
817 m_position = e;
818 m_base = e;
819 m_end = e + traits::length(e);
820 bool r = parse_all();
821 m_position = ++old_position;
822 m_end = old_end;
823 m_base = old_base;
824 return r;
825 }
826 goto escape_type_class_jump;
827 case regex_constants::escape_type_extended_backref:
828 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
829 {
830 bool have_brace = false;
831 bool negative = false;
832 static const char* incomplete_message = "Incomplete \\g escape found.";
833 if(++m_position == m_end)
834 {
835 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
836 return false;
837 }
838 // maybe have \g{ddd}
839 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
840 regex_constants::syntax_type syn_end = 0;
841 if((syn == regex_constants::syntax_open_brace)
842 || (syn == regex_constants::escape_type_left_word)
843 || (syn == regex_constants::escape_type_end_buffer))
844 {
845 if(++m_position == m_end)
846 {
847 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
848 return false;
849 }
850 have_brace = true;
851 switch(syn)
852 {
853 case regex_constants::syntax_open_brace:
854 syn_end = regex_constants::syntax_close_brace;
855 break;
856 case regex_constants::escape_type_left_word:
857 syn_end = regex_constants::escape_type_right_word;
858 break;
859 default:
860 syn_end = regex_constants::escape_type_end_buffer;
861 break;
862 }
863 }
864 negative = (*m_position == static_cast<charT>('-'));
865 if((negative) && (++m_position == m_end))
866 {
867 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
868 return false;
869 }
870 const charT* pc = m_position;
871 int i = this->m_traits.toi(pc, m_end, 10);
872 if((i < 0) && syn_end)
873 {
874 // Check for a named capture, get the leftmost one if there is more than one:
875 const charT* base = m_position;
876 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
877 {
878 ++m_position;
879 }
880 i = hash_value_from_capture_name(base, m_position);
881 pc = m_position;
882 }
883 if(negative)
884 i = 1 + m_mark_count - i;
885 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
886 {
887 m_position = pc;
888 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
889 pb->index = i;
890 pb->icase = this->flags() & regbase::icase;
891 }
892 else
893 {
894 fail(regex_constants::error_backref, m_position - m_base);
895 return false;
896 }
897 m_position = pc;
898 if(have_brace)
899 {
900 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
901 {
902 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
903 return false;
904 }
905 ++m_position;
906 }
907 return true;
908 }
909 goto escape_type_class_jump;
910 case regex_constants::escape_type_control_v:
911 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
912 goto escape_type_class_jump;
913 BOOST_FALLTHROUGH;
914 default:
915 this->append_literal(unescape_character());
916 break;
917 }
918 return true;
919}
920
921template <class charT, class traits>
922bool basic_regex_parser<charT, traits>::parse_match_any()
923{
924 //
925 // we have a '.' that can match any character:
926 //
927 ++m_position;
928 static_cast<re_dot*>(
929 this->append_state(syntax_element_wild, sizeof(re_dot))
930 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
931 ? BOOST_REGEX_DETAIL_NS::force_not_newline
932 : this->flags() & regbase::mod_s ?
933 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
934 return true;
935}
936
937template <class charT, class traits>
938bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
939{
940 bool greedy = true;
941 bool pocessive = false;
942 std::size_t insert_point;
943 //
944 // when we get to here we may have a non-greedy ? mark still to come:
945 //
946 if((m_position != m_end)
947 && (
948 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
949 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
950 )
951 )
952 {
953 // OK we have a perl or emacs regex, check for a '?':
954 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
955 {
956 greedy = false;
957 ++m_position;
958 }
959 // for perl regexes only check for pocessive ++ repeats.
960 if((m_position != m_end)
961 && (0 == (this->flags() & regbase::main_option_type))
962 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
963 {
964 pocessive = true;
965 ++m_position;
966 }
967 }
968 if(0 == this->m_last_state)
969 {
970 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
971 return false;
972 }
973 if(this->m_last_state->type == syntax_element_endmark)
974 {
975 // insert a repeat before the '(' matching the last ')':
976 insert_point = this->m_paren_start;
977 }
978 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
979 {
980 // the last state was a literal with more than one character, split it in two:
981 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
982 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
983 lit->length -= 1;
984 // now append new state:
985 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
986 lit->length = 1;
987 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
988 insert_point = this->getoffset(this->m_last_state);
989 }
990 else
991 {
992 // repeat the last state whatever it was, need to add some error checking here:
993 switch(this->m_last_state->type)
994 {
995 case syntax_element_start_line:
996 case syntax_element_end_line:
997 case syntax_element_word_boundary:
998 case syntax_element_within_word:
999 case syntax_element_word_start:
1000 case syntax_element_word_end:
1001 case syntax_element_buffer_start:
1002 case syntax_element_buffer_end:
1003 case syntax_element_alt:
1004 case syntax_element_soft_buffer_end:
1005 case syntax_element_restart_continue:
1006 case syntax_element_jump:
1007 case syntax_element_startmark:
1008 case syntax_element_backstep:
1009 // can't legally repeat any of the above:
1010 fail(regex_constants::error_badrepeat, m_position - m_base);
1011 return false;
1012 default:
1013 // do nothing...
1014 break;
1015 }
1016 insert_point = this->getoffset(this->m_last_state);
1017 }
1018 //
1019 // OK we now know what to repeat, so insert the repeat around it:
1020 //
1021 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1022 rep->min = low;
1023 rep->max = high;
1024 rep->greedy = greedy;
1025 rep->leading = false;
1026 // store our repeater position for later:
1027 std::ptrdiff_t rep_off = this->getoffset(rep);
1028 // and append a back jump to the repeat:
1029 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1030 jmp->alt.i = rep_off - this->getoffset(jmp);
1031 this->m_pdata->m_data.align();
1032 // now fill in the alt jump for the repeat:
1033 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1034 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1035 //
1036 // If the repeat is pocessive then bracket the repeat with a (?>...)
1037 // independent sub-expression construct:
1038 //
1039 if(pocessive)
1040 {
1041 if(m_position != m_end)
1042 {
1043 //
1044 // Check for illegal following quantifier, we have to do this here, because
1045 // the extra states we insert below circumvents our usual error checking :-(
1046 //
1047 switch(this->m_traits.syntax_type(*m_position))
1048 {
1049 case regex_constants::syntax_star:
1050 case regex_constants::syntax_plus:
1051 case regex_constants::syntax_question:
1052 case regex_constants::syntax_open_brace:
1053 fail(regex_constants::error_badrepeat, m_position - m_base);
1054 return false;
1055 }
1056 }
1057 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1058 pb->index = -3;
1059 pb->icase = this->flags() & regbase::icase;
1060 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1061 this->m_pdata->m_data.align();
1062 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1063 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1064 pb->index = -3;
1065 pb->icase = this->flags() & regbase::icase;
1066 }
1067 return true;
1068}
1069
1070template <class charT, class traits>
1071bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1072{
1073 static const char* incomplete_message = "Missing } in quantified repetition.";
1074 //
1075 // parse a repeat-range:
1076 //
1077 std::size_t min, max;
1078 int v;
1079 // skip whitespace:
1080 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1081 ++m_position;
1082 if(this->m_position == this->m_end)
1083 {
1084 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1085 {
1086 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1087 return false;
1088 }
1089 // Treat the opening '{' as a literal character, rewind to start of error:
1090 --m_position;
1091 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1092 return parse_literal();
1093 }
1094 // get min:
1095 v = this->m_traits.toi(m_position, m_end, 10);
1096 // skip whitespace:
1097 if(v < 0)
1098 {
1099 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1100 {
1101 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1102 return false;
1103 }
1104 // Treat the opening '{' as a literal character, rewind to start of error:
1105 --m_position;
1106 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1107 return parse_literal();
1108 }
1109 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1110 ++m_position;
1111 if(this->m_position == this->m_end)
1112 {
1113 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1114 {
1115 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1116 return false;
1117 }
1118 // Treat the opening '{' as a literal character, rewind to start of error:
1119 --m_position;
1120 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1121 return parse_literal();
1122 }
1123 min = v;
1124 // see if we have a comma:
1125 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1126 {
1127 // move on and error check:
1128 ++m_position;
1129 // skip whitespace:
1130 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1131 ++m_position;
1132 if(this->m_position == this->m_end)
1133 {
1134 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1135 {
1136 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1137 return false;
1138 }
1139 // Treat the opening '{' as a literal character, rewind to start of error:
1140 --m_position;
1141 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1142 return parse_literal();
1143 }
1144 // get the value if any:
1145 v = this->m_traits.toi(m_position, m_end, 10);
1146 max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1147 }
1148 else
1149 {
1150 // no comma, max = min:
1151 max = min;
1152 }
1153 // skip whitespace:
1154 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1155 ++m_position;
1156 // OK now check trailing }:
1157 if(this->m_position == this->m_end)
1158 {
1159 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1160 {
1161 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1162 return false;
1163 }
1164 // Treat the opening '{' as a literal character, rewind to start of error:
1165 --m_position;
1166 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1167 return parse_literal();
1168 }
1169 if(isbasic)
1170 {
1171 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1172 {
1173 ++m_position;
1174 if(this->m_position == this->m_end)
1175 {
1176 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1177 return false;
1178 }
1179 }
1180 else
1181 {
1182 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1183 return false;
1184 }
1185 }
1186 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1187 ++m_position;
1188 else
1189 {
1190 // Treat the opening '{' as a literal character, rewind to start of error:
1191 --m_position;
1192 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1193 return parse_literal();
1194 }
1195 //
1196 // finally go and add the repeat, unless error:
1197 //
1198 if(min > max)
1199 {
1200 // Backtrack to error location:
1201 m_position -= 2;
1202 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1203 ++m_position;
1204 fail(regex_constants::error_badbrace, m_position - m_base);
1205 return false;
1206 }
1207 return parse_repeat(low: min, high: max);
1208}
1209
1210template <class charT, class traits>
1211bool basic_regex_parser<charT, traits>::parse_alt()
1212{
1213 //
1214 // error check: if there have been no previous states,
1215 // or if the last state was a '(' then error:
1216 //
1217 if(
1218 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1219 &&
1220 !(
1221 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1222 &&
1223 ((this->flags() & regbase::no_empty_expressions) == 0)
1224 )
1225 )
1226 {
1227 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1228 return false;
1229 }
1230 //
1231 // Reset mark count if required:
1232 //
1233 if(m_max_mark < m_mark_count)
1234 m_max_mark = m_mark_count;
1235 if(m_mark_reset >= 0)
1236 m_mark_count = m_mark_reset;
1237
1238 ++m_position;
1239 //
1240 // we need to append a trailing jump:
1241 //
1242 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1243 std::ptrdiff_t jump_offset = this->getoffset(pj);
1244 //
1245 // now insert the alternative:
1246 //
1247 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1248 jump_offset += re_alt_size;
1249 this->m_pdata->m_data.align();
1250 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1251 //
1252 // update m_alt_insert_point so that the next alternate gets
1253 // inserted at the start of the second of the two we've just created:
1254 //
1255 this->m_alt_insert_point = this->m_pdata->m_data.size();
1256 //
1257 // the start of this alternative must have a case changes state
1258 // if the current block has messed around with case changes:
1259 //
1260 if(m_has_case_change)
1261 {
1262 static_cast<re_case*>(
1263 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1264 )->icase = this->m_icase;
1265 }
1266 //
1267 // push the alternative onto our stack, a recursive
1268 // implementation here is easier to understand (and faster
1269 // as it happens), but causes all kinds of stack overflow problems
1270 // on programs with small stacks (COM+).
1271 //
1272 m_alt_jumps.push_back(x: jump_offset);
1273 return true;
1274}
1275
1276template <class charT, class traits>
1277bool basic_regex_parser<charT, traits>::parse_set()
1278{
1279 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1280 ++m_position;
1281 if(m_position == m_end)
1282 {
1283 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1284 return false;
1285 }
1286 basic_char_set<charT, traits> char_set;
1287
1288 const charT* base = m_position; // where the '[' was
1289 const charT* item_base = m_position; // where the '[' or '^' was
1290
1291 while(m_position != m_end)
1292 {
1293 switch(this->m_traits.syntax_type(*m_position))
1294 {
1295 case regex_constants::syntax_caret:
1296 if(m_position == base)
1297 {
1298 char_set.negate();
1299 ++m_position;
1300 item_base = m_position;
1301 }
1302 else
1303 parse_set_literal(char_set);
1304 break;
1305 case regex_constants::syntax_close_set:
1306 if(m_position == item_base)
1307 {
1308 parse_set_literal(char_set);
1309 break;
1310 }
1311 else
1312 {
1313 ++m_position;
1314 if(0 == this->append_set(char_set))
1315 {
1316 fail(regex_constants::error_ctype, m_position - m_base);
1317 return false;
1318 }
1319 }
1320 return true;
1321 case regex_constants::syntax_open_set:
1322 if(parse_inner_set(char_set))
1323 break;
1324 return true;
1325 case regex_constants::syntax_escape:
1326 {
1327 //
1328 // look ahead and see if this is a character class shortcut
1329 // \d \w \s etc...
1330 //
1331 ++m_position;
1332 if(this->m_traits.escape_syntax_type(*m_position)
1333 == regex_constants::escape_type_class)
1334 {
1335 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1336 if(m != 0)
1337 {
1338 char_set.add_class(m);
1339 ++m_position;
1340 break;
1341 }
1342 }
1343 else if(this->m_traits.escape_syntax_type(*m_position)
1344 == regex_constants::escape_type_not_class)
1345 {
1346 // negated character class:
1347 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1348 if(m != 0)
1349 {
1350 char_set.add_negated_class(m);
1351 ++m_position;
1352 break;
1353 }
1354 }
1355 // not a character class, just a regular escape:
1356 --m_position;
1357 parse_set_literal(char_set);
1358 break;
1359 }
1360 default:
1361 parse_set_literal(char_set);
1362 break;
1363 }
1364 }
1365 return m_position != m_end;
1366}
1367
1368template <class charT, class traits>
1369bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1370{
1371 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1372 //
1373 // we have either a character class [:name:]
1374 // a collating element [.name.]
1375 // or an equivalence class [=name=]
1376 //
1377 if(m_end == ++m_position)
1378 {
1379 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1380 return false;
1381 }
1382 switch(this->m_traits.syntax_type(*m_position))
1383 {
1384 case regex_constants::syntax_dot:
1385 //
1386 // a collating element is treated as a literal:
1387 //
1388 --m_position;
1389 parse_set_literal(char_set);
1390 return true;
1391 case regex_constants::syntax_colon:
1392 {
1393 // check that character classes are actually enabled:
1394 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1395 == (regbase::basic_syntax_group | regbase::no_char_classes))
1396 {
1397 --m_position;
1398 parse_set_literal(char_set);
1399 return true;
1400 }
1401 // skip the ':'
1402 if(m_end == ++m_position)
1403 {
1404 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1405 return false;
1406 }
1407 const charT* name_first = m_position;
1408 // skip at least one character, then find the matching ':]'
1409 if(m_end == ++m_position)
1410 {
1411 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1412 return false;
1413 }
1414 while((m_position != m_end)
1415 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1416 ++m_position;
1417 const charT* name_last = m_position;
1418 if(m_end == m_position)
1419 {
1420 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1421 return false;
1422 }
1423 if((m_end == ++m_position)
1424 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1425 {
1426 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1427 return false;
1428 }
1429 //
1430 // check for negated class:
1431 //
1432 bool negated = false;
1433 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1434 {
1435 ++name_first;
1436 negated = true;
1437 }
1438 typedef typename traits::char_class_type m_type;
1439 m_type m = this->m_traits.lookup_classname(name_first, name_last);
1440 if(m == 0)
1441 {
1442 if(char_set.empty() && (name_last - name_first == 1))
1443 {
1444 // maybe a special case:
1445 ++m_position;
1446 if( (m_position != m_end)
1447 && (this->m_traits.syntax_type(*m_position)
1448 == regex_constants::syntax_close_set))
1449 {
1450 if(this->m_traits.escape_syntax_type(*name_first)
1451 == regex_constants::escape_type_left_word)
1452 {
1453 ++m_position;
1454 this->append_state(syntax_element_word_start);
1455 return false;
1456 }
1457 if(this->m_traits.escape_syntax_type(*name_first)
1458 == regex_constants::escape_type_right_word)
1459 {
1460 ++m_position;
1461 this->append_state(syntax_element_word_end);
1462 return false;
1463 }
1464 }
1465 }
1466 fail(regex_constants::error_ctype, name_first - m_base);
1467 return false;
1468 }
1469 if(negated == false)
1470 char_set.add_class(m);
1471 else
1472 char_set.add_negated_class(m);
1473 ++m_position;
1474 break;
1475 }
1476 case regex_constants::syntax_equal:
1477 {
1478 // skip the '='
1479 if(m_end == ++m_position)
1480 {
1481 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1482 return false;
1483 }
1484 const charT* name_first = m_position;
1485 // skip at least one character, then find the matching '=]'
1486 if(m_end == ++m_position)
1487 {
1488 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1489 return false;
1490 }
1491 while((m_position != m_end)
1492 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1493 ++m_position;
1494 const charT* name_last = m_position;
1495 if(m_end == m_position)
1496 {
1497 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1498 return false;
1499 }
1500 if((m_end == ++m_position)
1501 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1502 {
1503 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1504 return false;
1505 }
1506 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1507 if((0 == m.size()) || (m.size() > 2))
1508 {
1509 fail(regex_constants::error_collate, name_first - m_base);
1510 return false;
1511 }
1512 digraph<charT> d;
1513 d.first = m[0];
1514 if(m.size() > 1)
1515 d.second = m[1];
1516 else
1517 d.second = 0;
1518 char_set.add_equivalent(d);
1519 ++m_position;
1520 break;
1521 }
1522 default:
1523 --m_position;
1524 parse_set_literal(char_set);
1525 break;
1526 }
1527 return true;
1528}
1529
1530template <class charT, class traits>
1531void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1532{
1533 digraph<charT> start_range(get_next_set_literal(char_set));
1534 if(m_end == m_position)
1535 {
1536 fail(regex_constants::error_brack, m_position - m_base);
1537 return;
1538 }
1539 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1540 {
1541 // we have a range:
1542 if(m_end == ++m_position)
1543 {
1544 fail(regex_constants::error_brack, m_position - m_base);
1545 return;
1546 }
1547 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1548 {
1549 digraph<charT> end_range = get_next_set_literal(char_set);
1550 char_set.add_range(start_range, end_range);
1551 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1552 {
1553 if(m_end == ++m_position)
1554 {
1555 fail(regex_constants::error_brack, m_position - m_base);
1556 return;
1557 }
1558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1559 {
1560 // trailing - :
1561 --m_position;
1562 return;
1563 }
1564 fail(regex_constants::error_range, m_position - m_base);
1565 return;
1566 }
1567 return;
1568 }
1569 --m_position;
1570 }
1571 char_set.add_single(start_range);
1572}
1573
1574template <class charT, class traits>
1575digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1576{
1577 digraph<charT> result;
1578 switch(this->m_traits.syntax_type(*m_position))
1579 {
1580 case regex_constants::syntax_dash:
1581 if(!char_set.empty())
1582 {
1583 // see if we are at the end of the set:
1584 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1585 {
1586 fail(regex_constants::error_range, m_position - m_base);
1587 return result;
1588 }
1589 --m_position;
1590 }
1591 result.first = *m_position++;
1592 return result;
1593 case regex_constants::syntax_escape:
1594 // check to see if escapes are supported first:
1595 if(this->flags() & regex_constants::no_escape_in_lists)
1596 {
1597 result = *m_position++;
1598 break;
1599 }
1600 ++m_position;
1601 result = unescape_character();
1602 break;
1603 case regex_constants::syntax_open_set:
1604 {
1605 if(m_end == ++m_position)
1606 {
1607 fail(regex_constants::error_collate, m_position - m_base);
1608 return result;
1609 }
1610 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1611 {
1612 --m_position;
1613 result.first = *m_position;
1614 ++m_position;
1615 return result;
1616 }
1617 if(m_end == ++m_position)
1618 {
1619 fail(regex_constants::error_collate, m_position - m_base);
1620 return result;
1621 }
1622 const charT* name_first = m_position;
1623 // skip at least one character, then find the matching ':]'
1624 if(m_end == ++m_position)
1625 {
1626 fail(regex_constants::error_collate, name_first - m_base);
1627 return result;
1628 }
1629 while((m_position != m_end)
1630 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1631 ++m_position;
1632 const charT* name_last = m_position;
1633 if(m_end == m_position)
1634 {
1635 fail(regex_constants::error_collate, name_first - m_base);
1636 return result;
1637 }
1638 if((m_end == ++m_position)
1639 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1640 {
1641 fail(regex_constants::error_collate, name_first - m_base);
1642 return result;
1643 }
1644 ++m_position;
1645 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1646 if(s.empty() || (s.size() > 2))
1647 {
1648 fail(regex_constants::error_collate, name_first - m_base);
1649 return result;
1650 }
1651 result.first = s[0];
1652 if(s.size() > 1)
1653 result.second = s[1];
1654 else
1655 result.second = 0;
1656 return result;
1657 }
1658 default:
1659 result = *m_position++;
1660 }
1661 return result;
1662}
1663
1664//
1665// does a value fit in the specified charT type?
1666//
1667template <class charT>
1668bool valid_value(charT, int v, const mpl::true_&)
1669{
1670 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1671}
1672template <class charT>
1673bool valid_value(charT, int, const mpl::false_&)
1674{
1675 return true; // v will alsways fit in a charT
1676}
1677template <class charT>
1678bool valid_value(charT c, int v)
1679{
1680 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
1681}
1682
1683template <class charT, class traits>
1684charT basic_regex_parser<charT, traits>::unescape_character()
1685{
1686#ifdef BOOST_MSVC
1687#pragma warning(push)
1688#pragma warning(disable:4127)
1689#endif
1690 charT result(0);
1691 if(m_position == m_end)
1692 {
1693 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1694 return false;
1695 }
1696 switch(this->m_traits.escape_syntax_type(*m_position))
1697 {
1698 case regex_constants::escape_type_control_a:
1699 result = charT('\a');
1700 break;
1701 case regex_constants::escape_type_e:
1702 result = charT(27);
1703 break;
1704 case regex_constants::escape_type_control_f:
1705 result = charT('\f');
1706 break;
1707 case regex_constants::escape_type_control_n:
1708 result = charT('\n');
1709 break;
1710 case regex_constants::escape_type_control_r:
1711 result = charT('\r');
1712 break;
1713 case regex_constants::escape_type_control_t:
1714 result = charT('\t');
1715 break;
1716 case regex_constants::escape_type_control_v:
1717 result = charT('\v');
1718 break;
1719 case regex_constants::escape_type_word_assert:
1720 result = charT('\b');
1721 break;
1722 case regex_constants::escape_type_ascii_control:
1723 ++m_position;
1724 if(m_position == m_end)
1725 {
1726 // Rewind to start of escape:
1727 --m_position;
1728 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1729 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1730 return result;
1731 }
1732 result = static_cast<charT>(*m_position % 32);
1733 break;
1734 case regex_constants::escape_type_hex:
1735 ++m_position;
1736 if(m_position == m_end)
1737 {
1738 // Rewind to start of escape:
1739 --m_position;
1740 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1741 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1742 return result;
1743 }
1744 // maybe have \x{ddd}
1745 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1746 {
1747 ++m_position;
1748 if(m_position == m_end)
1749 {
1750 // Rewind to start of escape:
1751 --m_position;
1752 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1753 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1754 return result;
1755 }
1756 int i = this->m_traits.toi(m_position, m_end, 16);
1757 if((m_position == m_end)
1758 || (i < 0)
1759 || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
1760 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1761 {
1762 // Rewind to start of escape:
1763 --m_position;
1764 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1765 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1766 return result;
1767 }
1768 ++m_position;
1769 result = charT(i);
1770 }
1771 else
1772 {
1773 std::ptrdiff_t len = (std::min)(a: static_cast<std::ptrdiff_t>(2), b: static_cast<std::ptrdiff_t>(m_end - m_position));
1774 int i = this->m_traits.toi(m_position, m_position + len, 16);
1775 if((i < 0)
1776 || !valid_value(charT(0), i))
1777 {
1778 // Rewind to start of escape:
1779 --m_position;
1780 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1781 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1782 return result;
1783 }
1784 result = charT(i);
1785 }
1786 return result;
1787 case regex_constants::syntax_digit:
1788 {
1789 // an octal escape sequence, the first character must be a zero
1790 // followed by up to 3 octal digits:
1791 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1792 const charT* bp = m_position;
1793 int val = this->m_traits.toi(bp, bp + 1, 8);
1794 if(val != 0)
1795 {
1796 // Rewind to start of escape:
1797 --m_position;
1798 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1799 // Oops not an octal escape after all:
1800 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1801 return result;
1802 }
1803 val = this->m_traits.toi(m_position, m_position + len, 8);
1804 if(val < 0)
1805 {
1806 // Rewind to start of escape:
1807 --m_position;
1808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1810 return result;
1811 }
1812 return static_cast<charT>(val);
1813 }
1814 case regex_constants::escape_type_named_char:
1815 {
1816 ++m_position;
1817 if(m_position == m_end)
1818 {
1819 // Rewind to start of escape:
1820 --m_position;
1821 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1822 fail(regex_constants::error_escape, m_position - m_base);
1823 return false;
1824 }
1825 // maybe have \N{name}
1826 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1827 {
1828 const charT* base = m_position;
1829 // skip forward until we find enclosing brace:
1830 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1831 ++m_position;
1832 if(m_position == m_end)
1833 {
1834 // Rewind to start of escape:
1835 --m_position;
1836 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1837 fail(regex_constants::error_escape, m_position - m_base);
1838 return false;
1839 }
1840 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1841 if(s.empty())
1842 {
1843 // Rewind to start of escape:
1844 --m_position;
1845 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1846 fail(regex_constants::error_collate, m_position - m_base);
1847 return false;
1848 }
1849 if(s.size() == 1)
1850 {
1851 return s[0];
1852 }
1853 }
1854 // fall through is a failure:
1855 // Rewind to start of escape:
1856 --m_position;
1857 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1858 fail(regex_constants::error_escape, m_position - m_base);
1859 return false;
1860 }
1861 default:
1862 result = *m_position;
1863 break;
1864 }
1865 ++m_position;
1866 return result;
1867#ifdef BOOST_MSVC
1868#pragma warning(pop)
1869#endif
1870}
1871
1872template <class charT, class traits>
1873bool basic_regex_parser<charT, traits>::parse_backref()
1874{
1875 BOOST_ASSERT(m_position != m_end);
1876 const charT* pc = m_position;
1877 int i = this->m_traits.toi(pc, pc + 1, 10);
1878 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1879 {
1880 // not a backref at all but an octal escape sequence:
1881 charT c = unescape_character();
1882 this->append_literal(c);
1883 }
1884 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1885 {
1886 m_position = pc;
1887 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1888 pb->index = i;
1889 pb->icase = this->flags() & regbase::icase;
1890 }
1891 else
1892 {
1893 // Rewind to start of escape:
1894 --m_position;
1895 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1896 fail(regex_constants::error_backref, m_position - m_base);
1897 return false;
1898 }
1899 return true;
1900}
1901
1902template <class charT, class traits>
1903bool basic_regex_parser<charT, traits>::parse_QE()
1904{
1905#ifdef BOOST_MSVC
1906#pragma warning(push)
1907#pragma warning(disable:4127)
1908#endif
1909 //
1910 // parse a \Q...\E sequence:
1911 //
1912 ++m_position; // skip the Q
1913 const charT* start = m_position;
1914 const charT* end;
1915 do
1916 {
1917 while((m_position != m_end)
1918 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1919 ++m_position;
1920 if(m_position == m_end)
1921 {
1922 // a \Q...\E sequence may terminate with the end of the expression:
1923 end = m_position;
1924 break;
1925 }
1926 if(++m_position == m_end) // skip the escape
1927 {
1928 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1929 return false;
1930 }
1931 // check to see if it's a \E:
1932 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
1933 {
1934 ++m_position;
1935 end = m_position - 2;
1936 break;
1937 }
1938 // otherwise go round again:
1939 }while(true);
1940 //
1941 // now add all the character between the two escapes as literals:
1942 //
1943 while(start != end)
1944 {
1945 this->append_literal(*start);
1946 ++start;
1947 }
1948 return true;
1949#ifdef BOOST_MSVC
1950#pragma warning(pop)
1951#endif
1952}
1953
1954template <class charT, class traits>
1955bool basic_regex_parser<charT, traits>::parse_perl_extension()
1956{
1957 if(++m_position == m_end)
1958 {
1959 // Rewind to start of (? sequence:
1960 --m_position;
1961 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1962 fail(regex_constants::error_perl_extension, m_position - m_base);
1963 return false;
1964 }
1965 //
1966 // treat comments as a special case, as these
1967 // are the only ones that don't start with a leading
1968 // startmark state:
1969 //
1970 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1971 {
1972 while((m_position != m_end)
1973 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1974 {}
1975 return true;
1976 }
1977 //
1978 // backup some state, and prepare the way:
1979 //
1980 int markid = 0;
1981 std::ptrdiff_t jump_offset = 0;
1982 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
1983 pb->icase = this->flags() & regbase::icase;
1984 std::ptrdiff_t last_paren_start = this->getoffset(pb);
1985 // back up insertion point for alternations, and set new point:
1986 std::ptrdiff_t last_alt_point = m_alt_insert_point;
1987 this->m_pdata->m_data.align();
1988 m_alt_insert_point = this->m_pdata->m_data.size();
1989 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
1990 bool restore_flags = true;
1991 regex_constants::syntax_option_type old_flags = this->flags();
1992 bool old_case_change = m_has_case_change;
1993 m_has_case_change = false;
1994 charT name_delim;
1995 int mark_reset = m_mark_reset;
1996 int max_mark = m_max_mark;
1997 m_mark_reset = -1;
1998 m_max_mark = m_mark_count;
1999 int v;
2000 //
2001 // select the actual extension used:
2002 //
2003 switch(this->m_traits.syntax_type(*m_position))
2004 {
2005 case regex_constants::syntax_or:
2006 m_mark_reset = m_mark_count;
2007 BOOST_FALLTHROUGH;
2008 case regex_constants::syntax_colon:
2009 //
2010 // a non-capturing mark:
2011 //
2012 pb->index = markid = 0;
2013 ++m_position;
2014 break;
2015 case regex_constants::syntax_digit:
2016 {
2017 //
2018 // a recursive subexpression:
2019 //
2020 v = this->m_traits.toi(m_position, m_end, 10);
2021 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2022 {
2023 // Rewind to start of (? sequence:
2024 --m_position;
2025 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2026 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2027 return false;
2028 }
2029insert_recursion:
2030 pb->index = markid = 0;
2031 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2032 pr->alt.i = v;
2033 pr->state_id = 0;
2034 static_cast<re_case*>(
2035 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2036 )->icase = this->flags() & regbase::icase;
2037 break;
2038 }
2039 case regex_constants::syntax_plus:
2040 //
2041 // A forward-relative recursive subexpression:
2042 //
2043 ++m_position;
2044 v = this->m_traits.toi(m_position, m_end, 10);
2045 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2046 {
2047 // Rewind to start of (? sequence:
2048 --m_position;
2049 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2050 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2051 return false;
2052 }
2053 v += m_mark_count;
2054 goto insert_recursion;
2055 case regex_constants::syntax_dash:
2056 //
2057 // Possibly a backward-relative recursive subexpression:
2058 //
2059 ++m_position;
2060 v = this->m_traits.toi(m_position, m_end, 10);
2061 if(v <= 0)
2062 {
2063 --m_position;
2064 // Oops not a relative recursion at all, but a (?-imsx) group:
2065 goto option_group_jump;
2066 }
2067 v = m_mark_count + 1 - v;
2068 if(v <= 0)
2069 {
2070 // Rewind to start of (? sequence:
2071 --m_position;
2072 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2073 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2074 return false;
2075 }
2076 goto insert_recursion;
2077 case regex_constants::syntax_equal:
2078 pb->index = markid = -1;
2079 ++m_position;
2080 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2081 this->m_pdata->m_data.align();
2082 m_alt_insert_point = this->m_pdata->m_data.size();
2083 break;
2084 case regex_constants::syntax_not:
2085 pb->index = markid = -2;
2086 ++m_position;
2087 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2088 this->m_pdata->m_data.align();
2089 m_alt_insert_point = this->m_pdata->m_data.size();
2090 break;
2091 case regex_constants::escape_type_left_word:
2092 {
2093 // a lookbehind assertion:
2094 if(++m_position == m_end)
2095 {
2096 // Rewind to start of (? sequence:
2097 --m_position;
2098 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2099 fail(regex_constants::error_perl_extension, m_position - m_base);
2100 return false;
2101 }
2102 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2103 if(t == regex_constants::syntax_not)
2104 pb->index = markid = -2;
2105 else if(t == regex_constants::syntax_equal)
2106 pb->index = markid = -1;
2107 else
2108 {
2109 // Probably a named capture which also starts (?< :
2110 name_delim = '>';
2111 --m_position;
2112 goto named_capture_jump;
2113 }
2114 ++m_position;
2115 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2116 this->append_state(syntax_element_backstep, sizeof(re_brace));
2117 this->m_pdata->m_data.align();
2118 m_alt_insert_point = this->m_pdata->m_data.size();
2119 break;
2120 }
2121 case regex_constants::escape_type_right_word:
2122 //
2123 // an independent sub-expression:
2124 //
2125 pb->index = markid = -3;
2126 ++m_position;
2127 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2128 this->m_pdata->m_data.align();
2129 m_alt_insert_point = this->m_pdata->m_data.size();
2130 break;
2131 case regex_constants::syntax_open_mark:
2132 {
2133 // a conditional expression:
2134 pb->index = markid = -4;
2135 if(++m_position == m_end)
2136 {
2137 // Rewind to start of (? sequence:
2138 --m_position;
2139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2140 fail(regex_constants::error_perl_extension, m_position - m_base);
2141 return false;
2142 }
2143 v = this->m_traits.toi(m_position, m_end, 10);
2144 if(m_position == m_end)
2145 {
2146 // Rewind to start of (? sequence:
2147 --m_position;
2148 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2149 fail(regex_constants::error_perl_extension, m_position - m_base);
2150 return false;
2151 }
2152 if(*m_position == charT('R'))
2153 {
2154 if(++m_position == m_end)
2155 {
2156 // Rewind to start of (? sequence:
2157 --m_position;
2158 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2159 fail(regex_constants::error_perl_extension, m_position - m_base);
2160 return false;
2161 }
2162 if(*m_position == charT('&'))
2163 {
2164 const charT* base = ++m_position;
2165 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2166 ++m_position;
2167 if(m_position == m_end)
2168 {
2169 // Rewind to start of (? sequence:
2170 --m_position;
2171 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2172 fail(regex_constants::error_perl_extension, m_position - m_base);
2173 return false;
2174 }
2175 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2176 }
2177 else
2178 {
2179 v = -this->m_traits.toi(m_position, m_end, 10);
2180 }
2181 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2182 br->index = v < 0 ? (v - 1) : 0;
2183 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2184 {
2185 // Rewind to start of (? sequence:
2186 --m_position;
2187 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2188 fail(regex_constants::error_perl_extension, m_position - m_base);
2189 return false;
2190 }
2191 if(++m_position == m_end)
2192 {
2193 // Rewind to start of (? sequence:
2194 --m_position;
2195 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2196 fail(regex_constants::error_perl_extension, m_position - m_base);
2197 return false;
2198 }
2199 }
2200 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2201 {
2202 const charT* base = ++m_position;
2203 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2204 ++m_position;
2205 if(m_position == m_end)
2206 {
2207 // Rewind to start of (? sequence:
2208 --m_position;
2209 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2210 fail(regex_constants::error_perl_extension, m_position - m_base);
2211 return false;
2212 }
2213 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2214 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2215 br->index = v;
2216 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2217 {
2218 // Rewind to start of (? sequence:
2219 --m_position;
2220 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2221 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2222 return false;
2223 }
2224 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2225 {
2226 // Rewind to start of (? sequence:
2227 --m_position;
2228 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2229 fail(regex_constants::error_perl_extension, m_position - m_base);
2230 return false;
2231 }
2232 if(++m_position == m_end)
2233 {
2234 // Rewind to start of (? sequence:
2235 --m_position;
2236 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2237 fail(regex_constants::error_perl_extension, m_position - m_base);
2238 return false;
2239 }
2240 }
2241 else if(*m_position == charT('D'))
2242 {
2243 const char* def = "DEFINE";
2244 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2245 ++m_position, ++def;
2246 if((m_position == m_end) || *def)
2247 {
2248 // Rewind to start of (? sequence:
2249 --m_position;
2250 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2251 fail(regex_constants::error_perl_extension, m_position - m_base);
2252 return false;
2253 }
2254 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2255 br->index = 9999; // special magic value!
2256 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2257 {
2258 // Rewind to start of (? sequence:
2259 --m_position;
2260 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2261 fail(regex_constants::error_perl_extension, m_position - m_base);
2262 return false;
2263 }
2264 if(++m_position == m_end)
2265 {
2266 // Rewind to start of (? sequence:
2267 --m_position;
2268 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2269 fail(regex_constants::error_perl_extension, m_position - m_base);
2270 return false;
2271 }
2272 }
2273 else if(v > 0)
2274 {
2275 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2276 br->index = v;
2277 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2278 {
2279 // Rewind to start of (? sequence:
2280 --m_position;
2281 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2282 fail(regex_constants::error_perl_extension, m_position - m_base);
2283 return false;
2284 }
2285 if(++m_position == m_end)
2286 {
2287 // Rewind to start of (? sequence:
2288 --m_position;
2289 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2290 fail(regex_constants::error_perl_extension, m_position - m_base);
2291 return false;
2292 }
2293 }
2294 else
2295 {
2296 // verify that we have a lookahead or lookbehind assert:
2297 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2298 {
2299 // Rewind to start of (? sequence:
2300 --m_position;
2301 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302 fail(regex_constants::error_perl_extension, m_position - m_base);
2303 return false;
2304 }
2305 if(++m_position == m_end)
2306 {
2307 // Rewind to start of (? sequence:
2308 --m_position;
2309 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2310 fail(regex_constants::error_perl_extension, m_position - m_base);
2311 return false;
2312 }
2313 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2314 {
2315 if(++m_position == m_end)
2316 {
2317 // Rewind to start of (? sequence:
2318 --m_position;
2319 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2320 fail(regex_constants::error_perl_extension, m_position - m_base);
2321 return false;
2322 }
2323 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2324 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2325 {
2326 // Rewind to start of (? sequence:
2327 --m_position;
2328 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2329 fail(regex_constants::error_perl_extension, m_position - m_base);
2330 return false;
2331 }
2332 m_position -= 3;
2333 }
2334 else
2335 {
2336 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2337 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2338 {
2339 // Rewind to start of (? sequence:
2340 --m_position;
2341 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2342 fail(regex_constants::error_perl_extension, m_position - m_base);
2343 return false;
2344 }
2345 m_position -= 2;
2346 }
2347 }
2348 break;
2349 }
2350 case regex_constants::syntax_close_mark:
2351 // Rewind to start of (? sequence:
2352 --m_position;
2353 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354 fail(regex_constants::error_perl_extension, m_position - m_base);
2355 return false;
2356 case regex_constants::escape_type_end_buffer:
2357 {
2358 name_delim = *m_position;
2359named_capture_jump:
2360 markid = 0;
2361 if(0 == (this->flags() & regbase::nosubs))
2362 {
2363 markid = ++m_mark_count;
2364 #ifndef BOOST_NO_STD_DISTANCE
2365 if(this->flags() & regbase::save_subexpression_location)
2366 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2367 #else
2368 if(this->flags() & regbase::save_subexpression_location)
2369 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2370 #endif
2371 }
2372 pb->index = markid;
2373 const charT* base = ++m_position;
2374 if(m_position == m_end)
2375 {
2376 // Rewind to start of (? sequence:
2377 --m_position;
2378 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2379 fail(regex_constants::error_perl_extension, m_position - m_base);
2380 return false;
2381 }
2382 while((m_position != m_end) && (*m_position != name_delim))
2383 ++m_position;
2384 if(m_position == m_end)
2385 {
2386 // Rewind to start of (? sequence:
2387 --m_position;
2388 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2389 fail(regex_constants::error_perl_extension, m_position - m_base);
2390 return false;
2391 }
2392 this->m_pdata->set_name(base, m_position, markid);
2393 ++m_position;
2394 break;
2395 }
2396 default:
2397 if(*m_position == charT('R'))
2398 {
2399 ++m_position;
2400 v = 0;
2401 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2402 {
2403 // Rewind to start of (? sequence:
2404 --m_position;
2405 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2406 fail(regex_constants::error_perl_extension, m_position - m_base);
2407 return false;
2408 }
2409 goto insert_recursion;
2410 }
2411 if(*m_position == charT('&'))
2412 {
2413 ++m_position;
2414 const charT* base = m_position;
2415 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2416 ++m_position;
2417 if(m_position == m_end)
2418 {
2419 // Rewind to start of (? sequence:
2420 --m_position;
2421 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2422 fail(regex_constants::error_perl_extension, m_position - m_base);
2423 return false;
2424 }
2425 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2426 goto insert_recursion;
2427 }
2428 if(*m_position == charT('P'))
2429 {
2430 ++m_position;
2431 if(m_position == m_end)
2432 {
2433 // Rewind to start of (? sequence:
2434 --m_position;
2435 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2436 fail(regex_constants::error_perl_extension, m_position - m_base);
2437 return false;
2438 }
2439 if(*m_position == charT('>'))
2440 {
2441 ++m_position;
2442 const charT* base = m_position;
2443 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2444 ++m_position;
2445 if(m_position == m_end)
2446 {
2447 // Rewind to start of (? sequence:
2448 --m_position;
2449 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2450 fail(regex_constants::error_perl_extension, m_position - m_base);
2451 return false;
2452 }
2453 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2454 goto insert_recursion;
2455 }
2456 }
2457 //
2458 // lets assume that we have a (?imsx) group and try and parse it:
2459 //
2460option_group_jump:
2461 regex_constants::syntax_option_type opts = parse_options();
2462 if(m_position == m_end)
2463 {
2464 // Rewind to start of (? sequence:
2465 --m_position;
2466 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2467 fail(regex_constants::error_perl_extension, m_position - m_base);
2468 return false;
2469 }
2470 // make a note of whether we have a case change:
2471 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2472 pb->index = markid = 0;
2473 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2474 {
2475 // update flags and carry on as normal:
2476 this->flags(opts);
2477 restore_flags = false;
2478 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2479 }
2480 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2481 {
2482 // update flags and carry on until the matching ')' is found:
2483 this->flags(opts);
2484 ++m_position;
2485 }
2486 else
2487 {
2488 // Rewind to start of (? sequence:
2489 --m_position;
2490 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491 fail(regex_constants::error_perl_extension, m_position - m_base);
2492 return false;
2493 }
2494
2495 // finally append a case change state if we need it:
2496 if(m_has_case_change)
2497 {
2498 static_cast<re_case*>(
2499 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2500 )->icase = opts & regbase::icase;
2501 }
2502
2503 }
2504 //
2505 // now recursively add more states, this will terminate when we get to a
2506 // matching ')' :
2507 //
2508 parse_all();
2509 //
2510 // Unwind alternatives:
2511 //
2512 if(0 == unwind_alts(last_paren_start))
2513 {
2514 // Rewind to start of (? sequence:
2515 --m_position;
2516 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2517 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2518 return false;
2519 }
2520 //
2521 // we either have a ')' or we have run out of characters prematurely:
2522 //
2523 if(m_position == m_end)
2524 {
2525 // Rewind to start of (? sequence:
2526 --m_position;
2527 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2528 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2529 return false;
2530 }
2531 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2532 ++m_position;
2533 //
2534 // restore the flags:
2535 //
2536 if(restore_flags)
2537 {
2538 // append a case change state if we need it:
2539 if(m_has_case_change)
2540 {
2541 static_cast<re_case*>(
2542 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2543 )->icase = old_flags & regbase::icase;
2544 }
2545 this->flags(old_flags);
2546 }
2547 //
2548 // set up the jump pointer if we have one:
2549 //
2550 if(jump_offset)
2551 {
2552 this->m_pdata->m_data.align();
2553 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2554 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2555 if((this->m_last_state == jmp) && (markid != -2))
2556 {
2557 // Oops... we didn't have anything inside the assertion.
2558 // Note we don't get here for negated forward lookahead as (?!)
2559 // does have some uses.
2560 // Rewind to start of (? sequence:
2561 --m_position;
2562 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2563 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2564 return false;
2565 }
2566 }
2567 //
2568 // verify that if this is conditional expression, that we do have
2569 // an alternative, if not add one:
2570 //
2571 if(markid == -4)
2572 {
2573 re_syntax_base* b = this->getaddress(expected_alt_point);
2574 // Make sure we have exactly one alternative following this state:
2575 if(b->type != syntax_element_alt)
2576 {
2577 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2578 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2579 }
2580 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2581 {
2582 // Can't have seen more than one alternative:
2583 // Rewind to start of (? sequence:
2584 --m_position;
2585 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2586 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2587 return false;
2588 }
2589 else
2590 {
2591 // We must *not* have seen an alternative inside a (DEFINE) block:
2592 b = this->getaddress(b->next.i, b);
2593 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2594 {
2595 // Rewind to start of (? sequence:
2596 --m_position;
2597 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2598 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2599 return false;
2600 }
2601 }
2602 // check for invalid repetition of next state:
2603 b = this->getaddress(expected_alt_point);
2604 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2605 if((b->type != syntax_element_assert_backref)
2606 && (b->type != syntax_element_startmark))
2607 {
2608 // Rewind to start of (? sequence:
2609 --m_position;
2610 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2611 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2612 return false;
2613 }
2614 }
2615 //
2616 // append closing parenthesis state:
2617 //
2618 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2619 pb->index = markid;
2620 pb->icase = this->flags() & regbase::icase;
2621 this->m_paren_start = last_paren_start;
2622 //
2623 // restore the alternate insertion point:
2624 //
2625 this->m_alt_insert_point = last_alt_point;
2626 //
2627 // and the case change data:
2628 //
2629 m_has_case_change = old_case_change;
2630 //
2631 // And the mark_reset data:
2632 //
2633 if(m_max_mark > m_mark_count)
2634 {
2635 m_mark_count = m_max_mark;
2636 }
2637 m_mark_reset = mark_reset;
2638 m_max_mark = max_mark;
2639
2640
2641 if(markid > 0)
2642 {
2643#ifndef BOOST_NO_STD_DISTANCE
2644 if(this->flags() & regbase::save_subexpression_location)
2645 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2646#else
2647 if(this->flags() & regbase::save_subexpression_location)
2648 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2649#endif
2650 //
2651 // allow backrefs to this mark:
2652 //
2653 if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
2654 this->m_backrefs |= 1u << (markid - 1);
2655 }
2656 return true;
2657}
2658
2659template <class charT, class traits>
2660bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2661{
2662 while(*verb)
2663 {
2664 if(static_cast<charT>(*verb) != *m_position)
2665 {
2666 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2667 fail(regex_constants::error_perl_extension, m_position - m_base);
2668 return false;
2669 }
2670 if(++m_position == m_end)
2671 {
2672 --m_position;
2673 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2674 fail(regex_constants::error_perl_extension, m_position - m_base);
2675 return false;
2676 }
2677 ++verb;
2678 }
2679 return true;
2680}
2681
2682template <class charT, class traits>
2683bool basic_regex_parser<charT, traits>::parse_perl_verb()
2684{
2685 if(++m_position == m_end)
2686 {
2687 // Rewind to start of (* sequence:
2688 --m_position;
2689 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2690 fail(regex_constants::error_perl_extension, m_position - m_base);
2691 return false;
2692 }
2693 switch(*m_position)
2694 {
2695 case 'F':
2696 if(++m_position == m_end)
2697 {
2698 // Rewind to start of (* sequence:
2699 --m_position;
2700 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2701 fail(regex_constants::error_perl_extension, m_position - m_base);
2702 return false;
2703 }
2704 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb(verb: "AIL"))
2705 {
2706 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2707 {
2708 // Rewind to start of (* sequence:
2709 --m_position;
2710 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2711 fail(regex_constants::error_perl_extension, m_position - m_base);
2712 return false;
2713 }
2714 ++m_position;
2715 this->append_state(syntax_element_fail);
2716 return true;
2717 }
2718 break;
2719 case 'A':
2720 if(++m_position == m_end)
2721 {
2722 // Rewind to start of (* sequence:
2723 --m_position;
2724 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2725 fail(regex_constants::error_perl_extension, m_position - m_base);
2726 return false;
2727 }
2728 if(match_verb(verb: "CCEPT"))
2729 {
2730 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2731 {
2732 // Rewind to start of (* sequence:
2733 --m_position;
2734 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2735 fail(regex_constants::error_perl_extension, m_position - m_base);
2736 return false;
2737 }
2738 ++m_position;
2739 this->append_state(syntax_element_accept);
2740 return true;
2741 }
2742 break;
2743 case 'C':
2744 if(++m_position == m_end)
2745 {
2746 // Rewind to start of (* sequence:
2747 --m_position;
2748 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2749 fail(regex_constants::error_perl_extension, m_position - m_base);
2750 return false;
2751 }
2752 if(match_verb(verb: "OMMIT"))
2753 {
2754 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2755 {
2756 // Rewind to start of (* sequence:
2757 --m_position;
2758 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2759 fail(regex_constants::error_perl_extension, m_position - m_base);
2760 return false;
2761 }
2762 ++m_position;
2763 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2764 this->m_pdata->m_disable_match_any = true;
2765 return true;
2766 }
2767 break;
2768 case 'P':
2769 if(++m_position == m_end)
2770 {
2771 // Rewind to start of (* sequence:
2772 --m_position;
2773 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2774 fail(regex_constants::error_perl_extension, m_position - m_base);
2775 return false;
2776 }
2777 if(match_verb(verb: "RUNE"))
2778 {
2779 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2780 {
2781 // Rewind to start of (* sequence:
2782 --m_position;
2783 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2784 fail(regex_constants::error_perl_extension, m_position - m_base);
2785 return false;
2786 }
2787 ++m_position;
2788 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2789 this->m_pdata->m_disable_match_any = true;
2790 return true;
2791 }
2792 break;
2793 case 'S':
2794 if(++m_position == m_end)
2795 {
2796 // Rewind to start of (* sequence:
2797 --m_position;
2798 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2799 fail(regex_constants::error_perl_extension, m_position - m_base);
2800 return false;
2801 }
2802 if(match_verb(verb: "KIP"))
2803 {
2804 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2805 {
2806 // Rewind to start of (* sequence:
2807 --m_position;
2808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2809 fail(regex_constants::error_perl_extension, m_position - m_base);
2810 return false;
2811 }
2812 ++m_position;
2813 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2814 this->m_pdata->m_disable_match_any = true;
2815 return true;
2816 }
2817 break;
2818 case 'T':
2819 if(++m_position == m_end)
2820 {
2821 // Rewind to start of (* sequence:
2822 --m_position;
2823 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2824 fail(regex_constants::error_perl_extension, m_position - m_base);
2825 return false;
2826 }
2827 if(match_verb(verb: "HEN"))
2828 {
2829 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2830 {
2831 // Rewind to start of (* sequence:
2832 --m_position;
2833 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2834 fail(regex_constants::error_perl_extension, m_position - m_base);
2835 return false;
2836 }
2837 ++m_position;
2838 this->append_state(syntax_element_then);
2839 this->m_pdata->m_disable_match_any = true;
2840 return true;
2841 }
2842 break;
2843 }
2844 return false;
2845}
2846
2847template <class charT, class traits>
2848bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2849{
2850 //
2851 // parses an emacs style \sx or \Sx construct.
2852 //
2853 if(++m_position == m_end)
2854 {
2855 // Rewind to start of sequence:
2856 --m_position;
2857 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2858 fail(regex_constants::error_escape, m_position - m_base);
2859 return false;
2860 }
2861 basic_char_set<charT, traits> char_set;
2862 if(negate)
2863 char_set.negate();
2864
2865 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2866
2867 switch(*m_position)
2868 {
2869 case 's':
2870 case ' ':
2871 char_set.add_class(this->m_mask_space);
2872 break;
2873 case 'w':
2874 char_set.add_class(this->m_word_mask);
2875 break;
2876 case '_':
2877 char_set.add_single(digraph<charT>(charT('$')));
2878 char_set.add_single(digraph<charT>(charT('&')));
2879 char_set.add_single(digraph<charT>(charT('*')));
2880 char_set.add_single(digraph<charT>(charT('+')));
2881 char_set.add_single(digraph<charT>(charT('-')));
2882 char_set.add_single(digraph<charT>(charT('_')));
2883 char_set.add_single(digraph<charT>(charT('<')));
2884 char_set.add_single(digraph<charT>(charT('>')));
2885 break;
2886 case '.':
2887 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2888 break;
2889 case '(':
2890 char_set.add_single(digraph<charT>(charT('(')));
2891 char_set.add_single(digraph<charT>(charT('[')));
2892 char_set.add_single(digraph<charT>(charT('{')));
2893 break;
2894 case ')':
2895 char_set.add_single(digraph<charT>(charT(')')));
2896 char_set.add_single(digraph<charT>(charT(']')));
2897 char_set.add_single(digraph<charT>(charT('}')));
2898 break;
2899 case '"':
2900 char_set.add_single(digraph<charT>(charT('"')));
2901 char_set.add_single(digraph<charT>(charT('\'')));
2902 char_set.add_single(digraph<charT>(charT('`')));
2903 break;
2904 case '\'':
2905 char_set.add_single(digraph<charT>(charT('\'')));
2906 char_set.add_single(digraph<charT>(charT(',')));
2907 char_set.add_single(digraph<charT>(charT('#')));
2908 break;
2909 case '<':
2910 char_set.add_single(digraph<charT>(charT(';')));
2911 break;
2912 case '>':
2913 char_set.add_single(digraph<charT>(charT('\n')));
2914 char_set.add_single(digraph<charT>(charT('\f')));
2915 break;
2916 default:
2917 fail(regex_constants::error_ctype, m_position - m_base);
2918 return false;
2919 }
2920 if(0 == this->append_set(char_set))
2921 {
2922 fail(regex_constants::error_ctype, m_position - m_base);
2923 return false;
2924 }
2925 ++m_position;
2926 return true;
2927}
2928
2929template <class charT, class traits>
2930regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2931{
2932 // we have a (?imsx-imsx) group, convert it into a set of flags:
2933 regex_constants::syntax_option_type f = this->flags();
2934 bool breakout = false;
2935 do
2936 {
2937 switch(*m_position)
2938 {
2939 case 's':
2940 f |= regex_constants::mod_s;
2941 f &= ~regex_constants::no_mod_s;
2942 break;
2943 case 'm':
2944 f &= ~regex_constants::no_mod_m;
2945 break;
2946 case 'i':
2947 f |= regex_constants::icase;
2948 break;
2949 case 'x':
2950 f |= regex_constants::mod_x;
2951 break;
2952 default:
2953 breakout = true;
2954 continue;
2955 }
2956 if(++m_position == m_end)
2957 {
2958 // Rewind to start of (? sequence:
2959 --m_position;
2960 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2961 fail(regex_constants::error_paren, m_position - m_base);
2962 return false;
2963 }
2964 }
2965 while(!breakout);
2966
2967 breakout = false;
2968
2969 if(*m_position == static_cast<charT>('-'))
2970 {
2971 if(++m_position == m_end)
2972 {
2973 // Rewind to start of (? sequence:
2974 --m_position;
2975 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2976 fail(regex_constants::error_paren, m_position - m_base);
2977 return false;
2978 }
2979 do
2980 {
2981 switch(*m_position)
2982 {
2983 case 's':
2984 f &= ~regex_constants::mod_s;
2985 f |= regex_constants::no_mod_s;
2986 break;
2987 case 'm':
2988 f |= regex_constants::no_mod_m;
2989 break;
2990 case 'i':
2991 f &= ~regex_constants::icase;
2992 break;
2993 case 'x':
2994 f &= ~regex_constants::mod_x;
2995 break;
2996 default:
2997 breakout = true;
2998 continue;
2999 }
3000 if(++m_position == m_end)
3001 {
3002 // Rewind to start of (? sequence:
3003 --m_position;
3004 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3005 fail(regex_constants::error_paren, m_position - m_base);
3006 return false;
3007 }
3008 }
3009 while(!breakout);
3010 }
3011 return f;
3012}
3013
3014template <class charT, class traits>
3015bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3016{
3017 //
3018 // If we didn't actually add any states after the last
3019 // alternative then that's an error:
3020 //
3021 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3022 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3023 &&
3024 !(
3025 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3026 &&
3027 ((this->flags() & regbase::no_empty_expressions) == 0)
3028 )
3029 )
3030 {
3031 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3032 return false;
3033 }
3034 //
3035 // Fix up our alternatives:
3036 //
3037 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3038 {
3039 //
3040 // fix up the jump to point to the end of the states
3041 // that we've just added:
3042 //
3043 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3044 m_alt_jumps.pop_back();
3045 this->m_pdata->m_data.align();
3046 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3047 BOOST_ASSERT(jmp->type == syntax_element_jump);
3048 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3049 }
3050 return true;
3051}
3052
3053#ifdef BOOST_MSVC
3054#pragma warning(pop)
3055#endif
3056
3057} // namespace BOOST_REGEX_DETAIL_NS
3058} // namespace boost
3059
3060#ifdef BOOST_MSVC
3061#pragma warning(push)
3062#pragma warning(disable: 4103)
3063#endif
3064#ifdef BOOST_HAS_ABI_HEADERS
3065# include BOOST_ABI_SUFFIX
3066#endif
3067#ifdef BOOST_MSVC
3068#pragma warning(pop)
3069#endif
3070
3071#endif
3072

// Source code of boost/boost/regex/v4/basic_regex_parser.hpp