basic_regex_parser.hpp source code [boost/boost/regex/v4/basic_regex_parser.hpp]

1	/*
2	*
3	* Copyright (c) 2004
4	* John Maddock
5	*
6	* Use, modification and distribution are subject to the
7	* Boost Software License, Version 1.0. (See accompanying file
8	* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9	*
10	*/
11
12	/*
13	* LOCATION: see http://www.boost.org for most recent version.
14	* FILE basic_regex_parser.hpp
15	* VERSION see <boost/version.hpp>
16	* DESCRIPTION: Declares template class basic_regex_parser.
17	*/
18
19	#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20	#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
22	#ifdef BOOST_MSVC
23	#pragma warning(push)
24	#pragma warning(disable: 4103)
25	#endif
26	#ifdef BOOST_HAS_ABI_HEADERS
27	# include BOOST_ABI_PREFIX
28	#endif
29	#ifdef BOOST_MSVC
30	#pragma warning(pop)
31	#endif
32
33	namespace boost{
34	namespace BOOST_REGEX_DETAIL_NS{
35
36	#ifdef BOOST_MSVC
37	#pragma warning(push)
38	#pragma warning(disable:4244 4800)
39	#endif
40
41	template <class charT, class traits>
42	class basic_regex_parser : public basic_regex_creator<charT, traits>
43	{
44	public:
45	basic_regex_parser(regex_data<charT, traits>* data);
46	void parse(const charT* p1, const charT* p2, unsigned flags);
47	void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
48	void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
49	void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
50	{
51	fail(error_code, position, message, position);
52	}
53
54	bool parse_all();
55	bool parse_basic();
56	bool parse_extended();
57	bool parse_literal();
58	bool parse_open_paren();
59	bool parse_basic_escape();
60	bool parse_extended_escape();
61	bool parse_match_any();
62	bool parse_repeat(std::size_t low = `0`, std::size_t high = (std::numeric_limits<std::size_t>::max)());
63	bool parse_repeat_range(bool isbasic);
64	bool parse_alt();
65	bool parse_set();
66	bool parse_backref();
67	void parse_set_literal(basic_char_set<charT, traits>& char_set);
68	bool parse_inner_set(basic_char_set<charT, traits>& char_set);
69	bool parse_QE();
70	bool parse_perl_extension();
71	bool parse_perl_verb();
72	bool match_verb(const char*);
73	bool add_emacs_code(bool negate);
74	bool unwind_alts(std::ptrdiff_t last_paren_start);
75	digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
76	charT unescape_character();
77	regex_constants::syntax_option_type parse_options();
78
79	private:
80	typedef bool (basic_regex_parser::*parser_proc_type)();
81	typedef typename traits::string_type string_type;
82	typedef typename traits::char_class_type char_class_type;
83	parser_proc_type m_parser_proc; // the main parser to use
84	const charT* m_base; // the start of the string being parsed
85	const charT* m_end; // the end of the string being parsed
86	const charT* m_position; // our current parser position
87	unsigned m_mark_count; // how many sub-expressions we have
88	int m_mark_reset; // used to indicate that we're inside a (?\|...) block.
89	unsigned m_max_mark; // largest mark count seen inside a (?\|...) block.
90	std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
91	std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
92	bool m_has_case_change; // true if somewhere in the current block the case has changed
93	#if defined(BOOST_MSVC) && defined(_M_IX86)
94	// This is an ugly warning suppression workaround (for warnings inside* std::vector*
95	// that can not otherwise be suppressed)...
96	BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
97	std::vector<long> m_alt_jumps; // list of alternative in the current scope.
98	#else
99	std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
100	#endif
101
102	basic_regex_parser& operator=(const basic_regex_parser&);
103	basic_regex_parser(const basic_regex_parser&);
104	};
105
106	template <class charT, class traits>
107	basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
108	: basic_regex_creator<charT, traits>(data), m_mark_count(`0`), m_mark_reset(-`1`), m_max_mark(`0`), m_paren_start(`0`), m_alt_insert_point(`0`), m_has_case_change(false)
109	{
110	}
111
112	template <class charT, class traits>
113	void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
114	{
115	// pass l_flags on to base class:
116	this->init(l_flags);
117	// set up pointers:
118	m_position = m_base = p1;
119	m_end = p2;
120	// empty strings are errors:
121	if((p1 == p2) &&
122	(
123	((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
124	\|\| (l_flags & regbase::no_empty_expressions)
125	)
126	)
127	{
128	fail(regex_constants::error_empty, `0`);
129	return;
130	}
131	// select which parser to use:
132	switch(l_flags & regbase::main_option_type)
133	{
134	case regbase::perl_syntax_group:
135	{
136	m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
137	//
138	// Add a leading paren with index zero to give recursions a target:
139	//
140	re_brace* br = static_cast<re_brace>(this->append_state(syntax_element_startmark, sizeof*(re_brace)));
141	br->index = `0`;
142	br->icase = this->flags() & regbase::icase;
143	break;
144	}
145	case regbase::basic_syntax_group:
146	m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
147	break;
148	case regbase::literal:
149	m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
150	break;
151	default:
152	// Ooops, someone has managed to set more than one of the main option flags,
153	// so this must be an error:
154	fail(regex_constants::error_unknown, `0`, "An invalid combination of regular expression syntax flags was used.");
155	return;
156	}
157
158	// parse all our characters:
159	bool result = parse_all();
160	//
161	// Unwind our alternatives:
162	//
163	unwind_alts(last_paren_start: -`1`);
164	// reset l_flags as a global scope (?imsx) may have altered them:
165	this->flags(l_flags);
166	// if we haven't gobbled up all the characters then we must
167	// have had an unexpected ')' :
168	if(!result)
169	{
170	fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
171	return;
172	}
173	// if an error has been set then give up now:
174	if(this->m_pdata->m_status)
175	return;
176	// fill in our sub-expression count:
177	this->m_pdata->m_mark_count = `1` + m_mark_count;
178	this->finalize(p1, p2);
179	}
180
181	template <class charT, class traits>
182	void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
183	{
184	// get the error message:
185	std::string message = this->m_pdata->m_ptraits->error_string(error_code);
186	fail(error_code, position, message);
187	}
188
189	template <class charT, class traits>
190	void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
191	{
192	if(`0` == this->m_pdata->m_status) // update the error code if not already set
193	this->m_pdata->m_status = error_code;
194	m_position = m_end; // don't bother parsing anything else
195
196	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
197	//
198	// Augment error message with the regular expression text:
199	//
200	if(start_pos == position)
201	start_pos = (std::max)(a: static_cast<std::ptrdiff_t>(`0`), b: position - static_cast<std::ptrdiff_t>(`10`));
202	std::ptrdiff_t end_pos = (std::min)(a: position + static_cast<std::ptrdiff_t>(`10`), b: static_cast<std::ptrdiff_t>(m_end - m_base));
203	if(error_code != regex_constants::error_empty)
204	{
205	if((start_pos != `0`) \|\| (end_pos != (m_end - m_base)))
206	message += " The error occurred while parsing the regular expression fragment: '";
207	else
208	message += " The error occurred while parsing the regular expression: '";
209	if(start_pos != end_pos)
210	{
211	message += std::string(m_base + start_pos, m_base + position);
212	message += ">>>HERE>>>";
213	message += std::string(m_base + position, m_base + end_pos);
214	}
215	message += "'.";
216	}
217	#endif
218
219	#ifndef BOOST_NO_EXCEPTIONS
220	if(`0` == (this->flags() & regex_constants::no_except))
221	{
222	boost::regex_error e(message, error_code, position);
223	e.raise();
224	}
225	#else
226	(void)position; // suppress warnings.
227	#endif
228	}
229
230	template <class charT, class traits>
231	bool basic_regex_parser<charT, traits>::parse_all()
232	{
233	bool result = true;
234	while(result && (m_position != m_end))
235	{
236	result = (this->*m_parser_proc)();
237	}
238	return result;
239	}
240
241	#ifdef BOOST_MSVC
242	#pragma warning(push)
243	#pragma warning(disable:4702)
244	#endif
245	template <class charT, class traits>
246	bool basic_regex_parser<charT, traits>::parse_basic()
247	{
248	switch(this->m_traits.syntax_type(*m_position))
249	{
250	case regex_constants::syntax_escape:
251	return parse_basic_escape();
252	case regex_constants::syntax_dot:
253	return parse_match_any();
254	case regex_constants::syntax_caret:
255	++m_position;
256	this->append_state(syntax_element_start_line);
257	break;
258	case regex_constants::syntax_dollar:
259	++m_position;
260	this->append_state(syntax_element_end_line);
261	break;
262	case regex_constants::syntax_star:
263	if(!(this->m_last_state) \|\| (this->m_last_state->type == syntax_element_start_line))
264	return parse_literal();
265	else
266	{
267	++m_position;
268	return parse_repeat();
269	}
270	case regex_constants::syntax_plus:
271	if(!(this->m_last_state) \|\| (this->m_last_state->type == syntax_element_start_line) \|\| !(this->flags() & regbase::emacs_ex))
272	return parse_literal();
273	else
274	{
275	++m_position;
276	return parse_repeat(low: `1`);
277	}
278	case regex_constants::syntax_question:
279	if(!(this->m_last_state) \|\| (this->m_last_state->type == syntax_element_start_line) \|\| !(this->flags() & regbase::emacs_ex))
280	return parse_literal();
281	else
282	{
283	++m_position;
284	return parse_repeat(low: `0`, high: `1`);
285	}
286	case regex_constants::syntax_open_set:
287	return parse_set();
288	case regex_constants::syntax_newline:
289	if(this->flags() & regbase::newline_alt)
290	return parse_alt();
291	else
292	return parse_literal();
293	default:
294	return parse_literal();
295	}
296	return true;
297	}
298
299	template <class charT, class traits>
300	bool basic_regex_parser<charT, traits>::parse_extended()
301	{
302	bool result = true;
303	switch(this->m_traits.syntax_type(*m_position))
304	{
305	case regex_constants::syntax_open_mark:
306	return parse_open_paren();
307	case regex_constants::syntax_close_mark:
308	return false;
309	case regex_constants::syntax_escape:
310	return parse_extended_escape();
311	case regex_constants::syntax_dot:
312	return parse_match_any();
313	case regex_constants::syntax_caret:
314	++m_position;
315	this->append_state(
316	(this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
317	break;
318	case regex_constants::syntax_dollar:
319	++m_position;
320	this->append_state(
321	(this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
322	break;
323	case regex_constants::syntax_star:
324	if(m_position == this->m_base)
325	{
326	fail(regex_constants::error_badrepeat, `0`, "The repeat operator \"*\" cannot start a regular expression.");
327	return false;
328	}
329	++m_position;
330	return parse_repeat();
331	case regex_constants::syntax_question:
332	if(m_position == this->m_base)
333	{
334	fail(regex_constants::error_badrepeat, `0`, "The repeat operator \"?\" cannot start a regular expression.");
335	return false;
336	}
337	++m_position;
338	return parse_repeat(low: `0`,high: `1`);
339	case regex_constants::syntax_plus:
340	if(m_position == this->m_base)
341	{
342	fail(regex_constants::error_badrepeat, `0`, "The repeat operator \"+\" cannot start a regular expression.");
343	return false;
344	}
345	++m_position;
346	return parse_repeat(low: `1`);
347	case regex_constants::syntax_open_brace:
348	++m_position;
349	return parse_repeat_range(isbasic: false);
350	case regex_constants::syntax_close_brace:
351	if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
352	{
353	fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
354	return false;
355	}
356	result = parse_literal();
357	break;
358	case regex_constants::syntax_or:
359	return parse_alt();
360	case regex_constants::syntax_open_set:
361	return parse_set();
362	case regex_constants::syntax_newline:
363	if(this->flags() & regbase::newline_alt)
364	return parse_alt();
365	else
366	return parse_literal();
367	case regex_constants::syntax_hash:
368	//
369	// If we have a mod_x flag set, then skip until
370	// we get to a newline character:
371	//
372	if((this->flags()
373	& (regbase::no_perl_ex\|regbase::mod_x))
374	== regbase::mod_x)
375	{
376	while((m_position != m_end) && !is_separator(*m_position++)){}
377	return true;
378	}
379	BOOST_FALLTHROUGH;
380	default:
381	result = parse_literal();
382	break;
383	}
384	return result;
385	}
386	#ifdef BOOST_MSVC
387	#pragma warning(pop)
388	#endif
389
390	template <class charT, class traits>
391	bool basic_regex_parser<charT, traits>::parse_literal()
392	{
393	// append this as a literal provided it's not a space character
394	// or the perl option regbase::mod_x is not set:
395	if(
396	((this->flags()
397	& (regbase::main_option_type\|regbase::mod_x\|regbase::no_perl_ex))
398	!= regbase::mod_x)
399	\|\| !this->m_traits.isctype(m_position, this*->m_mask_space))
400	this->append_literal(*m_position);
401	++m_position;
402	return true;
403	}
404
405	template <class charT, class traits>
406	bool basic_regex_parser<charT, traits>::parse_open_paren()
407	{
408	//
409	// skip the '(' and error check:
410	//
411	if(++m_position == m_end)
412	{
413	fail(regex_constants::error_paren, m_position - m_base);
414	return false;
415	}
416	//
417	// begin by checking for a perl-style (?...) extension:
418	//
419	if(
420	((this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)) == `0`)
421	\|\| ((this->flags() & (regbase::main_option_type \| regbase::emacs_ex)) == (regbase::basic_syntax_group\|regbase::emacs_ex))
422	)
423	{
424	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
425	return parse_perl_extension();
426	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
427	return parse_perl_verb();
428	}
429	//
430	// update our mark count, and append the required state:
431	//
432	unsigned markid = `0`;
433	if(`0` == (this->flags() & regbase::nosubs))
434	{
435	markid = ++m_mark_count;
436	#ifndef BOOST_NO_STD_DISTANCE
437	if(this->flags() & regbase::save_subexpression_location)
438	this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - `1`, `0`));
439	#else
440	if(this->flags() & regbase::save_subexpression_location)
441	this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - `1`, `0`));
442	#endif
443	}
444	re_brace* pb = static_cast<re_brace>(this->append_state(syntax_element_startmark, sizeof*(re_brace)));
445	pb->index = markid;
446	pb->icase = this->flags() & regbase::icase;
447	std::ptrdiff_t last_paren_start = this->getoffset(pb);
448	// back up insertion point for alternations, and set new point:
449	std::ptrdiff_t last_alt_point = m_alt_insert_point;
450	this->m_pdata->m_data.align();
451	m_alt_insert_point = this->m_pdata->m_data.size();
452	//
453	// back up the current flags in case we have a nested (?imsx) group:
454	//
455	regex_constants::syntax_option_type opts = this->flags();
456	bool old_case_change = m_has_case_change;
457	m_has_case_change = false; // no changes to this scope as yet...
458	//
459	// Back up branch reset data in case we have a nested (?\|...)
460	//
461	int mark_reset = m_mark_reset;
462	m_mark_reset = -`1`;
463	//
464	// now recursively add more states, this will terminate when we get to a
465	// matching ')' :
466	//
467	parse_all();
468	//
469	// Unwind pushed alternatives:
470	//
471	if(`0` == unwind_alts(last_paren_start))
472	return false;
473	//
474	// restore flags:
475	//
476	if(m_has_case_change)
477	{
478	// the case has changed in one or more of the alternatives
479	// within the scoped (...) block: we have to add a state
480	// to reset the case sensitivity:
481	static_cast<re_case*>(
482	this->append_state(syntax_element_toggle_case, sizeof(re_case))
483	)->icase = opts & regbase::icase;
484	}
485	this->flags(opts);
486	m_has_case_change = old_case_change;
487	//
488	// restore branch reset:
489	//
490	m_mark_reset = mark_reset;
491	//
492	// we either have a ')' or we have run out of characters prematurely:
493	//
494	if(m_position == m_end)
495	{
496	this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
497	return false;
498	}
499	BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
500	#ifndef BOOST_NO_STD_DISTANCE
501	if(markid && (this->flags() & regbase::save_subexpression_location))
502	this->m_pdata->m_subs.at(markid - `1`).second = std::distance(m_base, m_position);
503	#else
504	if(markid && (this->flags() & regbase::save_subexpression_location))
505	this->m_pdata->m_subs.at(markid - `1`).second = (m_position - m_base);
506	#endif
507	++m_position;
508	//
509	// append closing parenthesis state:
510	//
511	pb = static_cast<re_brace>(this->append_state(syntax_element_endmark, sizeof*(re_brace)));
512	pb->index = markid;
513	pb->icase = this->flags() & regbase::icase;
514	this->m_paren_start = last_paren_start;
515	//
516	// restore the alternate insertion point:
517	//
518	this->m_alt_insert_point = last_alt_point;
519	//
520	// allow backrefs to this mark:
521	//
522	if((markid > `0`) && (markid < sizeof(unsigned) * CHAR_BIT))
523	this->m_backrefs \|= `1u` << (markid - `1`);
524
525	return true;
526	}
527
528	template <class charT, class traits>
529	bool basic_regex_parser<charT, traits>::parse_basic_escape()
530	{
531	++m_position;
532	bool result = true;
533	switch(this->m_traits.escape_syntax_type(*m_position))
534	{
535	case regex_constants::syntax_open_mark:
536	return parse_open_paren();
537	case regex_constants::syntax_close_mark:
538	return false;
539	case regex_constants::syntax_plus:
540	if(this->flags() & regex_constants::bk_plus_qm)
541	{
542	++m_position;
543	return parse_repeat(low: `1`);
544	}
545	else
546	return parse_literal();
547	case regex_constants::syntax_question:
548	if(this->flags() & regex_constants::bk_plus_qm)
549	{
550	++m_position;
551	return parse_repeat(low: `0`, high: `1`);
552	}
553	else
554	return parse_literal();
555	case regex_constants::syntax_open_brace:
556	if(this->flags() & regbase::no_intervals)
557	return parse_literal();
558	++m_position;
559	return parse_repeat_range(isbasic: true);
560	case regex_constants::syntax_close_brace:
561	if(this->flags() & regbase::no_intervals)
562	return parse_literal();
563	fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
564	return false;
565	case regex_constants::syntax_or:
566	if(this->flags() & regbase::bk_vbar)
567	return parse_alt();
568	else
569	result = parse_literal();
570	break;
571	case regex_constants::syntax_digit:
572	return parse_backref();
573	case regex_constants::escape_type_start_buffer:
574	if(this->flags() & regbase::emacs_ex)
575	{
576	++m_position;
577	this->append_state(syntax_element_buffer_start);
578	}
579	else
580	result = parse_literal();
581	break;
582	case regex_constants::escape_type_end_buffer:
583	if(this->flags() & regbase::emacs_ex)
584	{
585	++m_position;
586	this->append_state(syntax_element_buffer_end);
587	}
588	else
589	result = parse_literal();
590	break;
591	case regex_constants::escape_type_word_assert:
592	if(this->flags() & regbase::emacs_ex)
593	{
594	++m_position;
595	this->append_state(syntax_element_word_boundary);
596	}
597	else
598	result = parse_literal();
599	break;
600	case regex_constants::escape_type_not_word_assert:
601	if(this->flags() & regbase::emacs_ex)
602	{
603	++m_position;
604	this->append_state(syntax_element_within_word);
605	}
606	else
607	result = parse_literal();
608	break;
609	case regex_constants::escape_type_left_word:
610	if(this->flags() & regbase::emacs_ex)
611	{
612	++m_position;
613	this->append_state(syntax_element_word_start);
614	}
615	else
616	result = parse_literal();
617	break;
618	case regex_constants::escape_type_right_word:
619	if(this->flags() & regbase::emacs_ex)
620	{
621	++m_position;
622	this->append_state(syntax_element_word_end);
623	}
624	else
625	result = parse_literal();
626	break;
627	default:
628	if(this->flags() & regbase::emacs_ex)
629	{
630	bool negate = true;
631	switch(*m_position)
632	{
633	case `'w'`:
634	negate = false;
635	BOOST_FALLTHROUGH;
636	case `'W'`:
637	{
638	basic_char_set<charT, traits> char_set;
639	if(negate)
640	char_set.negate();
641	char_set.add_class(this->m_word_mask);
642	if(`0` == this->append_set(char_set))
643	{
644	fail(regex_constants::error_ctype, m_position - m_base);
645	return false;
646	}
647	++m_position;
648	return true;
649	}
650	case `'s'`:
651	negate = false;
652	BOOST_FALLTHROUGH;
653	case `'S'`:
654	return add_emacs_code(negate);
655	case `'c'`:
656	case `'C'`:
657	// not supported yet:
658	fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
659	return false;
660	default:
661	break;
662	}
663	}
664	result = parse_literal();
665	break;
666	}
667	return result;
668	}
669
670	template <class charT, class traits>
671	bool basic_regex_parser<charT, traits>::parse_extended_escape()
672	{
673	++m_position;
674	if(m_position == m_end)
675	{
676	fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
677	return false;
678	}
679	bool negate = false; // in case this is a character class escape: \w \d etc
680	switch(this->m_traits.escape_syntax_type(*m_position))
681	{
682	case regex_constants::escape_type_not_class:
683	negate = true;
684	BOOST_FALLTHROUGH;
685	case regex_constants::escape_type_class:
686	{
687	escape_type_class_jump:
688	typedef typename traits::char_class_type m_type;
689	m_type m = this->m_traits.lookup_classname(m_position, m_position+`1`);
690	if(m != `0`)
691	{
692	basic_char_set<charT, traits> char_set;
693	if(negate)
694	char_set.negate();
695	char_set.add_class(m);
696	if(`0` == this->append_set(char_set))
697	{
698	fail(regex_constants::error_ctype, m_position - m_base);
699	return false;
700	}
701	++m_position;
702	return true;
703	}
704	//
705	// not a class, just a regular unknown escape:
706	//
707	this->append_literal(unescape_character());
708	break;
709	}
710	case regex_constants::syntax_digit:
711	return parse_backref();
712	case regex_constants::escape_type_left_word:
713	++m_position;
714	this->append_state(syntax_element_word_start);
715	break;
716	case regex_constants::escape_type_right_word:
717	++m_position;
718	this->append_state(syntax_element_word_end);
719	break;
720	case regex_constants::escape_type_start_buffer:
721	++m_position;
722	this->append_state(syntax_element_buffer_start);
723	break;
724	case regex_constants::escape_type_end_buffer:
725	++m_position;
726	this->append_state(syntax_element_buffer_end);
727	break;
728	case regex_constants::escape_type_word_assert:
729	++m_position;
730	this->append_state(syntax_element_word_boundary);
731	break;
732	case regex_constants::escape_type_not_word_assert:
733	++m_position;
734	this->append_state(syntax_element_within_word);
735	break;
736	case regex_constants::escape_type_Z:
737	++m_position;
738	this->append_state(syntax_element_soft_buffer_end);
739	break;
740	case regex_constants::escape_type_Q:
741	return parse_QE();
742	case regex_constants::escape_type_C:
743	return parse_match_any();
744	case regex_constants::escape_type_X:
745	++m_position;
746	this->append_state(syntax_element_combining);
747	break;
748	case regex_constants::escape_type_G:
749	++m_position;
750	this->append_state(syntax_element_restart_continue);
751	break;
752	case regex_constants::escape_type_not_property:
753	negate = true;
754	BOOST_FALLTHROUGH;
755	case regex_constants::escape_type_property:
756	{
757	++m_position;
758	char_class_type m;
759	if(m_position == m_end)
760	{
761	fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
762	return false;
763	}
764	// maybe have \p{ddd}
765	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
766	{
767	const charT* base = m_position;
768	// skip forward until we find enclosing brace:
769	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
770	++m_position;
771	if(m_position == m_end)
772	{
773	fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
774	return false;
775	}
776	m = this->m_traits.lookup_classname(++base, m_position++);
777	}
778	else
779	{
780	m = this->m_traits.lookup_classname(m_position, m_position+`1`);
781	++m_position;
782	}
783	if(m != `0`)
784	{
785	basic_char_set<charT, traits> char_set;
786	if(negate)
787	char_set.negate();
788	char_set.add_class(m);
789	if(`0` == this->append_set(char_set))
790	{
791	fail(regex_constants::error_ctype, m_position - m_base);
792	return false;
793	}
794	return true;
795	}
796	fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
797	return false;
798	}
799	case regex_constants::escape_type_reset_start_mark:
800	if(`0` == (this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)))
801	{
802	re_brace* pb = static_cast<re_brace>(this->append_state(syntax_element_startmark, sizeof*(re_brace)));
803	pb->index = -`5`;
804	pb->icase = this->flags() & regbase::icase;
805	this->m_pdata->m_data.align();
806	++m_position;
807	return true;
808	}
809	goto escape_type_class_jump;
810	case regex_constants::escape_type_line_ending:
811	if(`0` == (this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)))
812	{
813	const charT* e = get_escape_R_string<charT>();
814	const charT* old_position = m_position;
815	const charT* old_end = m_end;
816	const charT* old_base = m_base;
817	m_position = e;
818	m_base = e;
819	m_end = e + traits::length(e);
820	bool r = parse_all();
821	m_position = ++old_position;
822	m_end = old_end;
823	m_base = old_base;
824	return r;
825	}
826	goto escape_type_class_jump;
827	case regex_constants::escape_type_extended_backref:
828	if(`0` == (this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)))
829	{
830	bool have_brace = false;
831	bool negative = false;
832	static const char* incomplete_message = "Incomplete \\g escape found.";
833	if(++m_position == m_end)
834	{
835	fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
836	return false;
837	}
838	// maybe have \g{ddd}
839	regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
840	regex_constants::syntax_type syn_end = `0`;
841	if((syn == regex_constants::syntax_open_brace)
842	\|\| (syn == regex_constants::escape_type_left_word)
843	\|\| (syn == regex_constants::escape_type_end_buffer))
844	{
845	if(++m_position == m_end)
846	{
847	fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
848	return false;
849	}
850	have_brace = true;
851	switch(syn)
852	{
853	case regex_constants::syntax_open_brace:
854	syn_end = regex_constants::syntax_close_brace;
855	break;
856	case regex_constants::escape_type_left_word:
857	syn_end = regex_constants::escape_type_right_word;
858	break;
859	default:
860	syn_end = regex_constants::escape_type_end_buffer;
861	break;
862	}
863	}
864	negative = (m_position == static_cast*<charT>(`'-'`));
865	if((negative) && (++m_position == m_end))
866	{
867	fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
868	return false;
869	}
870	const charT* pc = m_position;
871	int i = this->m_traits.toi(pc, m_end, `10`);
872	if((i < `0`) && syn_end)
873	{
874	// Check for a named capture, get the leftmost one if there is more than one:
875	const charT* base = m_position;
876	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
877	{
878	++m_position;
879	}
880	i = hash_value_from_capture_name(base, m_position);
881	pc = m_position;
882	}
883	if(negative)
884	i = `1` + m_mark_count - i;
885	if(((i > `0`) && (this->m_backrefs & (`1u` << (i-`1`)))) \|\| ((i > `10000`) && (this->m_pdata->get_id(i) > `0`) && (this->m_backrefs & (`1u` << (this->m_pdata->get_id(i)-`1`)))))
886	{
887	m_position = pc;
888	re_brace* pb = static_cast<re_brace>(this->append_state(syntax_element_backref, sizeof*(re_brace)));
889	pb->index = i;
890	pb->icase = this->flags() & regbase::icase;
891	}
892	else
893	{
894	fail(regex_constants::error_backref, m_position - m_base);
895	return false;
896	}
897	m_position = pc;
898	if(have_brace)
899	{
900	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != syn_end))
901	{
902	fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
903	return false;
904	}
905	++m_position;
906	}
907	return true;
908	}
909	goto escape_type_class_jump;
910	case regex_constants::escape_type_control_v:
911	if(`0` == (this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)))
912	goto escape_type_class_jump;
913	BOOST_FALLTHROUGH;
914	default:
915	this->append_literal(unescape_character());
916	break;
917	}
918	return true;
919	}
920
921	template <class charT, class traits>
922	bool basic_regex_parser<charT, traits>::parse_match_any()
923	{
924	//
925	// we have a '.' that can match any character:
926	//
927	++m_position;
928	static_cast<re_dot*>(
929	this->append_state(syntax_element_wild, sizeof(re_dot))
930	)->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
931	? BOOST_REGEX_DETAIL_NS::force_not_newline
932	: this->flags() & regbase::mod_s ?
933	BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
934	return true;
935	}
936
937	template <class charT, class traits>
938	bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
939	{
940	bool greedy = true;
941	bool pocessive = false;
942	std::size_t insert_point;
943	//
944	// when we get to here we may have a non-greedy ? mark still to come:
945	//
946	if((m_position != m_end)
947	&& (
948	(`0` == (this->flags() & (regbase::main_option_type \| regbase::no_perl_ex)))
949	\|\| ((regbase::basic_syntax_group\|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type \| regbase::emacs_ex)))
950	)
951	)
952	{
953	// OK we have a perl or emacs regex, check for a '?':
954	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
955	{
956	greedy = false;
957	++m_position;
958	}
959	// for perl regexes only check for pocessive ++ repeats.
960	if((m_position != m_end)
961	&& (`0` == (this->flags() & regbase::main_option_type))
962	&& (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
963	{
964	pocessive = true;
965	++m_position;
966	}
967	}
968	if(`0` == this->m_last_state)
969	{
970	fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
971	return false;
972	}
973	if(this->m_last_state->type == syntax_element_endmark)
974	{
975	// insert a repeat before the '(' matching the last ')':
976	insert_point = this->m_paren_start;
977	}
978	else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal>(this*->m_last_state)->length > `1`))
979	{
980	// the last state was a literal with more than one character, split it in two:
981	re_literal* lit = static_cast<re_literal>(this*->m_last_state);
982	charT c = (static_cast<charT>(static_cast<void**>(lit+`1`)))[lit->length - `1`];
983	lit->length -= `1`;
984	// now append new state:
985	lit = static_cast<re_literal>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof*(charT)));
986	lit->length = `1`;
987	(static_cast<charT>(static_cast<void**>(lit+`1`)))[`0`] = c;
988	insert_point = this->getoffset(this->m_last_state);
989	}
990	else
991	{
992	// repeat the last state whatever it was, need to add some error checking here:
993	switch(this->m_last_state->type)
994	{
995	case syntax_element_start_line:
996	case syntax_element_end_line:
997	case syntax_element_word_boundary:
998	case syntax_element_within_word:
999	case syntax_element_word_start:
1000	case syntax_element_word_end:
1001	case syntax_element_buffer_start:
1002	case syntax_element_buffer_end:
1003	case syntax_element_alt:
1004	case syntax_element_soft_buffer_end:
1005	case syntax_element_restart_continue:
1006	case syntax_element_jump:
1007	case syntax_element_startmark:
1008	case syntax_element_backstep:
1009	// can't legally repeat any of the above:
1010	fail(regex_constants::error_badrepeat, m_position - m_base);
1011	return false;
1012	default:
1013	// do nothing...
1014	break;
1015	}
1016	insert_point = this->getoffset(this->m_last_state);
1017	}
1018	//
1019	// OK we now know what to repeat, so insert the repeat around it:
1020	//
1021	re_repeat* rep = static_cast<re_repeat>(this*->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1022	rep->min = low;
1023	rep->max = high;
1024	rep->greedy = greedy;
1025	rep->leading = false;
1026	// store our repeater position for later:
1027	std::ptrdiff_t rep_off = this->getoffset(rep);
1028	// and append a back jump to the repeat:
1029	re_jump* jmp = static_cast<re_jump>(this->append_state(syntax_element_jump, sizeof*(re_jump)));
1030	jmp->alt.i = rep_off - this->getoffset(jmp);
1031	this->m_pdata->m_data.align();
1032	// now fill in the alt jump for the repeat:
1033	rep = static_cast<re_repeat>(this*->getaddress(rep_off));
1034	rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1035	//
1036	// If the repeat is pocessive then bracket the repeat with a (?>...)
1037	// independent sub-expression construct:
1038	//
1039	if(pocessive)
1040	{
1041	if(m_position != m_end)
1042	{
1043	//
1044	// Check for illegal following quantifier, we have to do this here, because
1045	// the extra states we insert below circumvents our usual error checking :-(
1046	//
1047	switch(this->m_traits.syntax_type(*m_position))
1048	{
1049	case regex_constants::syntax_star:
1050	case regex_constants::syntax_plus:
1051	case regex_constants::syntax_question:
1052	case regex_constants::syntax_open_brace:
1053	fail(regex_constants::error_badrepeat, m_position - m_base);
1054	return false;
1055	}
1056	}
1057	re_brace* pb = static_cast<re_brace>(this->insert_state(insert_point, syntax_element_startmark, sizeof*(re_brace)));
1058	pb->index = -`3`;
1059	pb->icase = this->flags() & regbase::icase;
1060	jmp = static_cast<re_jump>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof*(re_jump)));
1061	this->m_pdata->m_data.align();
1062	jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1063	pb = static_cast<re_brace>(this->append_state(syntax_element_endmark, sizeof*(re_brace)));
1064	pb->index = -`3`;
1065	pb->icase = this->flags() & regbase::icase;
1066	}
1067	return true;
1068	}
1069
1070	template <class charT, class traits>
1071	bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1072	{
1073	static const char* incomplete_message = "Missing } in quantified repetition.";
1074	//
1075	// parse a repeat-range:
1076	//
1077	std::size_t min, max;
1078	int v;
1079	// skip whitespace:
1080	while((m_position != m_end) && this->m_traits.isctype(m_position, this*->m_mask_space))
1081	++m_position;
1082	if(this->m_position == this->m_end)
1083	{
1084	if(this->flags() & (regbase::main_option_type \| regbase::no_perl_ex))
1085	{
1086	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1087	return false;
1088	}
1089	// Treat the opening '{' as a literal character, rewind to start of error:
1090	--m_position;
1091	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1092	return parse_literal();
1093	}
1094	// get min:
1095	v = this->m_traits.toi(m_position, m_end, `10`);
1096	// skip whitespace:
1097	if(v < `0`)
1098	{
1099	if(this->flags() & (regbase::main_option_type \| regbase::no_perl_ex))
1100	{
1101	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1102	return false;
1103	}
1104	// Treat the opening '{' as a literal character, rewind to start of error:
1105	--m_position;
1106	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1107	return parse_literal();
1108	}
1109	while((m_position != m_end) && this->m_traits.isctype(m_position, this*->m_mask_space))
1110	++m_position;
1111	if(this->m_position == this->m_end)
1112	{
1113	if(this->flags() & (regbase::main_option_type \| regbase::no_perl_ex))
1114	{
1115	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1116	return false;
1117	}
1118	// Treat the opening '{' as a literal character, rewind to start of error:
1119	--m_position;
1120	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1121	return parse_literal();
1122	}
1123	min = v;
1124	// see if we have a comma:
1125	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1126	{
1127	// move on and error check:
1128	++m_position;
1129	// skip whitespace:
1130	while((m_position != m_end) && this->m_traits.isctype(m_position, this*->m_mask_space))
1131	++m_position;
1132	if(this->m_position == this->m_end)
1133	{
1134	if(this->flags() & (regbase::main_option_type \| regbase::no_perl_ex))
1135	{
1136	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1137	return false;
1138	}
1139	// Treat the opening '{' as a literal character, rewind to start of error:
1140	--m_position;
1141	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1142	return parse_literal();
1143	}
1144	// get the value if any:
1145	v = this->m_traits.toi(m_position, m_end, `10`);
1146	max = (v >= `0`) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1147	}
1148	else
1149	{
1150	// no comma, max = min:
1151	max = min;
1152	}
1153	// skip whitespace:
1154	while((m_position != m_end) && this->m_traits.isctype(m_position, this*->m_mask_space))
1155	++m_position;
1156	// OK now check trailing }:
1157	if(this->m_position == this->m_end)
1158	{
1159	if(this->flags() & (regbase::main_option_type \| regbase::no_perl_ex))
1160	{
1161	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1162	return false;
1163	}
1164	// Treat the opening '{' as a literal character, rewind to start of error:
1165	--m_position;
1166	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1167	return parse_literal();
1168	}
1169	if(isbasic)
1170	{
1171	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1172	{
1173	++m_position;
1174	if(this->m_position == this->m_end)
1175	{
1176	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1177	return false;
1178	}
1179	}
1180	else
1181	{
1182	fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1183	return false;
1184	}
1185	}
1186	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1187	++m_position;
1188	else
1189	{
1190	// Treat the opening '{' as a literal character, rewind to start of error:
1191	--m_position;
1192	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1193	return parse_literal();
1194	}
1195	//
1196	// finally go and add the repeat, unless error:
1197	//
1198	if(min > max)
1199	{
1200	// Backtrack to error location:
1201	m_position -= `2`;
1202	while(this->m_traits.isctype(m_position, this*->m_word_mask)) --m_position;
1203	++m_position;
1204	fail(regex_constants::error_badbrace, m_position - m_base);
1205	return false;
1206	}
1207	return parse_repeat(low: min, high: max);
1208	}
1209
1210	template <class charT, class traits>
1211	bool basic_regex_parser<charT, traits>::parse_alt()
1212	{
1213	//
1214	// error check: if there have been no previous states,
1215	// or if the last state was a '(' then error:
1216	//
1217	if(
1218	((this->m_last_state == `0`) \|\| (this->m_last_state->type == syntax_element_startmark))
1219	&&
1220	!(
1221	((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1222	&&
1223	((this->flags() & regbase::no_empty_expressions) == `0`)
1224	)
1225	)
1226	{
1227	fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator \|.");
1228	return false;
1229	}
1230	//
1231	// Reset mark count if required:
1232	//
1233	if(m_max_mark < m_mark_count)
1234	m_max_mark = m_mark_count;
1235	if(m_mark_reset >= `0`)
1236	m_mark_count = m_mark_reset;
1237
1238	++m_position;
1239	//
1240	// we need to append a trailing jump:
1241	//
1242	re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1243	std::ptrdiff_t jump_offset = this->getoffset(pj);
1244	//
1245	// now insert the alternative:
1246	//
1247	re_alt* palt = static_cast<re_alt>(this->insert_state(this*->m_alt_insert_point, syntax_element_alt, re_alt_size));
1248	jump_offset += re_alt_size;
1249	this->m_pdata->m_data.align();
1250	palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1251	//
1252	// update m_alt_insert_point so that the next alternate gets
1253	// inserted at the start of the second of the two we've just created:
1254	//
1255	this->m_alt_insert_point = this->m_pdata->m_data.size();
1256	//
1257	// the start of this alternative must have a case changes state
1258	// if the current block has messed around with case changes:
1259	//
1260	if(m_has_case_change)
1261	{
1262	static_cast<re_case*>(
1263	this->append_state(syntax_element_toggle_case, sizeof(re_case))
1264	)->icase = this->m_icase;
1265	}
1266	//
1267	// push the alternative onto our stack, a recursive
1268	// implementation here is easier to understand (and faster
1269	// as it happens), but causes all kinds of stack overflow problems
1270	// on programs with small stacks (COM+).
1271	//
1272	m_alt_jumps.push_back(x: jump_offset);
1273	return true;
1274	}
1275
1276	template <class charT, class traits>
1277	bool basic_regex_parser<charT, traits>::parse_set()
1278	{
1279	static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1280	++m_position;
1281	if(m_position == m_end)
1282	{
1283	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1284	return false;
1285	}
1286	basic_char_set<charT, traits> char_set;
1287
1288	const charT* base = m_position; // where the '[' was
1289	const charT* item_base = m_position; // where the '[' or '^' was
1290
1291	while(m_position != m_end)
1292	{
1293	switch(this->m_traits.syntax_type(*m_position))
1294	{
1295	case regex_constants::syntax_caret:
1296	if(m_position == base)
1297	{
1298	char_set.negate();
1299	++m_position;
1300	item_base = m_position;
1301	}
1302	else
1303	parse_set_literal(char_set);
1304	break;
1305	case regex_constants::syntax_close_set:
1306	if(m_position == item_base)
1307	{
1308	parse_set_literal(char_set);
1309	break;
1310	}
1311	else
1312	{
1313	++m_position;
1314	if(`0` == this->append_set(char_set))
1315	{
1316	fail(regex_constants::error_ctype, m_position - m_base);
1317	return false;
1318	}
1319	}
1320	return true;
1321	case regex_constants::syntax_open_set:
1322	if(parse_inner_set(char_set))
1323	break;
1324	return true;
1325	case regex_constants::syntax_escape:
1326	{
1327	//
1328	// look ahead and see if this is a character class shortcut
1329	// \d \w \s etc...
1330	//
1331	++m_position;
1332	if(this->m_traits.escape_syntax_type(*m_position)
1333	== regex_constants::escape_type_class)
1334	{
1335	char_class_type m = this->m_traits.lookup_classname(m_position, m_position+`1`);
1336	if(m != `0`)
1337	{
1338	char_set.add_class(m);
1339	++m_position;
1340	break;
1341	}
1342	}
1343	else if(this->m_traits.escape_syntax_type(*m_position)
1344	== regex_constants::escape_type_not_class)
1345	{
1346	// negated character class:
1347	char_class_type m = this->m_traits.lookup_classname(m_position, m_position+`1`);
1348	if(m != `0`)
1349	{
1350	char_set.add_negated_class(m);
1351	++m_position;
1352	break;
1353	}
1354	}
1355	// not a character class, just a regular escape:
1356	--m_position;
1357	parse_set_literal(char_set);
1358	break;
1359	}
1360	default:
1361	parse_set_literal(char_set);
1362	break;
1363	}
1364	}
1365	return m_position != m_end;
1366	}
1367
1368	template <class charT, class traits>
1369	bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1370	{
1371	static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1372	//
1373	// we have either a character class [:name:]
1374	// a collating element [.name.]
1375	// or an equivalence class [=name=]
1376	//
1377	if(m_end == ++m_position)
1378	{
1379	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1380	return false;
1381	}
1382	switch(this->m_traits.syntax_type(*m_position))
1383	{
1384	case regex_constants::syntax_dot:
1385	//
1386	// a collating element is treated as a literal:
1387	//
1388	--m_position;
1389	parse_set_literal(char_set);
1390	return true;
1391	case regex_constants::syntax_colon:
1392	{
1393	// check that character classes are actually enabled:
1394	if((this->flags() & (regbase::main_option_type \| regbase::no_char_classes))
1395	== (regbase::basic_syntax_group \| regbase::no_char_classes))
1396	{
1397	--m_position;
1398	parse_set_literal(char_set);
1399	return true;
1400	}
1401	// skip the ':'
1402	if(m_end == ++m_position)
1403	{
1404	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1405	return false;
1406	}
1407	const charT* name_first = m_position;
1408	// skip at least one character, then find the matching ':]'
1409	if(m_end == ++m_position)
1410	{
1411	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1412	return false;
1413	}
1414	while((m_position != m_end)
1415	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1416	++m_position;
1417	const charT* name_last = m_position;
1418	if(m_end == m_position)
1419	{
1420	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1421	return false;
1422	}
1423	if((m_end == ++m_position)
1424	\|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1425	{
1426	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1427	return false;
1428	}
1429	//
1430	// check for negated class:
1431	//
1432	bool negated = false;
1433	if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1434	{
1435	++name_first;
1436	negated = true;
1437	}
1438	typedef typename traits::char_class_type m_type;
1439	m_type m = this->m_traits.lookup_classname(name_first, name_last);
1440	if(m == `0`)
1441	{
1442	if(char_set.empty() && (name_last - name_first == `1`))
1443	{
1444	// maybe a special case:
1445	++m_position;
1446	if( (m_position != m_end)
1447	&& (this->m_traits.syntax_type(*m_position)
1448	== regex_constants::syntax_close_set))
1449	{
1450	if(this->m_traits.escape_syntax_type(*name_first)
1451	== regex_constants::escape_type_left_word)
1452	{
1453	++m_position;
1454	this->append_state(syntax_element_word_start);
1455	return false;
1456	}
1457	if(this->m_traits.escape_syntax_type(*name_first)
1458	== regex_constants::escape_type_right_word)
1459	{
1460	++m_position;
1461	this->append_state(syntax_element_word_end);
1462	return false;
1463	}
1464	}
1465	}
1466	fail(regex_constants::error_ctype, name_first - m_base);
1467	return false;
1468	}
1469	if(negated == false)
1470	char_set.add_class(m);
1471	else
1472	char_set.add_negated_class(m);
1473	++m_position;
1474	break;
1475	}
1476	case regex_constants::syntax_equal:
1477	{
1478	// skip the '='
1479	if(m_end == ++m_position)
1480	{
1481	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1482	return false;
1483	}
1484	const charT* name_first = m_position;
1485	// skip at least one character, then find the matching '=]'
1486	if(m_end == ++m_position)
1487	{
1488	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1489	return false;
1490	}
1491	while((m_position != m_end)
1492	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1493	++m_position;
1494	const charT* name_last = m_position;
1495	if(m_end == m_position)
1496	{
1497	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1498	return false;
1499	}
1500	if((m_end == ++m_position)
1501	\|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1502	{
1503	fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1504	return false;
1505	}
1506	string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1507	if((`0` == m.size()) \|\| (m.size() > `2`))
1508	{
1509	fail(regex_constants::error_collate, name_first - m_base);
1510	return false;
1511	}
1512	digraph<charT> d;
1513	d.first = m[`0`];
1514	if(m.size() > `1`)
1515	d.second = m[`1`];
1516	else
1517	d.second = `0`;
1518	char_set.add_equivalent(d);
1519	++m_position;
1520	break;
1521	}
1522	default:
1523	--m_position;
1524	parse_set_literal(char_set);
1525	break;
1526	}
1527	return true;
1528	}
1529
1530	template <class charT, class traits>
1531	void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1532	{
1533	digraph<charT> start_range(get_next_set_literal(char_set));
1534	if(m_end == m_position)
1535	{
1536	fail(regex_constants::error_brack, m_position - m_base);
1537	return;
1538	}
1539	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1540	{
1541	// we have a range:
1542	if(m_end == ++m_position)
1543	{
1544	fail(regex_constants::error_brack, m_position - m_base);
1545	return;
1546	}
1547	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1548	{
1549	digraph<charT> end_range = get_next_set_literal(char_set);
1550	char_set.add_range(start_range, end_range);
1551	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1552	{
1553	if(m_end == ++m_position)
1554	{
1555	fail(regex_constants::error_brack, m_position - m_base);
1556	return;
1557	}
1558	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1559	{
1560	// trailing - :
1561	--m_position;
1562	return;
1563	}
1564	fail(regex_constants::error_range, m_position - m_base);
1565	return;
1566	}
1567	return;
1568	}
1569	--m_position;
1570	}
1571	char_set.add_single(start_range);
1572	}
1573
1574	template <class charT, class traits>
1575	digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1576	{
1577	digraph<charT> result;
1578	switch(this->m_traits.syntax_type(*m_position))
1579	{
1580	case regex_constants::syntax_dash:
1581	if(!char_set.empty())
1582	{
1583	// see if we are at the end of the set:
1584	if((++m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1585	{
1586	fail(regex_constants::error_range, m_position - m_base);
1587	return result;
1588	}
1589	--m_position;
1590	}
1591	result.first = *m_position++;
1592	return result;
1593	case regex_constants::syntax_escape:
1594	// check to see if escapes are supported first:
1595	if(this->flags() & regex_constants::no_escape_in_lists)
1596	{
1597	result = *m_position++;
1598	break;
1599	}
1600	++m_position;
1601	result = unescape_character();
1602	break;
1603	case regex_constants::syntax_open_set:
1604	{
1605	if(m_end == ++m_position)
1606	{
1607	fail(regex_constants::error_collate, m_position - m_base);
1608	return result;
1609	}
1610	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1611	{
1612	--m_position;
1613	result.first = *m_position;
1614	++m_position;
1615	return result;
1616	}
1617	if(m_end == ++m_position)
1618	{
1619	fail(regex_constants::error_collate, m_position - m_base);
1620	return result;
1621	}
1622	const charT* name_first = m_position;
1623	// skip at least one character, then find the matching ':]'
1624	if(m_end == ++m_position)
1625	{
1626	fail(regex_constants::error_collate, name_first - m_base);
1627	return result;
1628	}
1629	while((m_position != m_end)
1630	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1631	++m_position;
1632	const charT* name_last = m_position;
1633	if(m_end == m_position)
1634	{
1635	fail(regex_constants::error_collate, name_first - m_base);
1636	return result;
1637	}
1638	if((m_end == ++m_position)
1639	\|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1640	{
1641	fail(regex_constants::error_collate, name_first - m_base);
1642	return result;
1643	}
1644	++m_position;
1645	string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1646	if(s.empty() \|\| (s.size() > `2`))
1647	{
1648	fail(regex_constants::error_collate, name_first - m_base);
1649	return result;
1650	}
1651	result.first = s[`0`];
1652	if(s.size() > `1`)
1653	result.second = s[`1`];
1654	else
1655	result.second = `0`;
1656	return result;
1657	}
1658	default:
1659	result = *m_position++;
1660	}
1661	return result;
1662	}
1663
1664	//
1665	// does a value fit in the specified charT type?
1666	//
1667	template <class charT>
1668	bool valid_value(charT, int v, const mpl::true_&)
1669	{
1670	return (v >> (sizeof(charT) * CHAR_BIT)) == `0`;
1671	}
1672	template <class charT>
1673	bool valid_value(charT, int, const mpl::false_&)
1674	{
1675	return true; // v will alsways fit in a charT
1676	}
1677	template <class charT>
1678	bool valid_value(charT c, int v)
1679	{
1680	return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
1681	}
1682
1683	template <class charT, class traits>
1684	charT basic_regex_parser<charT, traits>::unescape_character()
1685	{
1686	#ifdef BOOST_MSVC
1687	#pragma warning(push)
1688	#pragma warning(disable:4127)
1689	#endif
1690	charT result(`0`);
1691	if(m_position == m_end)
1692	{
1693	fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1694	return false;
1695	}
1696	switch(this->m_traits.escape_syntax_type(*m_position))
1697	{
1698	case regex_constants::escape_type_control_a:
1699	result = charT(`'\a'`);
1700	break;
1701	case regex_constants::escape_type_e:
1702	result = charT(`27`);
1703	break;
1704	case regex_constants::escape_type_control_f:
1705	result = charT(`'\f'`);
1706	break;
1707	case regex_constants::escape_type_control_n:
1708	result = charT(`'\n'`);
1709	break;
1710	case regex_constants::escape_type_control_r:
1711	result = charT(`'\r'`);
1712	break;
1713	case regex_constants::escape_type_control_t:
1714	result = charT(`'\t'`);
1715	break;
1716	case regex_constants::escape_type_control_v:
1717	result = charT(`'\v'`);
1718	break;
1719	case regex_constants::escape_type_word_assert:
1720	result = charT(`'\b'`);
1721	break;
1722	case regex_constants::escape_type_ascii_control:
1723	++m_position;
1724	if(m_position == m_end)
1725	{
1726	// Rewind to start of escape:
1727	--m_position;
1728	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1729	fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1730	return result;
1731	}
1732	result = static_cast<charT>(*m_position % `32`);
1733	break;
1734	case regex_constants::escape_type_hex:
1735	++m_position;
1736	if(m_position == m_end)
1737	{
1738	// Rewind to start of escape:
1739	--m_position;
1740	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1741	fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1742	return result;
1743	}
1744	// maybe have \x{ddd}
1745	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1746	{
1747	++m_position;
1748	if(m_position == m_end)
1749	{
1750	// Rewind to start of escape:
1751	--m_position;
1752	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1753	fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1754	return result;
1755	}
1756	int i = this->m_traits.toi(m_position, m_end, `16`);
1757	if((m_position == m_end)
1758	\|\| (i < `0`)
1759	\|\| ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
1760	\|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1761	{
1762	// Rewind to start of escape:
1763	--m_position;
1764	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1765	fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1766	return result;
1767	}
1768	++m_position;
1769	result = charT(i);
1770	}
1771	else
1772	{
1773	std::ptrdiff_t len = (std::min)(a: static_cast<std::ptrdiff_t>(`2`), b: static_cast<std::ptrdiff_t>(m_end - m_position));
1774	int i = this->m_traits.toi(m_position, m_position + len, `16`);
1775	if((i < `0`)
1776	\|\| !valid_value(charT(`0`), i))
1777	{
1778	// Rewind to start of escape:
1779	--m_position;
1780	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1781	fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1782	return result;
1783	}
1784	result = charT(i);
1785	}
1786	return result;
1787	case regex_constants::syntax_digit:
1788	{
1789	// an octal escape sequence, the first character must be a zero
1790	// followed by up to 3 octal digits:
1791	std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(`4`));
1792	const charT* bp = m_position;
1793	int val = this->m_traits.toi(bp, bp + `1`, `8`);
1794	if(val != `0`)
1795	{
1796	// Rewind to start of escape:
1797	--m_position;
1798	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1799	// Oops not an octal escape after all:
1800	fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1801	return result;
1802	}
1803	val = this->m_traits.toi(m_position, m_position + len, `8`);
1804	if(val < `0`)
1805	{
1806	// Rewind to start of escape:
1807	--m_position;
1808	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809	fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1810	return result;
1811	}
1812	return static_cast<charT>(val);
1813	}
1814	case regex_constants::escape_type_named_char:
1815	{
1816	++m_position;
1817	if(m_position == m_end)
1818	{
1819	// Rewind to start of escape:
1820	--m_position;
1821	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1822	fail(regex_constants::error_escape, m_position - m_base);
1823	return false;
1824	}
1825	// maybe have \N{name}
1826	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1827	{
1828	const charT* base = m_position;
1829	// skip forward until we find enclosing brace:
1830	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1831	++m_position;
1832	if(m_position == m_end)
1833	{
1834	// Rewind to start of escape:
1835	--m_position;
1836	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1837	fail(regex_constants::error_escape, m_position - m_base);
1838	return false;
1839	}
1840	string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1841	if(s.empty())
1842	{
1843	// Rewind to start of escape:
1844	--m_position;
1845	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1846	fail(regex_constants::error_collate, m_position - m_base);
1847	return false;
1848	}
1849	if(s.size() == `1`)
1850	{
1851	return s[`0`];
1852	}
1853	}
1854	// fall through is a failure:
1855	// Rewind to start of escape:
1856	--m_position;
1857	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1858	fail(regex_constants::error_escape, m_position - m_base);
1859	return false;
1860	}
1861	default:
1862	result = *m_position;
1863	break;
1864	}
1865	++m_position;
1866	return result;
1867	#ifdef BOOST_MSVC
1868	#pragma warning(pop)
1869	#endif
1870	}
1871
1872	template <class charT, class traits>
1873	bool basic_regex_parser<charT, traits>::parse_backref()
1874	{
1875	BOOST_ASSERT(m_position != m_end);
1876	const charT* pc = m_position;
1877	int i = this->m_traits.toi(pc, pc + `1`, `10`);
1878	if((i == `0`) \|\| (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1879	{
1880	// not a backref at all but an octal escape sequence:
1881	charT c = unescape_character();
1882	this->append_literal(c);
1883	}
1884	else if((i > `0`) && (this->m_backrefs & (`1u` << (i-`1`))))
1885	{
1886	m_position = pc;
1887	re_brace* pb = static_cast<re_brace>(this->append_state(syntax_element_backref, sizeof*(re_brace)));
1888	pb->index = i;
1889	pb->icase = this->flags() & regbase::icase;
1890	}
1891	else
1892	{
1893	// Rewind to start of escape:
1894	--m_position;
1895	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1896	fail(regex_constants::error_backref, m_position - m_base);
1897	return false;
1898	}
1899	return true;
1900	}
1901
1902	template <class charT, class traits>
1903	bool basic_regex_parser<charT, traits>::parse_QE()
1904	{
1905	#ifdef BOOST_MSVC
1906	#pragma warning(push)
1907	#pragma warning(disable:4127)
1908	#endif
1909	//
1910	// parse a \Q...\E sequence:
1911	//
1912	++m_position; // skip the Q
1913	const charT* start = m_position;
1914	const charT* end;
1915	do
1916	{
1917	while((m_position != m_end)
1918	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1919	++m_position;
1920	if(m_position == m_end)
1921	{
1922	// a \Q...\E sequence may terminate with the end of the expression:
1923	end = m_position;
1924	break;
1925	}
1926	if(++m_position == m_end) // skip the escape
1927	{
1928	fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1929	return false;
1930	}
1931	// check to see if it's a \E:
1932	if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
1933	{
1934	++m_position;
1935	end = m_position - `2`;
1936	break;
1937	}
1938	// otherwise go round again:
1939	}while(true);
1940	//
1941	// now add all the character between the two escapes as literals:
1942	//
1943	while(start != end)
1944	{
1945	this->append_literal(*start);
1946	++start;
1947	}
1948	return true;
1949	#ifdef BOOST_MSVC
1950	#pragma warning(pop)
1951	#endif
1952	}
1953
1954	template <class charT, class traits>
1955	bool basic_regex_parser<charT, traits>::parse_perl_extension()
1956	{
1957	if(++m_position == m_end)
1958	{
1959	// Rewind to start of (? sequence:
1960	--m_position;
1961	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1962	fail(regex_constants::error_perl_extension, m_position - m_base);
1963	return false;
1964	}
1965	//
1966	// treat comments as a special case, as these
1967	// are the only ones that don't start with a leading
1968	// startmark state:
1969	//
1970	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1971	{
1972	while((m_position != m_end)
1973	&& (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1974	{}
1975	return true;
1976	}
1977	//
1978	// backup some state, and prepare the way:
1979	//
1980	int markid = `0`;
1981	std::ptrdiff_t jump_offset = `0`;
1982	re_brace* pb = static_cast<re_brace>(this->append_state(syntax_element_startmark, sizeof*(re_brace)));
1983	pb->icase = this->flags() & regbase::icase;
1984	std::ptrdiff_t last_paren_start = this->getoffset(pb);
1985	// back up insertion point for alternations, and set new point:
1986	std::ptrdiff_t last_alt_point = m_alt_insert_point;
1987	this->m_pdata->m_data.align();
1988	m_alt_insert_point = this->m_pdata->m_data.size();
1989	std::ptrdiff_t expected_alt_point = m_alt_insert_point;
1990	bool restore_flags = true;
1991	regex_constants::syntax_option_type old_flags = this->flags();
1992	bool old_case_change = m_has_case_change;
1993	m_has_case_change = false;
1994	charT name_delim;
1995	int mark_reset = m_mark_reset;
1996	int max_mark = m_max_mark;
1997	m_mark_reset = -`1`;
1998	m_max_mark = m_mark_count;
1999	int v;
2000	//
2001	// select the actual extension used:
2002	//
2003	switch(this->m_traits.syntax_type(*m_position))
2004	{
2005	case regex_constants::syntax_or:
2006	m_mark_reset = m_mark_count;
2007	BOOST_FALLTHROUGH;
2008	case regex_constants::syntax_colon:
2009	//
2010	// a non-capturing mark:
2011	//
2012	pb->index = markid = `0`;
2013	++m_position;
2014	break;
2015	case regex_constants::syntax_digit:
2016	{
2017	//
2018	// a recursive subexpression:
2019	//
2020	v = this->m_traits.toi(m_position, m_end, `10`);
2021	if((v < `0`) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2022	{
2023	// Rewind to start of (? sequence:
2024	--m_position;
2025	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2026	fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2027	return false;
2028	}
2029	insert_recursion:
2030	pb->index = markid = `0`;
2031	re_recurse* pr = static_cast<re_recurse>(this->append_state(syntax_element_recurse, sizeof*(re_recurse)));
2032	pr->alt.i = v;
2033	pr->state_id = `0`;
2034	static_cast<re_case*>(
2035	this->append_state(syntax_element_toggle_case, sizeof(re_case))
2036	)->icase = this->flags() & regbase::icase;
2037	break;
2038	}
2039	case regex_constants::syntax_plus:
2040	//
2041	// A forward-relative recursive subexpression:
2042	//
2043	++m_position;
2044	v = this->m_traits.toi(m_position, m_end, `10`);
2045	if((v <= `0`) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2046	{
2047	// Rewind to start of (? sequence:
2048	--m_position;
2049	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2050	fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2051	return false;
2052	}
2053	v += m_mark_count;
2054	goto insert_recursion;
2055	case regex_constants::syntax_dash:
2056	//
2057	// Possibly a backward-relative recursive subexpression:
2058	//
2059	++m_position;
2060	v = this->m_traits.toi(m_position, m_end, `10`);
2061	if(v <= `0`)
2062	{
2063	--m_position;
2064	// Oops not a relative recursion at all, but a (?-imsx) group:
2065	goto option_group_jump;
2066	}
2067	v = m_mark_count + `1` - v;
2068	if(v <= `0`)
2069	{
2070	// Rewind to start of (? sequence:
2071	--m_position;
2072	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2073	fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2074	return false;
2075	}
2076	goto insert_recursion;
2077	case regex_constants::syntax_equal:
2078	pb->index = markid = -`1`;
2079	++m_position;
2080	jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2081	this->m_pdata->m_data.align();
2082	m_alt_insert_point = this->m_pdata->m_data.size();
2083	break;
2084	case regex_constants::syntax_not:
2085	pb->index = markid = -`2`;
2086	++m_position;
2087	jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2088	this->m_pdata->m_data.align();
2089	m_alt_insert_point = this->m_pdata->m_data.size();
2090	break;
2091	case regex_constants::escape_type_left_word:
2092	{
2093	// a lookbehind assertion:
2094	if(++m_position == m_end)
2095	{
2096	// Rewind to start of (? sequence:
2097	--m_position;
2098	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2099	fail(regex_constants::error_perl_extension, m_position - m_base);
2100	return false;
2101	}
2102	regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2103	if(t == regex_constants::syntax_not)
2104	pb->index = markid = -`2`;
2105	else if(t == regex_constants::syntax_equal)
2106	pb->index = markid = -`1`;
2107	else
2108	{
2109	// Probably a named capture which also starts (?< :
2110	name_delim = `'>'`;
2111	--m_position;
2112	goto named_capture_jump;
2113	}
2114	++m_position;
2115	jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2116	this->append_state(syntax_element_backstep, sizeof(re_brace));
2117	this->m_pdata->m_data.align();
2118	m_alt_insert_point = this->m_pdata->m_data.size();
2119	break;
2120	}
2121	case regex_constants::escape_type_right_word:
2122	//
2123	// an independent sub-expression:
2124	//
2125	pb->index = markid = -`3`;
2126	++m_position;
2127	jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2128	this->m_pdata->m_data.align();
2129	m_alt_insert_point = this->m_pdata->m_data.size();
2130	break;
2131	case regex_constants::syntax_open_mark:
2132	{
2133	// a conditional expression:
2134	pb->index = markid = -`4`;
2135	if(++m_position == m_end)
2136	{
2137	// Rewind to start of (? sequence:
2138	--m_position;
2139	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2140	fail(regex_constants::error_perl_extension, m_position - m_base);
2141	return false;
2142	}
2143	v = this->m_traits.toi(m_position, m_end, `10`);
2144	if(m_position == m_end)
2145	{
2146	// Rewind to start of (? sequence:
2147	--m_position;
2148	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2149	fail(regex_constants::error_perl_extension, m_position - m_base);
2150	return false;
2151	}
2152	if(*m_position == charT(`'R'`))
2153	{
2154	if(++m_position == m_end)
2155	{
2156	// Rewind to start of (? sequence:
2157	--m_position;
2158	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2159	fail(regex_constants::error_perl_extension, m_position - m_base);
2160	return false;
2161	}
2162	if(*m_position == charT(`'&'`))
2163	{
2164	const charT* base = ++m_position;
2165	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2166	++m_position;
2167	if(m_position == m_end)
2168	{
2169	// Rewind to start of (? sequence:
2170	--m_position;
2171	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2172	fail(regex_constants::error_perl_extension, m_position - m_base);
2173	return false;
2174	}
2175	v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2176	}
2177	else
2178	{
2179	v = -this->m_traits.toi(m_position, m_end, `10`);
2180	}
2181	re_brace* br = static_cast<re_brace>(this->append_state(syntax_element_assert_backref, sizeof*(re_brace)));
2182	br->index = v < `0` ? (v - `1`) : `0`;
2183	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2184	{
2185	// Rewind to start of (? sequence:
2186	--m_position;
2187	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2188	fail(regex_constants::error_perl_extension, m_position - m_base);
2189	return false;
2190	}
2191	if(++m_position == m_end)
2192	{
2193	// Rewind to start of (? sequence:
2194	--m_position;
2195	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2196	fail(regex_constants::error_perl_extension, m_position - m_base);
2197	return false;
2198	}
2199	}
2200	else if((m_position == charT(`'\''`)) \|\| (m_position == charT(`'<'`)))
2201	{
2202	const charT* base = ++m_position;
2203	while((m_position != m_end) && (m_position != charT(`'>'`)) && (m_position != charT(`'\''`)))
2204	++m_position;
2205	if(m_position == m_end)
2206	{
2207	// Rewind to start of (? sequence:
2208	--m_position;
2209	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2210	fail(regex_constants::error_perl_extension, m_position - m_base);
2211	return false;
2212	}
2213	v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2214	re_brace* br = static_cast<re_brace>(this->append_state(syntax_element_assert_backref, sizeof*(re_brace)));
2215	br->index = v;
2216	if(((m_position != charT(`'>'`)) && (m_position != charT(`'\''`))) \|\| (++m_position == m_end))
2217	{
2218	// Rewind to start of (? sequence:
2219	--m_position;
2220	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2221	fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2222	return false;
2223	}
2224	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2225	{
2226	// Rewind to start of (? sequence:
2227	--m_position;
2228	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2229	fail(regex_constants::error_perl_extension, m_position - m_base);
2230	return false;
2231	}
2232	if(++m_position == m_end)
2233	{
2234	// Rewind to start of (? sequence:
2235	--m_position;
2236	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2237	fail(regex_constants::error_perl_extension, m_position - m_base);
2238	return false;
2239	}
2240	}
2241	else if(*m_position == charT(`'D'`))
2242	{
2243	const char* def = "DEFINE";
2244	while(def && (m_position != m_end) && (m_position == charT(*def)))
2245	++m_position, ++def;
2246	if((m_position == m_end) \|\| *def)
2247	{
2248	// Rewind to start of (? sequence:
2249	--m_position;
2250	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2251	fail(regex_constants::error_perl_extension, m_position - m_base);
2252	return false;
2253	}
2254	re_brace* br = static_cast<re_brace>(this->append_state(syntax_element_assert_backref, sizeof*(re_brace)));
2255	br->index = `9999`; // special magic value!
2256	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2257	{
2258	// Rewind to start of (? sequence:
2259	--m_position;
2260	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2261	fail(regex_constants::error_perl_extension, m_position - m_base);
2262	return false;
2263	}
2264	if(++m_position == m_end)
2265	{
2266	// Rewind to start of (? sequence:
2267	--m_position;
2268	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2269	fail(regex_constants::error_perl_extension, m_position - m_base);
2270	return false;
2271	}
2272	}
2273	else if(v > `0`)
2274	{
2275	re_brace* br = static_cast<re_brace>(this->append_state(syntax_element_assert_backref, sizeof*(re_brace)));
2276	br->index = v;
2277	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2278	{
2279	// Rewind to start of (? sequence:
2280	--m_position;
2281	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2282	fail(regex_constants::error_perl_extension, m_position - m_base);
2283	return false;
2284	}
2285	if(++m_position == m_end)
2286	{
2287	// Rewind to start of (? sequence:
2288	--m_position;
2289	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2290	fail(regex_constants::error_perl_extension, m_position - m_base);
2291	return false;
2292	}
2293	}
2294	else
2295	{
2296	// verify that we have a lookahead or lookbehind assert:
2297	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2298	{
2299	// Rewind to start of (? sequence:
2300	--m_position;
2301	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302	fail(regex_constants::error_perl_extension, m_position - m_base);
2303	return false;
2304	}
2305	if(++m_position == m_end)
2306	{
2307	// Rewind to start of (? sequence:
2308	--m_position;
2309	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2310	fail(regex_constants::error_perl_extension, m_position - m_base);
2311	return false;
2312	}
2313	if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2314	{
2315	if(++m_position == m_end)
2316	{
2317	// Rewind to start of (? sequence:
2318	--m_position;
2319	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2320	fail(regex_constants::error_perl_extension, m_position - m_base);
2321	return false;
2322	}
2323	if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2324	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2325	{
2326	// Rewind to start of (? sequence:
2327	--m_position;
2328	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2329	fail(regex_constants::error_perl_extension, m_position - m_base);
2330	return false;
2331	}
2332	m_position -= `3`;
2333	}
2334	else
2335	{
2336	if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2337	&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2338	{
2339	// Rewind to start of (? sequence:
2340	--m_position;
2341	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2342	fail(regex_constants::error_perl_extension, m_position - m_base);
2343	return false;
2344	}
2345	m_position -= `2`;
2346	}
2347	}
2348	break;
2349	}
2350	case regex_constants::syntax_close_mark:
2351	// Rewind to start of (? sequence:
2352	--m_position;
2353	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354	fail(regex_constants::error_perl_extension, m_position - m_base);
2355	return false;
2356	case regex_constants::escape_type_end_buffer:
2357	{
2358	name_delim = *m_position;
2359	named_capture_jump:
2360	markid = `0`;
2361	if(`0` == (this->flags() & regbase::nosubs))
2362	{
2363	markid = ++m_mark_count;
2364	#ifndef BOOST_NO_STD_DISTANCE
2365	if(this->flags() & regbase::save_subexpression_location)
2366	this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - `2`, `0`));
2367	#else
2368	if(this->flags() & regbase::save_subexpression_location)
2369	this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - `2`, `0`));
2370	#endif
2371	}
2372	pb->index = markid;
2373	const charT* base = ++m_position;
2374	if(m_position == m_end)
2375	{
2376	// Rewind to start of (? sequence:
2377	--m_position;
2378	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2379	fail(regex_constants::error_perl_extension, m_position - m_base);
2380	return false;
2381	}
2382	while((m_position != m_end) && (*m_position != name_delim))
2383	++m_position;
2384	if(m_position == m_end)
2385	{
2386	// Rewind to start of (? sequence:
2387	--m_position;
2388	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2389	fail(regex_constants::error_perl_extension, m_position - m_base);
2390	return false;
2391	}
2392	this->m_pdata->set_name(base, m_position, markid);
2393	++m_position;
2394	break;
2395	}
2396	default:
2397	if(*m_position == charT(`'R'`))
2398	{
2399	++m_position;
2400	v = `0`;
2401	if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2402	{
2403	// Rewind to start of (? sequence:
2404	--m_position;
2405	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2406	fail(regex_constants::error_perl_extension, m_position - m_base);
2407	return false;
2408	}
2409	goto insert_recursion;
2410	}
2411	if(*m_position == charT(`'&'`))
2412	{
2413	++m_position;
2414	const charT* base = m_position;
2415	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2416	++m_position;
2417	if(m_position == m_end)
2418	{
2419	// Rewind to start of (? sequence:
2420	--m_position;
2421	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2422	fail(regex_constants::error_perl_extension, m_position - m_base);
2423	return false;
2424	}
2425	v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2426	goto insert_recursion;
2427	}
2428	if(*m_position == charT(`'P'`))
2429	{
2430	++m_position;
2431	if(m_position == m_end)
2432	{
2433	// Rewind to start of (? sequence:
2434	--m_position;
2435	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2436	fail(regex_constants::error_perl_extension, m_position - m_base);
2437	return false;
2438	}
2439	if(*m_position == charT(`'>'`))
2440	{
2441	++m_position;
2442	const charT* base = m_position;
2443	while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2444	++m_position;
2445	if(m_position == m_end)
2446	{
2447	// Rewind to start of (? sequence:
2448	--m_position;
2449	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2450	fail(regex_constants::error_perl_extension, m_position - m_base);
2451	return false;
2452	}
2453	v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2454	goto insert_recursion;
2455	}
2456	}
2457	//
2458	// lets assume that we have a (?imsx) group and try and parse it:
2459	//
2460	option_group_jump:
2461	regex_constants::syntax_option_type opts = parse_options();
2462	if(m_position == m_end)
2463	{
2464	// Rewind to start of (? sequence:
2465	--m_position;
2466	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2467	fail(regex_constants::error_perl_extension, m_position - m_base);
2468	return false;
2469	}
2470	// make a note of whether we have a case change:
2471	m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2472	pb->index = markid = `0`;
2473	if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2474	{
2475	// update flags and carry on as normal:
2476	this->flags(opts);
2477	restore_flags = false;
2478	old_case_change \|= m_has_case_change; // defer end of scope by one ')'
2479	}
2480	else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2481	{
2482	// update flags and carry on until the matching ')' is found:
2483	this->flags(opts);
2484	++m_position;
2485	}
2486	else
2487	{
2488	// Rewind to start of (? sequence:
2489	--m_position;
2490	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491	fail(regex_constants::error_perl_extension, m_position - m_base);
2492	return false;
2493	}
2494
2495	// finally append a case change state if we need it:
2496	if(m_has_case_change)
2497	{
2498	static_cast<re_case*>(
2499	this->append_state(syntax_element_toggle_case, sizeof(re_case))
2500	)->icase = opts & regbase::icase;
2501	}
2502
2503	}
2504	//
2505	// now recursively add more states, this will terminate when we get to a
2506	// matching ')' :
2507	//
2508	parse_all();
2509	//
2510	// Unwind alternatives:
2511	//
2512	if(`0` == unwind_alts(last_paren_start))
2513	{
2514	// Rewind to start of (? sequence:
2515	--m_position;
2516	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2517	fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2518	return false;
2519	}
2520	//
2521	// we either have a ')' or we have run out of characters prematurely:
2522	//
2523	if(m_position == m_end)
2524	{
2525	// Rewind to start of (? sequence:
2526	--m_position;
2527	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2528	this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2529	return false;
2530	}
2531	BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2532	++m_position;
2533	//
2534	// restore the flags:
2535	//
2536	if(restore_flags)
2537	{
2538	// append a case change state if we need it:
2539	if(m_has_case_change)
2540	{
2541	static_cast<re_case*>(
2542	this->append_state(syntax_element_toggle_case, sizeof(re_case))
2543	)->icase = old_flags & regbase::icase;
2544	}
2545	this->flags(old_flags);
2546	}
2547	//
2548	// set up the jump pointer if we have one:
2549	//
2550	if(jump_offset)
2551	{
2552	this->m_pdata->m_data.align();
2553	re_jump* jmp = static_cast<re_jump>(this*->getaddress(jump_offset));
2554	jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2555	if((this->m_last_state == jmp) && (markid != -`2`))
2556	{
2557	// Oops... we didn't have anything inside the assertion.
2558	// Note we don't get here for negated forward lookahead as (?!)
2559	// does have some uses.
2560	// Rewind to start of (? sequence:
2561	--m_position;
2562	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2563	fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2564	return false;
2565	}
2566	}
2567	//
2568	// verify that if this is conditional expression, that we do have
2569	// an alternative, if not add one:
2570	//
2571	if(markid == -`4`)
2572	{
2573	re_syntax_base* b = this->getaddress(expected_alt_point);
2574	// Make sure we have exactly one alternative following this state:
2575	if(b->type != syntax_element_alt)
2576	{
2577	re_alt* alt = static_cast<re_alt>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof*(re_alt)));
2578	alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2579	}
2580	else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2581	{
2582	// Can't have seen more than one alternative:
2583	// Rewind to start of (? sequence:
2584	--m_position;
2585	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2586	fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator \| was encountered inside a conditional expression.");
2587	return false;
2588	}
2589	else
2590	{
2591	// We must not* have seen an alternative inside a (DEFINE) block:*
2592	b = this->getaddress(b->next.i, b);
2593	if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == `9999`))
2594	{
2595	// Rewind to start of (? sequence:
2596	--m_position;
2597	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2598	fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2599	return false;
2600	}
2601	}
2602	// check for invalid repetition of next state:
2603	b = this->getaddress(expected_alt_point);
2604	b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2605	if((b->type != syntax_element_assert_backref)
2606	&& (b->type != syntax_element_startmark))
2607	{
2608	// Rewind to start of (? sequence:
2609	--m_position;
2610	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2611	fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2612	return false;
2613	}
2614	}
2615	//
2616	// append closing parenthesis state:
2617	//
2618	pb = static_cast<re_brace>(this->append_state(syntax_element_endmark, sizeof*(re_brace)));
2619	pb->index = markid;
2620	pb->icase = this->flags() & regbase::icase;
2621	this->m_paren_start = last_paren_start;
2622	//
2623	// restore the alternate insertion point:
2624	//
2625	this->m_alt_insert_point = last_alt_point;
2626	//
2627	// and the case change data:
2628	//
2629	m_has_case_change = old_case_change;
2630	//
2631	// And the mark_reset data:
2632	//
2633	if(m_max_mark > m_mark_count)
2634	{
2635	m_mark_count = m_max_mark;
2636	}
2637	m_mark_reset = mark_reset;
2638	m_max_mark = max_mark;
2639
2640
2641	if(markid > `0`)
2642	{
2643	#ifndef BOOST_NO_STD_DISTANCE
2644	if(this->flags() & regbase::save_subexpression_location)
2645	this->m_pdata->m_subs.at(markid - `1`).second = std::distance(m_base, m_position) - `1`;
2646	#else
2647	if(this->flags() & regbase::save_subexpression_location)
2648	this->m_pdata->m_subs.at(markid - `1`).second = (m_position - m_base) - `1`;
2649	#endif
2650	//
2651	// allow backrefs to this mark:
2652	//
2653	if((markid > `0`) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
2654	this->m_backrefs \|= `1u` << (markid - `1`);
2655	}
2656	return true;
2657	}
2658
2659	template <class charT, class traits>
2660	bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2661	{
2662	while(*verb)
2663	{
2664	if(static_cast<charT>(verb) != m_position)
2665	{
2666	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2667	fail(regex_constants::error_perl_extension, m_position - m_base);
2668	return false;
2669	}
2670	if(++m_position == m_end)
2671	{
2672	--m_position;
2673	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2674	fail(regex_constants::error_perl_extension, m_position - m_base);
2675	return false;
2676	}
2677	++verb;
2678	}
2679	return true;
2680	}
2681
2682	template <class charT, class traits>
2683	bool basic_regex_parser<charT, traits>::parse_perl_verb()
2684	{
2685	if(++m_position == m_end)
2686	{
2687	// Rewind to start of ( sequence:*
2688	--m_position;
2689	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2690	fail(regex_constants::error_perl_extension, m_position - m_base);
2691	return false;
2692	}
2693	switch(*m_position)
2694	{
2695	case `'F'`:
2696	if(++m_position == m_end)
2697	{
2698	// Rewind to start of ( sequence:*
2699	--m_position;
2700	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2701	fail(regex_constants::error_perl_extension, m_position - m_base);
2702	return false;
2703	}
2704	if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) \|\| match_verb(verb: "AIL"))
2705	{
2706	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2707	{
2708	// Rewind to start of ( sequence:*
2709	--m_position;
2710	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2711	fail(regex_constants::error_perl_extension, m_position - m_base);
2712	return false;
2713	}
2714	++m_position;
2715	this->append_state(syntax_element_fail);
2716	return true;
2717	}
2718	break;
2719	case `'A'`:
2720	if(++m_position == m_end)
2721	{
2722	// Rewind to start of ( sequence:*
2723	--m_position;
2724	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2725	fail(regex_constants::error_perl_extension, m_position - m_base);
2726	return false;
2727	}
2728	if(match_verb(verb: "CCEPT"))
2729	{
2730	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2731	{
2732	// Rewind to start of ( sequence:*
2733	--m_position;
2734	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2735	fail(regex_constants::error_perl_extension, m_position - m_base);
2736	return false;
2737	}
2738	++m_position;
2739	this->append_state(syntax_element_accept);
2740	return true;
2741	}
2742	break;
2743	case `'C'`:
2744	if(++m_position == m_end)
2745	{
2746	// Rewind to start of ( sequence:*
2747	--m_position;
2748	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2749	fail(regex_constants::error_perl_extension, m_position - m_base);
2750	return false;
2751	}
2752	if(match_verb(verb: "OMMIT"))
2753	{
2754	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2755	{
2756	// Rewind to start of ( sequence:*
2757	--m_position;
2758	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2759	fail(regex_constants::error_perl_extension, m_position - m_base);
2760	return false;
2761	}
2762	++m_position;
2763	static_cast<re_commit>(this->append_state(syntax_element_commit, sizeof*(re_commit)))->action = commit_commit;
2764	this->m_pdata->m_disable_match_any = true;
2765	return true;
2766	}
2767	break;
2768	case `'P'`:
2769	if(++m_position == m_end)
2770	{
2771	// Rewind to start of ( sequence:*
2772	--m_position;
2773	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2774	fail(regex_constants::error_perl_extension, m_position - m_base);
2775	return false;
2776	}
2777	if(match_verb(verb: "RUNE"))
2778	{
2779	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2780	{
2781	// Rewind to start of ( sequence:*
2782	--m_position;
2783	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2784	fail(regex_constants::error_perl_extension, m_position - m_base);
2785	return false;
2786	}
2787	++m_position;
2788	static_cast<re_commit>(this->append_state(syntax_element_commit, sizeof*(re_commit)))->action = commit_prune;
2789	this->m_pdata->m_disable_match_any = true;
2790	return true;
2791	}
2792	break;
2793	case `'S'`:
2794	if(++m_position == m_end)
2795	{
2796	// Rewind to start of ( sequence:*
2797	--m_position;
2798	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2799	fail(regex_constants::error_perl_extension, m_position - m_base);
2800	return false;
2801	}
2802	if(match_verb(verb: "KIP"))
2803	{
2804	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2805	{
2806	// Rewind to start of ( sequence:*
2807	--m_position;
2808	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2809	fail(regex_constants::error_perl_extension, m_position - m_base);
2810	return false;
2811	}
2812	++m_position;
2813	static_cast<re_commit>(this->append_state(syntax_element_commit, sizeof*(re_commit)))->action = commit_skip;
2814	this->m_pdata->m_disable_match_any = true;
2815	return true;
2816	}
2817	break;
2818	case `'T'`:
2819	if(++m_position == m_end)
2820	{
2821	// Rewind to start of ( sequence:*
2822	--m_position;
2823	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2824	fail(regex_constants::error_perl_extension, m_position - m_base);
2825	return false;
2826	}
2827	if(match_verb(verb: "HEN"))
2828	{
2829	if((m_position == m_end) \|\| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2830	{
2831	// Rewind to start of ( sequence:*
2832	--m_position;
2833	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2834	fail(regex_constants::error_perl_extension, m_position - m_base);
2835	return false;
2836	}
2837	++m_position;
2838	this->append_state(syntax_element_then);
2839	this->m_pdata->m_disable_match_any = true;
2840	return true;
2841	}
2842	break;
2843	}
2844	return false;
2845	}
2846
2847	template <class charT, class traits>
2848	bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2849	{
2850	//
2851	// parses an emacs style \sx or \Sx construct.
2852	//
2853	if(++m_position == m_end)
2854	{
2855	// Rewind to start of sequence:
2856	--m_position;
2857	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2858	fail(regex_constants::error_escape, m_position - m_base);
2859	return false;
2860	}
2861	basic_char_set<charT, traits> char_set;
2862	if(negate)
2863	char_set.negate();
2864
2865	static const charT s_punct[`5`] = { `'p'`, `'u'`, `'n'`, `'c'`, `'t'`, };
2866
2867	switch(*m_position)
2868	{
2869	case `'s'`:
2870	case `' '`:
2871	char_set.add_class(this->m_mask_space);
2872	break;
2873	case `'w'`:
2874	char_set.add_class(this->m_word_mask);
2875	break;
2876	case `'_'`:
2877	char_set.add_single(digraph<charT>(charT(`'$'`)));
2878	char_set.add_single(digraph<charT>(charT(`'&'`)));
2879	char_set.add_single(digraph<charT>(charT(`'*'`)));
2880	char_set.add_single(digraph<charT>(charT(`'+'`)));
2881	char_set.add_single(digraph<charT>(charT(`'-'`)));
2882	char_set.add_single(digraph<charT>(charT(`'_'`)));
2883	char_set.add_single(digraph<charT>(charT(`'<'`)));
2884	char_set.add_single(digraph<charT>(charT(`'>'`)));
2885	break;
2886	case `'.'`:
2887	char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+`5`));
2888	break;
2889	case `'('`:
2890	char_set.add_single(digraph<charT>(charT(`'('`)));
2891	char_set.add_single(digraph<charT>(charT(`'['`)));
2892	char_set.add_single(digraph<charT>(charT(`'{'`)));
2893	break;
2894	case `')'`:
2895	char_set.add_single(digraph<charT>(charT(`')'`)));
2896	char_set.add_single(digraph<charT>(charT(`']'`)));
2897	char_set.add_single(digraph<charT>(charT(`'}'`)));
2898	break;
2899	case `'"'`:
2900	char_set.add_single(digraph<charT>(charT(`'"'`)));
2901	char_set.add_single(digraph<charT>(charT(`'\''`)));
2902	char_set.add_single(digraph<charT>(charT('`')));
2903	break;
2904	case `'\''`:
2905	char_set.add_single(digraph<charT>(charT(`'\''`)));
2906	char_set.add_single(digraph<charT>(charT(`','`)));
2907	char_set.add_single(digraph<charT>(charT(`'#'`)));
2908	break;
2909	case `'<'`:
2910	char_set.add_single(digraph<charT>(charT(`';'`)));
2911	break;
2912	case `'>'`:
2913	char_set.add_single(digraph<charT>(charT(`'\n'`)));
2914	char_set.add_single(digraph<charT>(charT(`'\f'`)));
2915	break;
2916	default:
2917	fail(regex_constants::error_ctype, m_position - m_base);
2918	return false;
2919	}
2920	if(`0` == this->append_set(char_set))
2921	{
2922	fail(regex_constants::error_ctype, m_position - m_base);
2923	return false;
2924	}
2925	++m_position;
2926	return true;
2927	}
2928
2929	template <class charT, class traits>
2930	regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2931	{
2932	// we have a (?imsx-imsx) group, convert it into a set of flags:
2933	regex_constants::syntax_option_type f = this->flags();
2934	bool breakout = false;
2935	do
2936	{
2937	switch(*m_position)
2938	{
2939	case `'s'`:
2940	f \|= regex_constants::mod_s;
2941	f &= ~regex_constants::no_mod_s;
2942	break;
2943	case `'m'`:
2944	f &= ~regex_constants::no_mod_m;
2945	break;
2946	case `'i'`:
2947	f \|= regex_constants::icase;
2948	break;
2949	case `'x'`:
2950	f \|= regex_constants::mod_x;
2951	break;
2952	default:
2953	breakout = true;
2954	continue;
2955	}
2956	if(++m_position == m_end)
2957	{
2958	// Rewind to start of (? sequence:
2959	--m_position;
2960	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2961	fail(regex_constants::error_paren, m_position - m_base);
2962	return false;
2963	}
2964	}
2965	while(!breakout);
2966
2967	breakout = false;
2968
2969	if(m_position == static_cast*<charT>(`'-'`))
2970	{
2971	if(++m_position == m_end)
2972	{
2973	// Rewind to start of (? sequence:
2974	--m_position;
2975	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2976	fail(regex_constants::error_paren, m_position - m_base);
2977	return false;
2978	}
2979	do
2980	{
2981	switch(*m_position)
2982	{
2983	case `'s'`:
2984	f &= ~regex_constants::mod_s;
2985	f \|= regex_constants::no_mod_s;
2986	break;
2987	case `'m'`:
2988	f \|= regex_constants::no_mod_m;
2989	break;
2990	case `'i'`:
2991	f &= ~regex_constants::icase;
2992	break;
2993	case `'x'`:
2994	f &= ~regex_constants::mod_x;
2995	break;
2996	default:
2997	breakout = true;
2998	continue;
2999	}
3000	if(++m_position == m_end)
3001	{
3002	// Rewind to start of (? sequence:
3003	--m_position;
3004	while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3005	fail(regex_constants::error_paren, m_position - m_base);
3006	return false;
3007	}
3008	}
3009	while(!breakout);
3010	}
3011	return f;
3012	}
3013
3014	template <class charT, class traits>
3015	bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3016	{
3017	//
3018	// If we didn't actually add any states after the last
3019	// alternative then that's an error:
3020	//
3021	if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3022	&& m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3023	&&
3024	!(
3025	((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3026	&&
3027	((this->flags() & regbase::no_empty_expressions) == `0`)
3028	)
3029	)
3030	{
3031	fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator \|.");
3032	return false;
3033	}
3034	//
3035	// Fix up our alternatives:
3036	//
3037	while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3038	{
3039	//
3040	// fix up the jump to point to the end of the states
3041	// that we've just added:
3042	//
3043	std::ptrdiff_t jump_offset = m_alt_jumps.back();
3044	m_alt_jumps.pop_back();
3045	this->m_pdata->m_data.align();
3046	re_jump* jmp = static_cast<re_jump>(this*->getaddress(jump_offset));
3047	BOOST_ASSERT(jmp->type == syntax_element_jump);
3048	jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3049	}
3050	return true;
3051	}
3052
3053	#ifdef BOOST_MSVC
3054	#pragma warning(pop)
3055	#endif
3056
3057	} // namespace BOOST_REGEX_DETAIL_NS
3058	} // namespace boost
3059
3060	#ifdef BOOST_MSVC
3061	#pragma warning(push)
3062	#pragma warning(disable: 4103)
3063	#endif
3064	#ifdef BOOST_HAS_ABI_HEADERS
3065	# include BOOST_ABI_SUFFIX
3066	#endif
3067	#ifdef BOOST_MSVC
3068	#pragma warning(pop)
3069	#endif
3070
3071	#endif
3072

source code of boost/boost/regex/v4/basic_regex_parser.hpp