1///////////////////////////////////////////////////////////////////////////////
2/// \file regex_primitives.hpp
3/// Contains the syntax elements for writing static regular expressions.
4//
5// Copyright 2008 Eric Niebler. Distributed under the Boost
6// Software License, Version 1.0. (See accompanying file
7// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8
9#ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
10#define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
11
12#include <vector>
13#include <climits>
14#include <boost/config.hpp>
15#include <boost/assert.hpp>
16#include <boost/mpl/if.hpp>
17#include <boost/mpl/and.hpp>
18#include <boost/mpl/assert.hpp>
19#include <boost/detail/workaround.hpp>
20#include <boost/preprocessor/cat.hpp>
21#include <boost/xpressive/detail/detail_fwd.hpp>
22#include <boost/xpressive/detail/core/matchers.hpp>
23#include <boost/xpressive/detail/core/regex_domain.hpp>
24#include <boost/xpressive/detail/utility/ignore_unused.hpp>
25
26// Doxygen can't handle proto :-(
27#ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED
28# include <boost/proto/core.hpp>
29# include <boost/proto/transform/arg.hpp>
30# include <boost/proto/transform/when.hpp>
31# include <boost/xpressive/detail/core/icase.hpp>
32# include <boost/xpressive/detail/static/compile.hpp>
33# include <boost/xpressive/detail/static/modifier.hpp>
34#endif
35
36namespace boost { namespace xpressive { namespace detail
37{
38
39 typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary;
40 typedef assert_word_placeholder<word_begin> assert_word_begin;
41 typedef assert_word_placeholder<word_end> assert_word_end;
42
43 // workaround msvc-7.1 bug with function pointer types
44 // within function types:
45 #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
46 #define mark_number(x) proto::call<mark_number(x)>
47 #define minus_one() proto::make<minus_one()>
48 #endif
49
50 struct push_back : proto::callable
51 {
52 typedef int result_type;
53
54 template<typename Subs>
55 int operator ()(Subs &subs, int i) const
56 {
57 subs.push_back(i);
58 return i;
59 }
60 };
61
62 struct mark_number : proto::callable
63 {
64 typedef int result_type;
65
66 template<typename Expr>
67 int operator ()(Expr const &expr) const
68 {
69 return expr.mark_number_;
70 }
71 };
72
73 typedef mpl::int_<-1> minus_one;
74
75 // s1 or -s1
76 struct SubMatch
77 : proto::or_<
78 proto::when<basic_mark_tag, push_back(proto::_data, mark_number(proto::_value)) >
79 , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one()) >
80 >
81 {};
82
83 struct SubMatchList
84 : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> >
85 {};
86
87 template<typename Subs>
88 typename enable_if<
89 mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> >
90 , std::vector<int>
91 >::type
92 to_vector(Subs const &subs)
93 {
94 std::vector<int> subs_;
95 SubMatchList()(subs, 0, subs_);
96 return subs_;
97 }
98
99 #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
100 #undef mark_number
101 #undef minus_one
102 #endif
103
104 // replace "Expr" with "keep(*State) >> Expr"
105 struct skip_primitives : proto::transform<skip_primitives>
106 {
107 template<typename Expr, typename State, typename Data>
108 struct impl : proto::transform_impl<Expr, State, Data>
109 {
110 typedef
111 typename proto::shift_right<
112 typename proto::unary_expr<
113 keeper_tag
114 , typename proto::dereference<State>::type
115 >::type
116 , Expr
117 >::type
118 result_type;
119
120 result_type operator ()(
121 typename impl::expr_param expr
122 , typename impl::state_param state
123 , typename impl::data_param
124 ) const
125 {
126 result_type that = {{{state}}, expr};
127 return that;
128 }
129 };
130 };
131
132 struct Primitives
133 : proto::or_<
134 proto::terminal<proto::_>
135 , proto::comma<proto::_, proto::_>
136 , proto::subscript<proto::terminal<set_initializer>, proto::_>
137 , proto::assign<proto::terminal<set_initializer>, proto::_>
138 , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_>
139 , proto::complement<Primitives>
140 >
141 {};
142
143 struct SkipGrammar
144 : proto::or_<
145 proto::when<Primitives, skip_primitives>
146 , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags
147 , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions
148 , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers
149 , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds
150 , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game!
151 >
152 {};
153
154 template<typename Skip>
155 struct skip_directive
156 {
157 typedef typename proto::result_of::as_expr<Skip>::type skip_type;
158
159 skip_directive(Skip const &skip)
160 : skip_(proto::as_expr(skip))
161 {}
162
163 template<typename Sig>
164 struct result {};
165
166 template<typename This, typename Expr>
167 struct result<This(Expr)>
168 {
169 typedef
170 SkipGrammar::impl<
171 typename proto::result_of::as_expr<Expr>::type
172 , skip_type const &
173 , mpl::void_ &
174 >
175 skip_transform;
176
177 typedef
178 typename proto::shift_right<
179 typename skip_transform::result_type
180 , typename proto::dereference<skip_type>::type
181 >::type
182 type;
183 };
184
185 template<typename Expr>
186 typename result<skip_directive(Expr)>::type
187 operator ()(Expr const &expr) const
188 {
189 mpl::void_ ignore;
190 typedef result<skip_directive(Expr)> result_fun;
191 typename result_fun::type that = {
192 typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore)
193 , {skip_}
194 };
195 return that;
196 }
197
198 private:
199 skip_type skip_;
200 };
201
202/*
203///////////////////////////////////////////////////////////////////////////////
204/// INTERNAL ONLY
205// BOOST_XPRESSIVE_GLOBAL
206// for defining globals that neither violate the One Definition Rule nor
207// lead to undefined behavior due to global object initialization order.
208//#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \
209// namespace detail \
210// { \
211// template<int Dummy> \
212// struct BOOST_PP_CAT(global_pod_, name) \
213// { \
214// static type const value; \
215// private: \
216// union type_must_be_pod \
217// { \
218// type t; \
219// char ch; \
220// } u; \
221// }; \
222// template<int Dummy> \
223// type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \
224// } \
225// type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
226*/
227
228
229} // namespace detail
230
231/// INTERNAL ONLY (for backwards compatibility)
unsigned int const repeat_max = UINT_MAX - 1; // same value as inf
233
234///////////////////////////////////////////////////////////////////////////////
235/// \brief For infinite repetition of a sub-expression.
236///
237/// Magic value used with the repeat\<\>() function template
238/// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
239/// The equivalent in perl is /a{17,}/.
unsigned int const inf = UINT_MAX - 1; // one below UINT_MAX
241
242/// INTERNAL ONLY (for backwards compatibility)
243proto::terminal<detail::epsilon_matcher>::type const epsilon = {.child0: {}};
244
245///////////////////////////////////////////////////////////////////////////////
246/// \brief Successfully matches nothing.
247///
248/// Successfully matches a zero-width sequence. nil always succeeds and
249/// never consumes any characters.
250proto::terminal<detail::epsilon_matcher>::type const nil = {.child0: {}};
251
252///////////////////////////////////////////////////////////////////////////////
253/// \brief Matches an alpha-numeric character.
254///
255/// The regex traits are used to determine which characters are alpha-numeric.
256/// To match any character that is not alpha-numeric, use ~alnum.
257///
258/// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
259/// to /[[:^alnum:]]/ in perl.
260proto::terminal<detail::posix_charset_placeholder>::type const alnum = {.child0: {.name_: "alnum", .not_: false}};
261
262///////////////////////////////////////////////////////////////////////////////
263/// \brief Matches an alphabetic character.
264///
265/// The regex traits are used to determine which characters are alphabetic.
266/// To match any character that is not alphabetic, use ~alpha.
267///
268/// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
269/// to /[[:^alpha:]]/ in perl.
270proto::terminal<detail::posix_charset_placeholder>::type const alpha = {.child0: {.name_: "alpha", .not_: false}};
271
272///////////////////////////////////////////////////////////////////////////////
/// \brief Matches a blank (horizontal white-space) character.
274///
275/// The regex traits are used to determine which characters are blank characters.
276/// To match any character that is not blank, use ~blank.
277///
278/// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
279/// to /[[:^blank:]]/ in perl.
280proto::terminal<detail::posix_charset_placeholder>::type const blank = {.child0: {.name_: "blank", .not_: false}};
281
282///////////////////////////////////////////////////////////////////////////////
283/// \brief Matches a control character.
284///
285/// The regex traits are used to determine which characters are control characters.
286/// To match any character that is not a control character, use ~cntrl.
287///
288/// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
289/// to /[[:^cntrl:]]/ in perl.
290proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {.child0: {.name_: "cntrl", .not_: false}};
291
292///////////////////////////////////////////////////////////////////////////////
293/// \brief Matches a digit character.
294///
295/// The regex traits are used to determine which characters are digits.
296/// To match any character that is not a digit, use ~digit.
297///
298/// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
299/// to /[[:^digit:]]/ in perl.
300proto::terminal<detail::posix_charset_placeholder>::type const digit = {.child0: {.name_: "digit", .not_: false}};
301
302///////////////////////////////////////////////////////////////////////////////
303/// \brief Matches a graph character.
304///
305/// The regex traits are used to determine which characters are graphable.
306/// To match any character that is not graphable, use ~graph.
307///
308/// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
309/// to /[[:^graph:]]/ in perl.
310proto::terminal<detail::posix_charset_placeholder>::type const graph = {.child0: {.name_: "graph", .not_: false}};
311
312///////////////////////////////////////////////////////////////////////////////
313/// \brief Matches a lower-case character.
314///
315/// The regex traits are used to determine which characters are lower-case.
316/// To match any character that is not a lower-case character, use ~lower.
317///
318/// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
319/// to /[[:^lower:]]/ in perl.
320proto::terminal<detail::posix_charset_placeholder>::type const lower = {.child0: {.name_: "lower", .not_: false}};
321
322///////////////////////////////////////////////////////////////////////////////
323/// \brief Matches a printable character.
324///
325/// The regex traits are used to determine which characters are printable.
326/// To match any character that is not printable, use ~print.
327///
328/// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
329/// to /[[:^print:]]/ in perl.
330proto::terminal<detail::posix_charset_placeholder>::type const print = {.child0: {.name_: "print", .not_: false}};
331
332///////////////////////////////////////////////////////////////////////////////
333/// \brief Matches a punctuation character.
334///
335/// The regex traits are used to determine which characters are punctuation.
336/// To match any character that is not punctuation, use ~punct.
337///
338/// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
339/// to /[[:^punct:]]/ in perl.
340proto::terminal<detail::posix_charset_placeholder>::type const punct = {.child0: {.name_: "punct", .not_: false}};
341
342///////////////////////////////////////////////////////////////////////////////
343/// \brief Matches a space character.
344///
345/// The regex traits are used to determine which characters are space characters.
346/// To match any character that is not white-space, use ~space.
347///
348/// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
349/// to /[[:^space:]]/ in perl.
350proto::terminal<detail::posix_charset_placeholder>::type const space = {.child0: {.name_: "space", .not_: false}};
351
352///////////////////////////////////////////////////////////////////////////////
353/// \brief Matches an upper-case character.
354///
355/// The regex traits are used to determine which characters are upper-case.
356/// To match any character that is not upper-case, use ~upper.
357///
358/// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
359/// to /[[:^upper:]]/ in perl.
360proto::terminal<detail::posix_charset_placeholder>::type const upper = {.child0: {.name_: "upper", .not_: false}};
361
362///////////////////////////////////////////////////////////////////////////////
363/// \brief Matches a hexadecimal digit character.
364///
365/// The regex traits are used to determine which characters are hex digits.
366/// To match any character that is not a hex digit, use ~xdigit.
367///
368/// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
369/// to /[[:^xdigit:]]/ in perl.
370proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {.child0: {.name_: "xdigit", .not_: false}};
371
372///////////////////////////////////////////////////////////////////////////////
373/// \brief Beginning of sequence assertion.
374///
375/// For the character sequence [begin, end), 'bos' matches the
376/// zero-width sub-sequence [begin, begin).
377proto::terminal<detail::assert_bos_matcher>::type const bos = {.child0: {}};
378
379///////////////////////////////////////////////////////////////////////////////
380/// \brief End of sequence assertion.
381///
382/// For the character sequence [begin, end),
383/// 'eos' matches the zero-width sub-sequence [end, end).
384///
385/// \attention Unlike the perl end of sequence assertion \$, 'eos' will
386/// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
387/// get that behavior, use (!_n >> eos).
388proto::terminal<detail::assert_eos_matcher>::type const eos = {.child0: {}};
389
390///////////////////////////////////////////////////////////////////////////////
391/// \brief Beginning of line assertion.
392///
/// 'bol' matches the zero-width sub-sequence
/// immediately following a logical newline sequence. The regex traits
/// are used to determine what constitutes a logical newline sequence.
396proto::terminal<detail::assert_bol_placeholder>::type const bol = {.child0: {}};
397
398///////////////////////////////////////////////////////////////////////////////
399/// \brief End of line assertion.
400///
/// 'eol' matches the zero-width sub-sequence
/// immediately preceding a logical newline sequence. The regex traits
/// are used to determine what constitutes a logical newline sequence.
404proto::terminal<detail::assert_eol_placeholder>::type const eol = {.child0: {}};
405
406///////////////////////////////////////////////////////////////////////////////
407/// \brief Beginning of word assertion.
408///
409/// 'bow' matches the zero-width sub-sequence
/// immediately following a non-word character and preceding a word character.
411/// The regex traits are used to determine what constitutes a word character.
412proto::terminal<detail::assert_word_begin>::type const bow = {.child0: {}};
413
414///////////////////////////////////////////////////////////////////////////////
415/// \brief End of word assertion.
416///
417/// 'eow' matches the zero-width sub-sequence
/// immediately following a word character and preceding a non-word character.
419/// The regex traits are used to determine what constitutes a word character.
420proto::terminal<detail::assert_word_end>::type const eow = {.child0: {}};
421
422///////////////////////////////////////////////////////////////////////////////
423/// \brief Word boundary assertion.
424///
425/// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
426/// It is equivalent to (bow | eow). The regex traits are used to determine what
427/// constitutes a word character. To match a non-word boundary, use ~_b.
428///
429/// \attention _b is like \\b in perl. ~_b is like \\B in perl.
430proto::terminal<detail::assert_word_boundary>::type const _b = {.child0: {}};
431
432///////////////////////////////////////////////////////////////////////////////
433/// \brief Matches a word character.
434///
435/// '_w' matches a single word character. The regex traits are used to determine which
436/// characters are word characters. Use ~_w to match a character that is not a word
437/// character.
438///
439/// \attention _w is like \\w in perl. ~_w is like \\W in perl.
440proto::terminal<detail::posix_charset_placeholder>::type const _w = {.child0: {.name_: "w", .not_: false}};
441
442///////////////////////////////////////////////////////////////////////////////
443/// \brief Matches a digit character.
444///
445/// '_d' matches a single digit character. The regex traits are used to determine which
446/// characters are digits. Use ~_d to match a character that is not a digit
447/// character.
448///
449/// \attention _d is like \\d in perl. ~_d is like \\D in perl.
450proto::terminal<detail::posix_charset_placeholder>::type const _d = {.child0: {.name_: "d", .not_: false}};
451
452///////////////////////////////////////////////////////////////////////////////
453/// \brief Matches a space character.
454///
455/// '_s' matches a single space character. The regex traits are used to determine which
456/// characters are space characters. Use ~_s to match a character that is not a space
457/// character.
458///
459/// \attention _s is like \\s in perl. ~_s is like \\S in perl.
460proto::terminal<detail::posix_charset_placeholder>::type const _s = {.child0: {.name_: "s", .not_: false}};
461
462///////////////////////////////////////////////////////////////////////////////
463/// \brief Matches a literal newline character, '\\n'.
464///
465/// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
466/// that is not a newline.
467///
468/// \attention ~_n is like '.' in perl without the /s modifier.
469proto::terminal<char>::type const _n = {.child0: '\n'};
470
471///////////////////////////////////////////////////////////////////////////////
472/// \brief Matches a logical newline sequence.
473///
474/// '_ln' matches a logical newline sequence. This can be any character in the
475/// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
476/// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
477/// To match any one character that is not a logical newline, use ~_ln.
478detail::logical_newline_xpression const _ln = {.child0: {}};
479
480///////////////////////////////////////////////////////////////////////////////
481/// \brief Matches any one character.
482///
483/// Match any character, similar to '.' in perl syntax with the /s modifier.
484/// '_' matches any one character, including the newline.
485///
486/// \attention To match any character except the newline, use ~_n
487proto::terminal<detail::any_matcher>::type const _ = {.child0: {}};
488
489///////////////////////////////////////////////////////////////////////////////
490/// \brief Reference to the current regex object
491///
492/// Useful when constructing recursive regular expression objects. The 'self'
493/// identifier is a short-hand for the current regex object. For instance,
494/// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
495/// matches balanced parens such as "((()))".
496proto::terminal<detail::self_placeholder>::type const self = {.child0: {}};
497
498///////////////////////////////////////////////////////////////////////////////
499/// \brief Used to create character sets.
500///
501/// There are two ways to create character sets with the 'set' identifier. The
502/// easiest is to create a comma-separated list of the characters in the set,
503/// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
504/// way is to define the set as an argument to the set subscript operator.
505/// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
506/// 'c' or a digit character.
507///
508/// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
509/// will match any character that is not an 'a', 'b', or 'c'.
510///
511/// Sets can be composed of other, possibly complemented, sets. For instance,
512/// set[ ~digit | ~(set= 'a','b','c') ].
513detail::set_initializer_type const set = {.child0: {}};
514
515///////////////////////////////////////////////////////////////////////////////
516/// \brief Sub-match placeholder type, used to create named captures in
517/// static regexes.
518///
/// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc. You
520/// can use the \c mark_tag type to create your own sub-match placeholders with
521/// more meaningful names. This is roughly equivalent to the "named capture"
522/// feature of dynamic regular expressions.
523///
524/// To create a named sub-match placeholder, initialize it with a unique integer.
525/// The integer must only be unique within the regex in which the placeholder
/// is used. Then you can use it within static regexes to create sub-matches
527/// by assigning a sub-expression to it, or to refer back to already created
528/// sub-matches.
529///
530/// \code
531/// mark_tag number(1); // "number" is now equivalent to "s1"
532/// // Match a number, followed by a space and the same number again
533/// sregex rx = (number = +_d) >> ' ' >> number;
534/// \endcode
535///
536/// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder
537/// can be used to index into the <tt>match_results\<\></tt> object to retrieve the
538/// corresponding sub-match.
539struct mark_tag
540 : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain>
541{
542private:
543 typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type;
544
545 static detail::basic_mark_tag make_tag(int mark_nbr)
546 {
547 detail::basic_mark_tag mark = {.child0: {.mark_number_: mark_nbr}};
548 return mark;
549 }
550
551public:
552 /// \brief Initialize a mark_tag placeholder
553 /// \param mark_nbr An integer that uniquely identifies this \c mark_tag
554 /// within the static regexes in which this \c mark_tag will be used.
555 /// \pre <tt>mark_nbr \> 0</tt>
556 mark_tag(int mark_nbr)
557 : base_type(mark_tag::make_tag(mark_nbr))
558 {
559 // Marks numbers must be integers greater than 0.
560 BOOST_ASSERT(mark_nbr > 0);
561 }
562
563 /// INTERNAL ONLY
564 operator detail::basic_mark_tag const &() const
565 {
566 return this->proto_base();
567 }
568
569 BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag)
570};
571
// Global mark_tags are declared through this macro. It defines them as plain
// aggregates of mark_tag's proto base expression type, which guarantees that
// they are statically initialized and so avoids order-of-initialization bugs.
// In user code, the simpler form "mark_tag number(1);" is preferable.
/// INTERNAL ONLY
#define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE)                            \
    boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}}          \
    /**/
580
581///////////////////////////////////////////////////////////////////////////////
582/// \brief Sub-match placeholder, like $& in Perl
583BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0);
584
585///////////////////////////////////////////////////////////////////////////////
586/// \brief Sub-match placeholder, like $1 in perl.
587///
588/// To create a sub-match, assign a sub-expression to the sub-match placeholder.
589/// For instance, (s1= _) will match any one character and remember which
590/// character was matched in the 1st sub-match. Later in the pattern, you can
591/// refer back to the sub-match. For instance, (s1= _) >> s1 will match any
592/// character, and then match the same character again.
593///
594/// After a successful regex_match() or regex_search(), the sub-match placeholders
595/// can be used to index into the match_results\<\> object to retrieve the Nth
596/// sub-match.
597BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1);
598BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2);
599BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3);
600BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4);
601BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5);
602BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6);
603BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7);
604BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8);
605BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9);
606
607// NOTE: For the purpose of xpressive's documentation, make icase() look like an
608// ordinary function. In reality, it is a function object defined in detail/icase.hpp
609// so that it can serve double-duty as regex_constants::icase, the syntax_option_type.
#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
///////////////////////////////////////////////////////////////////////////////
/// \brief Makes a sub-expression case-insensitive.
///
/// Use icase() to make a sub-expression case-insensitive. For instance,
/// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
/// "bar" irrespective of case.
///
/// \param expr The sub-expression to make case-insensitive.
template<typename Expr>
detail::unspecified icase(Expr const &expr)
{
    // Documentation-only stub; compiled only when Doxygen is running.
    return 0;
}
#endif
619
620///////////////////////////////////////////////////////////////////////////////
621/// \brief Makes a literal into a regular expression.
622///
623/// Use as_xpr() to turn a literal into a regular expression. For instance,
624/// "foo" >> "bar" will not compile because both operands to the right-shift
625/// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar"
626/// instead.
627///
628/// You can use as_xpr() with character literals in addition to string literals.
629/// For instance, as_xpr('a') will match an 'a'. You can also complement a
630/// character literal, as with ~as_xpr('a'). This will match any one character
631/// that is not an 'a'.
632#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
633template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; }
634#else
635proto::functional::as_expr<> const as_xpr = {};
636#endif
637
638///////////////////////////////////////////////////////////////////////////////
639/// \brief Embed a regex object by reference.
640///
641/// \param rex The basic_regex object to embed by reference.
642template<typename BidiIter>
643inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const
644by_ref(basic_regex<BidiIter> const &rex)
645{
646 reference_wrapper<basic_regex<BidiIter> const> ref(rex);
647 return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref);
648}
649
650///////////////////////////////////////////////////////////////////////////////
651/// \brief Match a range of characters.
652///
653/// Match any character in the range [ch_min, ch_max].
654///
655/// \param ch_min The lower end of the range to match.
656/// \param ch_max The upper end of the range to match.
657template<typename Char>
658inline typename proto::terminal<detail::range_placeholder<Char> >::type const
659range(Char ch_min, Char ch_max)
660{
661 detail::range_placeholder<Char> that = {ch_min, ch_max, false};
662 return proto::terminal<detail::range_placeholder<Char> >::type::make(that);
663}
664
665///////////////////////////////////////////////////////////////////////////////
666/// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr).
667///
668/// \param expr The sub-expression to make optional.
669template<typename Expr>
670typename proto::result_of::make_expr<
671 proto::tag::logical_not
672 , proto::default_domain
673 , Expr const &
674>::type const
675optional(Expr const &expr)
676{
677 return proto::make_expr<
678 proto::tag::logical_not
679 , proto::default_domain
680 >(boost::ref(expr));
681}
682
683///////////////////////////////////////////////////////////////////////////////
684/// \brief Repeat a sub-expression multiple times.
685///
686/// There are two forms of the repeat\<\>() function template. To match a
687/// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression
688/// from M to N times, use repeat\<M,N\>(expr).
689///
690/// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
691/// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr).
692///
693/// \param expr The sub-expression to repeat.
694template<unsigned int Min, unsigned int Max, typename Expr>
695typename proto::result_of::make_expr<
696 detail::generic_quant_tag<Min, Max>
697 , proto::default_domain
698 , Expr const &
699>::type const
700repeat(Expr const &expr)
701{
702 return proto::make_expr<
703 detail::generic_quant_tag<Min, Max>
704 , proto::default_domain
705 >(boost::ref(expr));
706}
707
708/// \overload
709///
710template<unsigned int Count, typename Expr2>
711typename proto::result_of::make_expr<
712 detail::generic_quant_tag<Count, Count>
713 , proto::default_domain
714 , Expr2 const &
715>::type const
716repeat(Expr2 const &expr2)
717{
718 return proto::make_expr<
719 detail::generic_quant_tag<Count, Count>
720 , proto::default_domain
721 >(boost::ref(expr2));
722}
723
724///////////////////////////////////////////////////////////////////////////////
725/// \brief Create an independent sub-expression.
726///
727/// Turn off back-tracking for a sub-expression. Any branches or repeats within
728/// the sub-expression will match only one way, and no other alternatives are
729/// tried.
730///
731/// \attention keep(expr) is equivalent to the perl (?>...) extension.
732///
733/// \param expr The sub-expression to modify.
734template<typename Expr>
735typename proto::result_of::make_expr<
736 detail::keeper_tag
737 , proto::default_domain
738 , Expr const &
739>::type const
740keep(Expr const &expr)
741{
742 return proto::make_expr<
743 detail::keeper_tag
744 , proto::default_domain
745 >(boost::ref(expr));
746}
747
748///////////////////////////////////////////////////////////////////////////////
749/// \brief Look-ahead assertion.
750///
751/// before(expr) succeeds if the expr sub-expression would match at the current
752/// position in the sequence, but expr is not included in the match. For instance,
753/// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
/// negated with the bit-complement operator.
755///
756/// \attention before(expr) is equivalent to the perl (?=...) extension.
757/// ~before(expr) is a negative look-ahead assertion, equivalent to the
758/// perl (?!...) extension.
759///
760/// \param expr The sub-expression to put in the look-ahead assertion.
761template<typename Expr>
762typename proto::result_of::make_expr<
763 detail::lookahead_tag
764 , proto::default_domain
765 , Expr const &
766>::type const
767before(Expr const &expr)
768{
769 return proto::make_expr<
770 detail::lookahead_tag
771 , proto::default_domain
772 >(boost::ref(expr));
773}
774
775///////////////////////////////////////////////////////////////////////////////
776/// \brief Look-behind assertion.
777///
778/// after(expr) succeeds if the expr sub-expression would match at the current
779/// position minus N in the sequence, where N is the width of expr. expr is not included in
780/// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind
781/// assertions can be negated with the bit-complement operator.
782///
783/// \attention after(expr) is equivalent to the perl (?<=...) extension.
784/// ~after(expr) is a negative look-behind assertion, equivalent to the
785/// perl (?<!...) extension.
786///
787/// \param expr The sub-expression to put in the look-ahead assertion.
788///
789/// \pre expr cannot match a variable number of characters.
790template<typename Expr>
791typename proto::result_of::make_expr<
792 detail::lookbehind_tag
793 , proto::default_domain
794 , Expr const &
795>::type const
796after(Expr const &expr)
797{
798 return proto::make_expr<
799 detail::lookbehind_tag
800 , proto::default_domain
801 >(boost::ref(expr));
802}
803
804///////////////////////////////////////////////////////////////////////////////
805/// \brief Specify a regex traits or a std::locale.
806///
807/// imbue() instructs the regex engine to use the specified traits or locale
808/// when matching the regex. The entire expression must use the same traits/locale.
809/// For instance, the following specifies a locale for use with a regex:
810/// std::locale loc;
811/// sregex rx = imbue(loc)(+digit);
812///
813/// \param loc The std::locale or regex traits object.
814template<typename Locale>
815inline detail::modifier_op<detail::locale_modifier<Locale> > const
816imbue(Locale const &loc)
817{
818 detail::modifier_op<detail::locale_modifier<Locale> > mod =
819 {
820 detail::locale_modifier<Locale>(loc)
821 , regex_constants::ECMAScript
822 };
823 return mod;
824}
825
826proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {.child0: {}};
827proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {.child0: {}};
828proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {.child0: {}};
829proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {.child0: {}};
830proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {.child0: {}};
831proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {.child0: {}};
832proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {.child0: {}};
833proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {.child0: {}};
834proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {.child0: {}};
835
836///////////////////////////////////////////////////////////////////////////////
837/// \brief Specify which characters to skip when matching a regex.
838///
839/// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
840/// a regex. It is most useful for writing regexes that ignore whitespace.
841/// For instance, the following specifies a regex that skips whitespace and
842/// punctuation:
843///
844/// \code
845/// // A sentence is one or more words separated by whitespace
846/// // and punctuation.
847/// sregex word = +alpha;
848/// sregex sentence = skip(set[_s | punct])( +word );
849/// \endcode
850///
851/// The way it works in the above example is to insert
852/// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
853/// A "primitive" includes terminals like strings, character sets and nested
854/// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
855/// regex. The regex <tt>sentence</tt> specified above is equivalent to
856/// the following:
857///
858/// \code
859/// sregex sentence = +( keep(*set[_s | punct]) >> word )
860/// >> *set[_s | punct];
861/// \endcode
862///
863/// \attention Skipping does not affect how nested regexes are handled because
864/// they are treated atomically. String literals are also treated
865/// atomically; that is, no skipping is done within a string literal. So
866/// <tt>skip(_s)("this that")</tt> is not the same as
867/// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
868/// when there is only one space between "this" and "that". The second will
869/// skip any and all whitespace between "this" and "that".
870///
871/// \param skip A regex that specifies which characters to skip.
872template<typename Skip>
873detail::skip_directive<Skip> skip(Skip const &skip)
874{
875 return detail::skip_directive<Skip>(skip);
876}
877
878namespace detail
879{
880 inline void ignore_unused_regex_primitives()
881 {
882 detail::ignore_unused(repeat_max);
883 detail::ignore_unused(inf);
884 detail::ignore_unused(epsilon);
885 detail::ignore_unused(nil);
886 detail::ignore_unused(alnum);
887 detail::ignore_unused(bos);
888 detail::ignore_unused(eos);
889 detail::ignore_unused(bol);
890 detail::ignore_unused(eol);
891 detail::ignore_unused(bow);
892 detail::ignore_unused(eow);
893 detail::ignore_unused(_b);
894 detail::ignore_unused(_w);
895 detail::ignore_unused(_d);
896 detail::ignore_unused(_s);
897 detail::ignore_unused(_n);
898 detail::ignore_unused(_ln);
899 detail::ignore_unused(_);
900 detail::ignore_unused(self);
901 detail::ignore_unused(set);
902 detail::ignore_unused(s0);
903 detail::ignore_unused(s1);
904 detail::ignore_unused(s2);
905 detail::ignore_unused(s3);
906 detail::ignore_unused(s4);
907 detail::ignore_unused(s5);
908 detail::ignore_unused(s6);
909 detail::ignore_unused(s7);
910 detail::ignore_unused(s8);
911 detail::ignore_unused(s9);
912 detail::ignore_unused(a1);
913 detail::ignore_unused(a2);
914 detail::ignore_unused(a3);
915 detail::ignore_unused(a4);
916 detail::ignore_unused(a5);
917 detail::ignore_unused(a6);
918 detail::ignore_unused(a7);
919 detail::ignore_unused(a8);
920 detail::ignore_unused(a9);
921 detail::ignore_unused(as_xpr);
922 }
923}
924
925}} // namespace boost::xpressive
926
927#endif
928

// source code of boost/boost/xpressive/regex_primitives.hpp