1/*
2 *
3 * Copyright (c) 1998-2004 John Maddock
4 * Copyright 2011 Garmin Ltd. or its subsidiaries
5 *
6 * Distributed under the Boost Software License, Version 1.0.
7 * (See accompanying file LICENSE_1_0.txt or copy at
8 * http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org/ for most recent version.
 * FILE basic_regex.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex.
17 */
18
19#ifndef BOOST_REGEX_V5_BASIC_REGEX_HPP
20#define BOOST_REGEX_V5_BASIC_REGEX_HPP
21
22#include <vector>
23
24namespace boost{
25#ifdef BOOST_REGEX_MSVC
26#pragma warning(push)
27#pragma warning(disable : 4251)
28#if BOOST_REGEX_MSVC < 1700
29# pragma warning(disable : 4231)
30#endif
31#if BOOST_REGEX_MSVC < 1600
32#pragma warning(disable : 4660)
33#endif
34#if BOOST_REGEX_MSVC < 1910
35#pragma warning(disable:4800)
36#endif
37#endif
38
39namespace BOOST_REGEX_DETAIL_NS{
40
41//
42// forward declaration, we will need this one later:
43//
44template <class charT, class traits>
45class basic_regex_parser;
46
//
// bubble_down_one:
// Moves the last element of [first, last) towards the front by swapping it
// with its predecessor for as long as it compares less than that
// predecessor.  When everything before the last element is already sorted,
// this leaves the whole range sorted.  Elements must provide a member
// swap() and operator<.  An empty range is left untouched.
//
template <class I>
void bubble_down_one(I first, I last)
{
   if(first == last)
      return;
   for(I cur = last - 1; cur != first; --cur)
   {
      I prev = cur - 1;
      if(!(*cur < *prev))
         break;   // the element has reached its resting place
      prev->swap(*cur);
   }
}
60
// Bit that is OR-ed into every value returned by hash_value_from_capture_name():
// the highest value bit of a (non-negative) int.
// NOTE(review): presumably this lets a hash be told apart from a small
// ordinary index - confirm against the callers.
static const int hash_value_mask = 1 << (std::numeric_limits<int>::digits - 1);

//
// hash_value_from_capture_name:
// Folds every element of [i, j) into a std::size_t seed, reduces the seed
// modulo the largest int, and returns it with hash_value_mask set.
// An empty range therefore yields exactly hash_value_mask.
//
template <class Iterator>
inline int hash_value_from_capture_name(Iterator i, Iterator j)
{
   std::size_t seed = 0;
   for (; i != j; ++i)
   {
      seed ^= *i + 0x9e3779b9 + (seed << 6) + (seed >> 2);
   }
   // bring the seed into int range before narrowing:
   seed %= ((std::numeric_limits<int>::max)());
   return static_cast<int>(seed) | hash_value_mask;
}
75
76class named_subexpressions
77{
78public:
79 struct name
80 {
81 template <class charT>
82 name(const charT* i, const charT* j, int idx)
83 : index(idx)
84 {
85 hash = hash_value_from_capture_name(i, j);
86 }
87 name(int h, int idx)
88 : index(idx), hash(h)
89 {
90 }
91 int index;
92 int hash;
93 bool operator < (const name& other)const
94 {
95 return hash < other.hash;
96 }
97 bool operator == (const name& other)const
98 {
99 return hash == other.hash;
100 }
101 void swap(name& other)
102 {
103 std::swap(a&: index, b&: other.index);
104 std::swap(a&: hash, b&: other.hash);
105 }
106 };
107
108 typedef std::vector<name>::const_iterator const_iterator;
109 typedef std::pair<const_iterator, const_iterator> range_type;
110
111 named_subexpressions(){}
112
113 template <class charT>
114 void set_name(const charT* i, const charT* j, int index)
115 {
116 m_sub_names.push_back(x: name(i, j, index));
117 bubble_down_one(first: m_sub_names.begin(), last: m_sub_names.end());
118 }
119 template <class charT>
120 int get_id(const charT* i, const charT* j)const
121 {
122 name t(i, j, 0);
123 typename std::vector<name>::const_iterator pos = std::lower_bound(first: m_sub_names.begin(), last: m_sub_names.end(), val: t);
124 if((pos != m_sub_names.end()) && (*pos == t))
125 {
126 return pos->index;
127 }
128 return -1;
129 }
130 template <class charT>
131 range_type equal_range(const charT* i, const charT* j)const
132 {
133 name t(i, j, 0);
134 return std::equal_range(first: m_sub_names.begin(), last: m_sub_names.end(), val: t);
135 }
136 int get_id(int h)const
137 {
138 name t(h, 0);
139 std::vector<name>::const_iterator pos = std::lower_bound(first: m_sub_names.begin(), last: m_sub_names.end(), val: t);
140 if((pos != m_sub_names.end()) && (*pos == t))
141 {
142 return pos->index;
143 }
144 return -1;
145 }
146 range_type equal_range(int h)const
147 {
148 name t(h, 0);
149 return std::equal_range(first: m_sub_names.begin(), last: m_sub_names.end(), val: t);
150 }
151private:
152 std::vector<name> m_sub_names;
153};
154
155//
156// class regex_data:
157// represents the data we wish to expose to the matching algorithms.
158//
159template <class charT, class traits>
160struct regex_data : public named_subexpressions
161{
162 typedef regex_constants::syntax_option_type flag_type;
163 typedef std::size_t size_type;
164
165 regex_data(const ::std::shared_ptr<
166 ::boost::regex_traits_wrapper<traits> >& t)
167 : m_ptraits(t), m_flags(0), m_status(0), m_expression(0), m_expression_len(0),
168 m_mark_count(0), m_first_state(0), m_restart_type(0),
169 m_startmap{ 0 },
170 m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {}
171 regex_data()
172 : m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_flags(0), m_status(0), m_expression(0), m_expression_len(0),
173 m_mark_count(0), m_first_state(0), m_restart_type(0),
174 m_startmap{ 0 },
175 m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {}
176
177 ::std::shared_ptr<
178 ::boost::regex_traits_wrapper<traits>
179 > m_ptraits; // traits class instance
180 flag_type m_flags; // flags with which we were compiled
181 int m_status; // error code (0 implies OK).
182 const charT* m_expression; // the original expression
183 std::ptrdiff_t m_expression_len; // the length of the original expression
184 size_type m_mark_count; // the number of marked sub-expressions
185 BOOST_REGEX_DETAIL_NS::re_syntax_base* m_first_state; // the first state of the machine
186 unsigned m_restart_type; // search optimisation type
187 unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
188 unsigned int m_can_be_null; // whether we can match a null string
189 BOOST_REGEX_DETAIL_NS::raw_storage m_data; // the buffer in which our states are constructed
190 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
191 std::vector<
192 std::pair<
193 std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
194 bool m_has_recursions; // whether we have recursive expressions;
195 bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour.
196};
197//
198// class basic_regex_implementation
199// pimpl implementation class for basic_regex.
200//
201template <class charT, class traits>
202class basic_regex_implementation
203 : public regex_data<charT, traits>
204{
205public:
206 typedef regex_constants::syntax_option_type flag_type;
207 typedef std::ptrdiff_t difference_type;
208 typedef std::size_t size_type;
209 typedef typename traits::locale_type locale_type;
210 typedef const charT* const_iterator;
211
212 basic_regex_implementation(){}
213 basic_regex_implementation(const ::std::shared_ptr<
214 ::boost::regex_traits_wrapper<traits> >& t)
215 : regex_data<charT, traits>(t) {}
216 void assign(const charT* arg_first,
217 const charT* arg_last,
218 flag_type f)
219 {
220 regex_data<charT, traits>* pdat = this;
221 basic_regex_parser<charT, traits> parser(pdat);
222 parser.parse(arg_first, arg_last, f);
223 }
224
225 locale_type imbue(locale_type l)
226 {
227 return this->m_ptraits->imbue(l);
228 }
229 locale_type getloc()const
230 {
231 return this->m_ptraits->getloc();
232 }
233 std::basic_string<charT> str()const
234 {
235 std::basic_string<charT> result;
236 if(this->m_status == 0)
237 result = std::basic_string<charT>(this->m_expression, this->m_expression_len);
238 return result;
239 }
240 const_iterator expression()const
241 {
242 return this->m_expression;
243 }
244 std::pair<const_iterator, const_iterator> subexpression(std::size_t n)const
245 {
246 const std::pair<std::size_t, std::size_t>& pi = this->m_subs.at(n);
247 std::pair<const_iterator, const_iterator> p(expression() + pi.first, expression() + pi.second);
248 return p;
249 }
250 //
251 // begin, end:
252 const_iterator begin()const
253 {
254 return (this->m_status ? 0 : this->m_expression);
255 }
256 const_iterator end()const
257 {
258 return (this->m_status ? 0 : this->m_expression + this->m_expression_len);
259 }
260 flag_type flags()const
261 {
262 return this->m_flags;
263 }
264 size_type size()const
265 {
266 return this->m_expression_len;
267 }
268 int status()const
269 {
270 return this->m_status;
271 }
272 size_type mark_count()const
273 {
274 return this->m_mark_count - 1;
275 }
276 const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const
277 {
278 return this->m_first_state;
279 }
280 unsigned get_restart_type()const
281 {
282 return this->m_restart_type;
283 }
284 const unsigned char* get_map()const
285 {
286 return this->m_startmap;
287 }
288 const ::boost::regex_traits_wrapper<traits>& get_traits()const
289 {
290 return *(this->m_ptraits);
291 }
292 bool can_be_null()const
293 {
294 return this->m_can_be_null;
295 }
296 const regex_data<charT, traits>& get_data()const
297 {
298 basic_regex_implementation<charT, traits> const* p = this;
299 return *static_cast<const regex_data<charT, traits>*>(p);
300 }
301};
302
303} // namespace BOOST_REGEX_DETAIL_NS
304//
305// class basic_regex:
306// represents the compiled
307// regular expression:
308//
309
310#ifdef BOOST_REGEX_NO_FWD
311template <class charT, class traits = regex_traits<charT> >
312#else
313template <class charT, class traits >
314#endif
315class basic_regex : public regbase
316{
317public:
318 // typedefs:
319 typedef std::size_t traits_size_type;
320 typedef typename traits::string_type traits_string_type;
321 typedef charT char_type;
322 typedef traits traits_type;
323
324 typedef charT value_type;
325 typedef charT& reference;
326 typedef const charT& const_reference;
327 typedef const charT* const_iterator;
328 typedef const_iterator iterator;
329 typedef std::ptrdiff_t difference_type;
330 typedef std::size_t size_type;
331 typedef regex_constants::syntax_option_type flag_type;
332 // locale_type
333 // placeholder for actual locale type used by the
334 // traits class to localise *this.
335 typedef typename traits::locale_type locale_type;
336
337public:
338 explicit basic_regex(){}
339 explicit basic_regex(const charT* p, flag_type f = regex_constants::normal)
340 {
341 assign(p, f);
342 }
343 basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
344 {
345 assign(p1, p2, f);
346 }
347 basic_regex(const charT* p, size_type len, flag_type f)
348 {
349 assign(p, len, f);
350 }
351 basic_regex(const basic_regex& that)
352 : m_pimpl(that.m_pimpl) {}
353 ~basic_regex(){}
354 basic_regex& operator=(const basic_regex& that)
355 {
356 return assign(that);
357 }
358 basic_regex& operator=(const charT* ptr)
359 {
360 return assign(ptr);
361 }
362
363 //
364 // assign:
365 basic_regex& assign(const basic_regex& that)
366 {
367 m_pimpl = that.m_pimpl;
368 return *this;
369 }
370 basic_regex& assign(const charT* p, flag_type f = regex_constants::normal)
371 {
372 return assign(p, p + traits::length(p), f);
373 }
374 basic_regex& assign(const charT* p, size_type len, flag_type f)
375 {
376 return assign(p, p + len, f);
377 }
378private:
379 basic_regex& do_assign(const charT* p1,
380 const charT* p2,
381 flag_type f);
382public:
383 basic_regex& assign(const charT* p1,
384 const charT* p2,
385 flag_type f = regex_constants::normal)
386 {
387 return do_assign(p1, p2, f);
388 }
389
390 template <class ST, class SA>
391 unsigned int set_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
392 {
393 return set_expression(p.data(), p.data() + p.size(), f);
394 }
395
396 template <class ST, class SA>
397 explicit basic_regex(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
398 {
399 assign(p, f);
400 }
401
402 template <class InputIterator>
403 basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
404 {
405 typedef typename traits::string_type seq_type;
406 seq_type a(arg_first, arg_last);
407 if(!a.empty())
408 assign(static_cast<const charT*>(&*a.begin()), static_cast<const charT*>(&*a.begin() + a.size()), f);
409 else
410 assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
411 }
412
413 template <class ST, class SA>
414 basic_regex& operator=(const std::basic_string<charT, ST, SA>& p)
415 {
416 return assign(p.data(), p.data() + p.size(), regex_constants::normal);
417 }
418
419 template <class string_traits, class A>
420 basic_regex& assign(
421 const std::basic_string<charT, string_traits, A>& s,
422 flag_type f = regex_constants::normal)
423 {
424 return assign(s.data(), s.data() + s.size(), f);
425 }
426
427 template <class InputIterator>
428 basic_regex& assign(InputIterator arg_first,
429 InputIterator arg_last,
430 flag_type f = regex_constants::normal)
431 {
432 typedef typename traits::string_type seq_type;
433 seq_type a(arg_first, arg_last);
434 if(a.size())
435 {
436 const charT* p1 = &*a.begin();
437 const charT* p2 = &*a.begin() + a.size();
438 return assign(p1, p2, f);
439 }
440 return assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
441 }
442
443 //
444 // locale:
445 locale_type imbue(locale_type l);
446 locale_type getloc()const
447 {
448 return m_pimpl.get() ? m_pimpl->getloc() : locale_type();
449 }
450 //
451 // getflags:
452 // retained for backwards compatibility only, "flags"
453 // is now the preferred name:
454 flag_type getflags()const
455 {
456 return flags();
457 }
458 flag_type flags()const
459 {
460 return m_pimpl.get() ? m_pimpl->flags() : 0;
461 }
462 //
463 // str:
464 std::basic_string<charT> str()const
465 {
466 return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
467 }
468 //
469 // begin, end, subexpression:
470 std::pair<const_iterator, const_iterator> subexpression(std::size_t n)const
471 {
472#ifdef BOOST_REGEX_STANDALONE
473 if (!m_pimpl.get())
474 throw std::logic_error("Can't access subexpressions in an invalid regex.");
475#else
476 if(!m_pimpl.get())
477 boost::throw_exception(e: std::logic_error("Can't access subexpressions in an invalid regex."));
478#endif
479 return m_pimpl->subexpression(n);
480 }
481 const_iterator begin()const
482 {
483 return (m_pimpl.get() ? m_pimpl->begin() : 0);
484 }
485 const_iterator end()const
486 {
487 return (m_pimpl.get() ? m_pimpl->end() : 0);
488 }
489 //
490 // swap:
491 void swap(basic_regex& that)throw()
492 {
493 m_pimpl.swap(that.m_pimpl);
494 }
495 //
496 // size:
497 size_type size()const
498 {
499 return (m_pimpl.get() ? m_pimpl->size() : 0);
500 }
501 //
502 // max_size:
503 size_type max_size()const
504 {
505 return UINT_MAX;
506 }
507 //
508 // empty:
509 bool empty()const
510 {
511 return (m_pimpl.get() ? 0 != m_pimpl->status() : true);
512 }
513
514 size_type mark_count()const
515 {
516 return (m_pimpl.get() ? m_pimpl->mark_count() : 0);
517 }
518
519 int status()const
520 {
521 return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty);
522 }
523
524 int compare(const basic_regex& that) const
525 {
526 if(m_pimpl.get() == that.m_pimpl.get())
527 return 0;
528 if(!m_pimpl.get())
529 return -1;
530 if(!that.m_pimpl.get())
531 return 1;
532 if(status() != that.status())
533 return status() - that.status();
534 if(flags() != that.flags())
535 return flags() - that.flags();
536 return str().compare(that.str());
537 }
538 bool operator==(const basic_regex& e)const
539 {
540 return compare(that: e) == 0;
541 }
542 bool operator != (const basic_regex& e)const
543 {
544 return compare(that: e) != 0;
545 }
546 bool operator<(const basic_regex& e)const
547 {
548 return compare(that: e) < 0;
549 }
550 bool operator>(const basic_regex& e)const
551 {
552 return compare(that: e) > 0;
553 }
554 bool operator<=(const basic_regex& e)const
555 {
556 return compare(that: e) <= 0;
557 }
558 bool operator>=(const basic_regex& e)const
559 {
560 return compare(that: e) >= 0;
561 }
562
563 //
564 // The following are deprecated as public interfaces
565 // but are available for compatibility with earlier versions.
566 const charT* expression()const
567 {
568 return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0);
569 }
570 unsigned int set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
571 {
572 assign(p1, p2, f | regex_constants::no_except);
573 return status();
574 }
575 unsigned int set_expression(const charT* p, flag_type f = regex_constants::normal)
576 {
577 assign(p, f | regex_constants::no_except);
578 return status();
579 }
580 unsigned int error_code()const
581 {
582 return status();
583 }
584 //
585 // private access methods:
586 //
587 const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const
588 {
589 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
590 return m_pimpl->get_first_state();
591 }
592 unsigned get_restart_type()const
593 {
594 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
595 return m_pimpl->get_restart_type();
596 }
597 const unsigned char* get_map()const
598 {
599 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
600 return m_pimpl->get_map();
601 }
602 const ::boost::regex_traits_wrapper<traits>& get_traits()const
603 {
604 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
605 return m_pimpl->get_traits();
606 }
607 bool can_be_null()const
608 {
609 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
610 return m_pimpl->can_be_null();
611 }
612 const BOOST_REGEX_DETAIL_NS::regex_data<charT, traits>& get_data()const
613 {
614 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
615 return m_pimpl->get_data();
616 }
617 std::shared_ptr<BOOST_REGEX_DETAIL_NS::named_subexpressions > get_named_subs()const
618 {
619 return m_pimpl;
620 }
621
622private:
623 std::shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > m_pimpl;
624};
625
626//
627// out of line members;
628// these are the only members that mutate the basic_regex object,
629// and are designed to provide the strong exception guarantee
630// (in the event of a throw, the state of the object remains unchanged).
631//
632template <class charT, class traits>
633basic_regex<charT, traits>& basic_regex<charT, traits>::do_assign(const charT* p1,
634 const charT* p2,
635 flag_type f)
636{
637 std::shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > temp;
638 if(!m_pimpl.get())
639 {
640 temp = std::shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>());
641 }
642 else
643 {
644 temp = std::shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>(m_pimpl->m_ptraits));
645 }
646 temp->assign(p1, p2, f);
647 temp.swap(m_pimpl);
648 return *this;
649}
650
651template <class charT, class traits>
652typename basic_regex<charT, traits>::locale_type basic_regex<charT, traits>::imbue(locale_type l)
653{
654 std::shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > temp(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>());
655 locale_type result = temp->imbue(l);
656 temp.swap(m_pimpl);
657 return result;
658}
659
660//
661// non-members:
662//
663template <class charT, class traits>
664void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2)
665{
666 e1.swap(e2);
667}
668
669template <class charT, class traits, class traits2>
670std::basic_ostream<charT, traits>&
671 operator << (std::basic_ostream<charT, traits>& os,
672 const basic_regex<charT, traits2>& e)
673{
674 return (os << e.str());
675}
676
677//
678// class reg_expression:
679// this is provided for backwards compatibility only,
// it is deprecated, do not use!
681//
682#ifdef BOOST_REGEX_NO_FWD
683template <class charT, class traits = regex_traits<charT> >
684#else
685template <class charT, class traits >
686#endif
687class reg_expression : public basic_regex<charT, traits>
688{
689public:
690 typedef typename basic_regex<charT, traits>::flag_type flag_type;
691 typedef typename basic_regex<charT, traits>::size_type size_type;
692 explicit reg_expression(){}
693 explicit reg_expression(const charT* p, flag_type f = regex_constants::normal)
694 : basic_regex<charT, traits>(p, f){}
695 reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
696 : basic_regex<charT, traits>(p1, p2, f){}
697 reg_expression(const charT* p, size_type len, flag_type f)
698 : basic_regex<charT, traits>(p, len, f){}
699 reg_expression(const reg_expression& that)
700 : basic_regex<charT, traits>(that) {}
701 ~reg_expression(){}
702 reg_expression& operator=(const reg_expression& that)
703 {
704 return this->assign(that);
705 }
706
707 template <class ST, class SA>
708 explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
709 : basic_regex<charT, traits>(p, f)
710 {
711 }
712
713 template <class InputIterator>
714 reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
715 : basic_regex<charT, traits>(arg_first, arg_last, f)
716 {
717 }
718
719 template <class ST, class SA>
720 reg_expression& operator=(const std::basic_string<charT, ST, SA>& p)
721 {
722 this->assign(p);
723 return *this;
724 }
725
726};
727
728#ifdef BOOST_REGEX_MSVC
729#pragma warning (pop)
730#endif
731
732} // namespace boost
733
734#endif
735

source code of boost/libs/regex/include/boost/regex/v5/basic_regex.hpp