1// Boost string_algo library regex.hpp header file ---------------------------//
2
3// Copyright Pavol Droba 2002-2003.
4//
5// Distributed under the Boost Software License, Version 1.0.
6// (See accompanying file LICENSE_1_0.txt or copy at
7// http://www.boost.org/LICENSE_1_0.txt)
8
9// See http://www.boost.org/ for updates, documentation, and revision history.
10
11#ifndef BOOST_STRING_REGEX_HPP
12#define BOOST_STRING_REGEX_HPP
13
14#include <boost/algorithm/string/config.hpp>
15#include <boost/regex.hpp>
16
17#include <boost/range/iterator_range_core.hpp>
18#include <boost/range/begin.hpp>
19#include <boost/range/end.hpp>
20#include <boost/range/iterator.hpp>
21#include <boost/range/as_literal.hpp>
22
23#include <boost/algorithm/string/find_format.hpp>
24#include <boost/algorithm/string/regex_find_format.hpp>
25#include <boost/algorithm/string/formatter.hpp>
26#include <boost/algorithm/string/iter_find.hpp>
27
28/*! \file
29 Defines regex variants of the algorithms.
30*/
31
32namespace boost {
33 namespace algorithm {
34
35// find_regex -----------------------------------------------//
36
37 //! Find regex algorithm
38 /*!
39 Search for a substring matching the given regex in the input.
40
41 \param Input A container which will be searched.
42 \param Rx A regular expression
43 \param Flags Regex options
44 \return
45 An \c iterator_range delimiting the match.
46 Returned iterator is either \c RangeT::iterator or
47 \c RangeT::const_iterator, depending on the constness of
48 the input parameter.
49
50 \note This function provides the strong exception-safety guarantee
51 */
52 template<
53 typename RangeT,
54 typename CharT,
55 typename RegexTraitsT>
56 inline iterator_range<
57 BOOST_STRING_TYPENAME range_iterator<RangeT>::type >
58 find_regex(
59 RangeT& Input,
60 const basic_regex<CharT, RegexTraitsT>& Rx,
61 match_flag_type Flags=match_default )
62 {
63 iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type> lit_input(::boost::as_literal(Input));
64
65 return ::boost::algorithm::regex_finder(Rx,Flags)(
66 ::boost::begin(lit_input), ::boost::end(lit_input) );
67 }
68
69// replace_regex --------------------------------------------------------------------//
70
71 //! Replace regex algorithm
72 /*!
73 Search for a substring matching given regex and format it with
74 the specified format.
75 The result is a modified copy of the input. It is returned as a sequence
76 or copied to the output iterator.
77
78 \param Output An output iterator to which the result will be copied
79 \param Input An input string
80 \param Rx A regular expression
81 \param Format Regex format definition
82 \param Flags Regex options
83 \return An output iterator pointing just after the last inserted character or
84 a modified copy of the input
85
86 \note The second variant of this function provides the strong exception-safety guarantee
87 */
88 template<
89 typename OutputIteratorT,
90 typename RangeT,
91 typename CharT,
92 typename RegexTraitsT,
93 typename FormatStringTraitsT, typename FormatStringAllocatorT >
94 inline OutputIteratorT replace_regex_copy(
95 OutputIteratorT Output,
96 const RangeT& Input,
97 const basic_regex<CharT, RegexTraitsT>& Rx,
98 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
99 match_flag_type Flags=match_default | format_default )
100 {
101 return ::boost::algorithm::find_format_copy(
102 Output,
103 Input,
104 ::boost::algorithm::regex_finder( Rx, Flags ),
105 ::boost::algorithm::regex_formatter( Format, Flags ) );
106 }
107
108 //! Replace regex algorithm
109 /*!
110 \overload
111 */
112 template<
113 typename SequenceT,
114 typename CharT,
115 typename RegexTraitsT,
116 typename FormatStringTraitsT, typename FormatStringAllocatorT >
117 inline SequenceT replace_regex_copy(
118 const SequenceT& Input,
119 const basic_regex<CharT, RegexTraitsT>& Rx,
120 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
121 match_flag_type Flags=match_default | format_default )
122 {
123 return ::boost::algorithm::find_format_copy(
124 Input,
125 ::boost::algorithm::regex_finder( Rx, Flags ),
126 ::boost::algorithm::regex_formatter( Format, Flags ) );
127 }
128
129 //! Replace regex algorithm
130 /*!
131 Search for a substring matching given regex and format it with
132 the specified format. The input string is modified in-place.
133
134 \param Input An input string
135 \param Rx A regular expression
136 \param Format Regex format definition
137 \param Flags Regex options
138 */
139 template<
140 typename SequenceT,
141 typename CharT,
142 typename RegexTraitsT,
143 typename FormatStringTraitsT, typename FormatStringAllocatorT >
144 inline void replace_regex(
145 SequenceT& Input,
146 const basic_regex<CharT, RegexTraitsT>& Rx,
147 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
148 match_flag_type Flags=match_default | format_default )
149 {
150 ::boost::algorithm::find_format(
151 Input,
152 ::boost::algorithm::regex_finder( Rx, Flags ),
153 ::boost::algorithm::regex_formatter( Format, Flags ) );
154 }
155
156// replace_all_regex --------------------------------------------------------------------//
157
158 //! Replace all regex algorithm
159 /*!
160 Format all substrings, matching given regex, with the specified format.
161 The result is a modified copy of the input. It is returned as a sequence
162 or copied to the output iterator.
163
164 \param Output An output iterator to which the result will be copied
165 \param Input An input string
166 \param Rx A regular expression
167 \param Format Regex format definition
168 \param Flags Regex options
169 \return An output iterator pointing just after the last inserted character or
170 a modified copy of the input
171
172 \note The second variant of this function provides the strong exception-safety guarantee
173 */
174 template<
175 typename OutputIteratorT,
176 typename RangeT,
177 typename CharT,
178 typename RegexTraitsT,
179 typename FormatStringTraitsT, typename FormatStringAllocatorT >
180 inline OutputIteratorT replace_all_regex_copy(
181 OutputIteratorT Output,
182 const RangeT& Input,
183 const basic_regex<CharT, RegexTraitsT>& Rx,
184 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
185 match_flag_type Flags=match_default | format_default )
186 {
187 return ::boost::algorithm::find_format_all_copy(
188 Output,
189 Input,
190 ::boost::algorithm::regex_finder( Rx, Flags ),
191 ::boost::algorithm::regex_formatter( Format, Flags ) );
192 }
193
194 //! Replace all regex algorithm
195 /*!
196 \overload
197 */
198 template<
199 typename SequenceT,
200 typename CharT,
201 typename RegexTraitsT,
202 typename FormatStringTraitsT, typename FormatStringAllocatorT >
203 inline SequenceT replace_all_regex_copy(
204 const SequenceT& Input,
205 const basic_regex<CharT, RegexTraitsT>& Rx,
206 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
207 match_flag_type Flags=match_default | format_default )
208 {
209 return ::boost::algorithm::find_format_all_copy(
210 Input,
211 ::boost::algorithm::regex_finder( Rx, Flags ),
212 ::boost::algorithm::regex_formatter( Format, Flags ) );
213 }
214
215 //! Replace all regex algorithm
216 /*!
217 Format all substrings, matching given regex, with the specified format.
218 The input string is modified in-place.
219
220 \param Input An input string
221 \param Rx A regular expression
222 \param Format Regex format definition
223 \param Flags Regex options
224 */
225 template<
226 typename SequenceT,
227 typename CharT,
228 typename RegexTraitsT,
229 typename FormatStringTraitsT, typename FormatStringAllocatorT >
230 inline void replace_all_regex(
231 SequenceT& Input,
232 const basic_regex<CharT, RegexTraitsT>& Rx,
233 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
234 match_flag_type Flags=match_default | format_default )
235 {
236 ::boost::algorithm::find_format_all(
237 Input,
238 ::boost::algorithm::regex_finder( Rx, Flags ),
239 ::boost::algorithm::regex_formatter( Format, Flags ) );
240 }
241
242// erase_regex --------------------------------------------------------------------//
243
244 //! Erase regex algorithm
245 /*!
246 Remove a substring matching given regex from the input.
247 The result is a modified copy of the input. It is returned as a sequence
248 or copied to the output iterator.
249
250 \param Output An output iterator to which the result will be copied
251 \param Input An input string
252 \param Rx A regular expression
253 \param Flags Regex options
254 \return An output iterator pointing just after the last inserted character or
255 a modified copy of the input
256
257 \note The second variant of this function provides the strong exception-safety guarantee
258 */
259 template<
260 typename OutputIteratorT,
261 typename RangeT,
262 typename CharT,
263 typename RegexTraitsT >
264 inline OutputIteratorT erase_regex_copy(
265 OutputIteratorT Output,
266 const RangeT& Input,
267 const basic_regex<CharT, RegexTraitsT>& Rx,
268 match_flag_type Flags=match_default )
269 {
270 return ::boost::algorithm::find_format_copy(
271 Output,
272 Input,
273 ::boost::algorithm::regex_finder( Rx, Flags ),
274 ::boost::algorithm::empty_formatter( Input ) );
275 }
276
277 //! Erase regex algorithm
278 /*!
279 \overload
280 */
281 template<
282 typename SequenceT,
283 typename CharT,
284 typename RegexTraitsT >
285 inline SequenceT erase_regex_copy(
286 const SequenceT& Input,
287 const basic_regex<CharT, RegexTraitsT>& Rx,
288 match_flag_type Flags=match_default )
289 {
290 return ::boost::algorithm::find_format_copy(
291 Input,
292 ::boost::algorithm::regex_finder( Rx, Flags ),
293 ::boost::algorithm::empty_formatter( Input ) );
294 }
295
296 //! Erase regex algorithm
297 /*!
298 Remove a substring matching given regex from the input.
299 The input string is modified in-place.
300
301 \param Input An input string
302 \param Rx A regular expression
303 \param Flags Regex options
304 */
305 template<
306 typename SequenceT,
307 typename CharT,
308 typename RegexTraitsT >
309 inline void erase_regex(
310 SequenceT& Input,
311 const basic_regex<CharT, RegexTraitsT>& Rx,
312 match_flag_type Flags=match_default )
313 {
314 ::boost::algorithm::find_format(
315 Input,
316 ::boost::algorithm::regex_finder( Rx, Flags ),
317 ::boost::algorithm::empty_formatter( Input ) );
318 }
319
320// erase_all_regex --------------------------------------------------------------------//
321
322 //! Erase all regex algorithm
323 /*!
324 Erase all substrings, matching given regex, from the input.
325 The result is a modified copy of the input. It is returned as a sequence
326 or copied to the output iterator.
327
328
329 \param Output An output iterator to which the result will be copied
330 \param Input An input string
331 \param Rx A regular expression
332 \param Flags Regex options
333 \return An output iterator pointing just after the last inserted character or
334 a modified copy of the input
335
336 \note The second variant of this function provides the strong exception-safety guarantee
337 */
338 template<
339 typename OutputIteratorT,
340 typename RangeT,
341 typename CharT,
342 typename RegexTraitsT >
343 inline OutputIteratorT erase_all_regex_copy(
344 OutputIteratorT Output,
345 const RangeT& Input,
346 const basic_regex<CharT, RegexTraitsT>& Rx,
347 match_flag_type Flags=match_default )
348 {
349 return ::boost::algorithm::find_format_all_copy(
350 Output,
351 Input,
352 ::boost::algorithm::regex_finder( Rx, Flags ),
353 ::boost::algorithm::empty_formatter( Input ) );
354 }
355
356 //! Erase all regex algorithm
357 /*!
358 \overload
359 */
360 template<
361 typename SequenceT,
362 typename CharT,
363 typename RegexTraitsT >
364 inline SequenceT erase_all_regex_copy(
365 const SequenceT& Input,
366 const basic_regex<CharT, RegexTraitsT>& Rx,
367 match_flag_type Flags=match_default )
368 {
369 return ::boost::algorithm::find_format_all_copy(
370 Input,
371 ::boost::algorithm::regex_finder( Rx, Flags ),
372 ::boost::algorithm::empty_formatter( Input ) );
373 }
374
375 //! Erase all regex algorithm
376 /*!
377 Erase all substrings, matching given regex, from the input.
378 The input string is modified in-place.
379
380 \param Input An input string
381 \param Rx A regular expression
382 \param Flags Regex options
383 */
384 template<
385 typename SequenceT,
386 typename CharT,
387 typename RegexTraitsT>
388 inline void erase_all_regex(
389 SequenceT& Input,
390 const basic_regex<CharT, RegexTraitsT>& Rx,
391 match_flag_type Flags=match_default )
392 {
393 ::boost::algorithm::find_format_all(
394 Input,
395 ::boost::algorithm::regex_finder( Rx, Flags ),
396 ::boost::algorithm::empty_formatter( Input ) );
397 }
398
399// find_all_regex ------------------------------------------------------------------//
400
401 //! Find all regex algorithm
402 /*!
403 This algorithm finds all substrings matching the give regex
404 in the input.
405
406 Each part is copied and added as a new element to the output container.
407 Thus the result container must be able to hold copies
408 of the matches (in a compatible structure like std::string) or
409 a reference to it (e.g. using the iterator range class).
410 Examples of such a container are \c std::vector<std::string>
411 or \c std::list<boost::iterator_range<std::string::iterator>>
412
413 \param Result A container that can hold copies of references to the substrings.
414 \param Input A container which will be searched.
415 \param Rx A regular expression
416 \param Flags Regex options
417 \return A reference to the result
418
419 \note Prior content of the result will be overwritten.
420
421 \note This function provides the strong exception-safety guarantee
422 */
423 template<
424 typename SequenceSequenceT,
425 typename RangeT,
426 typename CharT,
427 typename RegexTraitsT >
428 inline SequenceSequenceT& find_all_regex(
429 SequenceSequenceT& Result,
430 const RangeT& Input,
431 const basic_regex<CharT, RegexTraitsT>& Rx,
432 match_flag_type Flags=match_default )
433 {
434 return ::boost::algorithm::iter_find(
435 Result,
436 Input,
437 ::boost::algorithm::regex_finder(Rx,Flags) );
438 }
439
440// split_regex ------------------------------------------------------------------//
441
442 //! Split regex algorithm
443 /*!
444 Tokenize expression. This function is equivalent to C strtok. Input
445 sequence is split into tokens, separated by separators. Separator
446 is an every match of the given regex.
447 Each part is copied and added as a new element to the output container.
448 Thus the result container must be able to hold copies
449 of the matches (in a compatible structure like std::string) or
450 a reference to it (e.g. using the iterator range class).
451 Examples of such a container are \c std::vector<std::string>
452 or \c std::list<boost::iterator_range<std::string::iterator>>
453
454 \param Result A container that can hold copies of references to the substrings.
455 \param Input A container which will be searched.
456 \param Rx A regular expression
457 \param Flags Regex options
458 \return A reference to the result
459
460 \note Prior content of the result will be overwritten.
461
462 \note This function provides the strong exception-safety guarantee
463 */
464 template<
465 typename SequenceSequenceT,
466 typename RangeT,
467 typename CharT,
468 typename RegexTraitsT >
469 inline SequenceSequenceT& split_regex(
470 SequenceSequenceT& Result,
471 const RangeT& Input,
472 const basic_regex<CharT, RegexTraitsT>& Rx,
473 match_flag_type Flags=match_default )
474 {
475 return ::boost::algorithm::iter_split(
476 Result,
477 Input,
478 ::boost::algorithm::regex_finder(Rx,Flags) );
479 }
480
481// join_if ------------------------------------------------------------------//
482
483#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
484
485 //! Conditional join algorithm
486 /*!
487 This algorithm joins all strings in a 'list' into one long string.
488 Segments are concatenated by given separator. Only segments that
489 match the given regular expression will be added to the result
490
491 This is a specialization of join_if algorithm.
492
493 \param Input A container that holds the input strings. It must be a container-of-containers.
494 \param Separator A string that will separate the joined segments.
495 \param Rx A regular expression
496 \param Flags Regex options
497 \return Concatenated string.
498
499 \note This function provides the strong exception-safety guarantee
500 */
501 template<
502 typename SequenceSequenceT,
503 typename Range1T,
504 typename CharT,
505 typename RegexTraitsT >
506 inline typename range_value<SequenceSequenceT>::type
507 join_if(
508 const SequenceSequenceT& Input,
509 const Range1T& Separator,
510 const basic_regex<CharT, RegexTraitsT>& Rx,
511 match_flag_type Flags=match_default )
512 {
513 // Define working types
514 typedef typename range_value<SequenceSequenceT>::type ResultT;
515 typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
516
517 // Parse input
518 InputIteratorT itBegin=::boost::begin(Input);
519 InputIteratorT itEnd=::boost::end(Input);
520
521 // Construct container to hold the result
522 ResultT Result;
523
524
525 // Roll to the first element that will be added
526 while(
527 itBegin!=itEnd &&
528 !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
529
530 // Add this element
531 if(itBegin!=itEnd)
532 {
533 detail::insert(Result, ::boost::end(Result), *itBegin);
534 ++itBegin;
535 }
536
537 for(;itBegin!=itEnd; ++itBegin)
538 {
539 if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
540 {
541 // Add separator
542 detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
543 // Add element
544 detail::insert(Result, ::boost::end(Result), *itBegin);
545 }
546 }
547
548 return Result;
549 }
550
551#else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
552
553 //! Conditional join algorithm
554 /*!
555 This algorithm joins all strings in a 'list' into one long string.
556 Segments are concatenated by given separator. Only segments that
557 match the given regular expression will be added to the result
558
559 This is a specialization of join_if algorithm.
560
561 \param Input A container that holds the input strings. It must be a container-of-containers.
562 \param Separator A string that will separate the joined segments.
563 \param Rx A regular expression
564 \param Flags Regex options
565 \return Concatenated string.
566
567 \note This function provides the strong exception-safety guarantee
568 */
569 template<
570 typename SequenceSequenceT,
571 typename Range1T,
572 typename CharT,
573 typename RegexTraitsT >
574 inline typename range_value<SequenceSequenceT>::type
575 join_if_regex(
576 const SequenceSequenceT& Input,
577 const Range1T& Separator,
578 const basic_regex<CharT, RegexTraitsT>& Rx,
579 match_flag_type Flags=match_default )
580 {
581 // Define working types
582 typedef typename range_value<SequenceSequenceT>::type ResultT;
583 typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
584
585 // Parse input
586 InputIteratorT itBegin=::boost::begin(Input);
587 InputIteratorT itEnd=::boost::end(Input);
588
589 // Construct container to hold the result
590 ResultT Result;
591
592
593 // Roll to the first element that will be added
594 while(
595 itBegin!=itEnd &&
596 !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
597
598 // Add this element
599 if(itBegin!=itEnd)
600 {
601 detail::insert(Result, ::boost::end(Result), *itBegin);
602 ++itBegin;
603 }
604
605 for(;itBegin!=itEnd; ++itBegin)
606 {
607 if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
608 {
609 // Add separator
610 detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
611 // Add element
612 detail::insert(Result, ::boost::end(Result), *itBegin);
613 }
614 }
615
616 return Result;
617 }
618
619
620#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
621
622 } // namespace algorithm
623
624 // pull names into the boost namespace
625 using algorithm::find_regex;
626 using algorithm::replace_regex;
627 using algorithm::replace_regex_copy;
628 using algorithm::replace_all_regex;
629 using algorithm::replace_all_regex_copy;
630 using algorithm::erase_regex;
631 using algorithm::erase_regex_copy;
632 using algorithm::erase_all_regex;
633 using algorithm::erase_all_regex_copy;
634 using algorithm::find_all_regex;
635 using algorithm::split_regex;
636
637#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
638 using algorithm::join_if;
639#else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
640 using algorithm::join_if_regex;
641#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
642
643} // namespace boost
644
645
646#endif // BOOST_STRING_REGEX_HPP
647

source code of boost/boost/algorithm/string/regex.hpp