1/*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8*/
9
10#ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
11#define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
12
13#include <iterator> // for std::iterator_traits
14
15#include <boost/config.hpp>
16#include <boost/assert.hpp>
17#include <boost/static_assert.hpp>
18
19#include <boost/range/begin.hpp>
20#include <boost/range/end.hpp>
21
22#include <boost/core/enable_if.hpp>
23#include <boost/type_traits/is_same.hpp>
24
25#include <boost/algorithm/searching/detail/bm_traits.hpp>
26#include <boost/algorithm/searching/detail/debugging.hpp>
27
28// #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
29
30namespace boost { namespace algorithm {
31
32/*
33 A templated version of the boyer-moore-horspool searching algorithm.
34
35 Requirements:
36 * Random access iterators
37 * The two iterator types (patIter and corpusIter) must
38 "point to" the same underlying type.
39 * Additional requirements may be imposed buy the skip table, such as:
40 ** Numeric type (array-based skip table)
41 ** Hashable type (map-based skip table)
42
43http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
44
45*/
46
47 template <typename patIter, typename traits = detail::BM_traits<patIter> >
48 class boyer_moore_horspool {
49 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
50 public:
51 boyer_moore_horspool ( patIter first, patIter last )
52 : pat_first ( first ), pat_last ( last ),
53 k_pattern_length ( std::distance ( pat_first, pat_last )),
54 skip_ ( k_pattern_length, k_pattern_length ) {
55
56 // Build the skip table
57 std::size_t i = 0;
58 if ( first != last ) // empty pattern?
59 for ( patIter iter = first; iter != last-1; ++iter, ++i )
60 skip_.insert ( *iter, k_pattern_length - 1 - i );
61#ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
62 skip_.PrintSkipTable ();
63#endif
64 }
65
66 ~boyer_moore_horspool () {}
67
68 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last)
69 /// \brief Searches the corpus for the pattern that was passed into the constructor
70 ///
71 /// \param corpus_first The start of the data to search (Random Access Iterator)
72 /// \param corpus_last One past the end of the data to search
73 ///
74 template <typename corpusIter>
75 std::pair<corpusIter, corpusIter>
76 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
77 BOOST_STATIC_ASSERT (( boost::is_same<
78 typename std::iterator_traits<patIter>::value_type,
79 typename std::iterator_traits<corpusIter>::value_type>::value ));
80
81 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it!
82 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
83
84 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
85 // If the pattern is larger than the corpus, we can't find it!
86 if ( k_corpus_length < k_pattern_length )
87 return std::make_pair(corpus_last, corpus_last);
88
89 // Do the search
90 return this->do_search ( corpus_first, corpus_last );
91 }
92
93 template <typename Range>
94 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
95 operator () ( Range &r ) const {
96 return (*this) (boost::begin(r), boost::end(r));
97 }
98
99 private:
100/// \cond DOXYGEN_HIDE
101 patIter pat_first, pat_last;
102 const difference_type k_pattern_length;
103 typename traits::skip_table_t skip_;
104
105 /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
106 /// \brief Searches the corpus for the pattern that was passed into the constructor
107 ///
108 /// \param corpus_first The start of the data to search (Random Access Iterator)
109 /// \param corpus_last One past the end of the data to search
110 /// \param k_corpus_length The length of the corpus to search
111 ///
112 template <typename corpusIter>
113 std::pair<corpusIter, corpusIter>
114 do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
115 corpusIter curPos = corpus_first;
116 const corpusIter lastPos = corpus_last - k_pattern_length;
117 while ( curPos <= lastPos ) {
118 // Do we match right where we are?
119 std::size_t j = k_pattern_length - 1;
120 while ( pat_first [j] == curPos [j] ) {
121 // We matched - we're done!
122 if ( j == 0 )
123 return std::make_pair(curPos, curPos + k_pattern_length);
124 j--;
125 }
126
127 curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
128 }
129
130 return std::make_pair(corpus_last, corpus_last);
131 }
132// \endcond
133 };
134
135/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
136 Use a bit of TMP to disambiguate the 3-argument templates */
137
138/// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
139/// patIter pat_first, patIter pat_last )
140/// \brief Searches the corpus for the pattern.
141///
142/// \param corpus_first The start of the data to search (Random Access Iterator)
143/// \param corpus_last One past the end of the data to search
144/// \param pat_first The start of the pattern to search for (Random Access Iterator)
145/// \param pat_last One past the end of the data to search for
146///
147 template <typename patIter, typename corpusIter>
148 std::pair<corpusIter, corpusIter> boyer_moore_horspool_search (
149 corpusIter corpus_first, corpusIter corpus_last,
150 patIter pat_first, patIter pat_last )
151 {
152 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
153 return bmh ( corpus_first, corpus_last );
154 }
155
156 template <typename PatternRange, typename corpusIter>
157 std::pair<corpusIter, corpusIter> boyer_moore_horspool_search (
158 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
159 {
160 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
161 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
162 return bmh ( corpus_first, corpus_last );
163 }
164
165 template <typename patIter, typename CorpusRange>
166 typename boost::disable_if_c<
167 boost::is_same<CorpusRange, patIter>::value,
168 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
169 ::type
170 boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
171 {
172 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
173 return bm (boost::begin (corpus), boost::end (corpus));
174 }
175
176 template <typename PatternRange, typename CorpusRange>
177 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
178 boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern )
179 {
180 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
181 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
182 return bmh (boost::begin (corpus), boost::end (corpus));
183 }
184
185
186 // Creator functions -- take a pattern range, return an object
187 template <typename Range>
188 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type>
189 make_boyer_moore_horspool ( const Range &r ) {
190 return boost::algorithm::boyer_moore_horspool
191 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
192 }
193
194 template <typename Range>
195 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type>
196 make_boyer_moore_horspool ( Range &r ) {
197 return boost::algorithm::boyer_moore_horspool
198 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
199 }
200
201}}
202
203#endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
204

source code of boost/libs/algorithm/include/boost/algorithm/searching/boyer_moore_horspool.hpp