1/*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8*/
9
10#ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
11#define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
12
13#include <vector>
14#include <iterator> // for std::iterator_traits
15
16#include <boost/assert.hpp>
17#include <boost/static_assert.hpp>
18
19#include <boost/range/begin.hpp>
20#include <boost/range/end.hpp>
21
22#include <boost/utility/enable_if.hpp>
23#include <boost/type_traits/is_same.hpp>
24
25#include <boost/algorithm/searching/detail/debugging.hpp>
26
27// #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
28
29namespace boost { namespace algorithm {
30
31// #define NEW_KMP
32
33/*
34 A templated version of the Knuth-Morris-Pratt searching algorithm.
35
36 Requirements:
37 * Random-access iterators
38 * The two iterator types (I1 and I2) must "point to" the same underlying type.
39
40 http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
41 http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
42*/
43
44 template <typename patIter>
45 class knuth_morris_pratt {
46 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
47 public:
48 knuth_morris_pratt ( patIter first, patIter last )
49 : pat_first ( first ), pat_last ( last ),
50 k_pattern_length ( std::distance ( pat_first, pat_last )),
51 skip_ ( k_pattern_length + 1 ) {
52#ifdef NEW_KMP
53 preKmp ( pat_first, pat_last );
54#else
55 init_skip_table ( first: pat_first, last: pat_last );
56#endif
57#ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
58 detail::PrintTable ( skip_.begin (), skip_.end ());
59#endif
60 }
61
62 ~knuth_morris_pratt () {}
63
64 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
65 /// \brief Searches the corpus for the pattern that was passed into the constructor
66 ///
67 /// \param corpus_first The start of the data to search (Random Access Iterator)
68 /// \param corpus_last One past the end of the data to search
69 /// \param p A predicate used for the search comparisons.
70 ///
71 template <typename corpusIter>
72 corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
73 BOOST_STATIC_ASSERT (( boost::is_same<
74 typename std::iterator_traits<patIter>::value_type,
75 typename std::iterator_traits<corpusIter>::value_type>::value ));
76 if ( corpus_first == corpus_last ) return corpus_last; // if nothing to search, we didn't find it!
77 if ( pat_first == pat_last ) return corpus_first; // empty pattern matches at start
78
79 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
80 // If the pattern is larger than the corpus, we can't find it!
81 if ( k_corpus_length < k_pattern_length )
82 return corpus_last;
83
84 return do_search ( corpus_first, corpus_last, k_corpus_length );
85 }
86
87 template <typename Range>
88 typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
89 return (*this) (boost::begin(r), boost::end(r));
90 }
91
92 private:
93/// \cond DOXYGEN_HIDE
94 patIter pat_first, pat_last;
95 const difference_type k_pattern_length;
96 std::vector <difference_type> skip_;
97
98 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
99 /// \brief Searches the corpus for the pattern that was passed into the constructor
100 ///
101 /// \param corpus_first The start of the data to search (Random Access Iterator)
102 /// \param corpus_last One past the end of the data to search
103 /// \param p A predicate used for the search comparisons.
104 ///
105 template <typename corpusIter>
106 corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last,
107 difference_type k_corpus_length ) const {
108 difference_type match_start = 0; // position in the corpus that we're matching
109
110#ifdef NEW_KMP
111 int patternIdx = 0;
112 while ( match_start < k_corpus_length ) {
113 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
114 patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
115
116 patternIdx++;
117 match_start++; //<--- corpus is always increased by 1
118
119 if ( patternIdx >= (int) k_pattern_length )
120 return corpus_first + match_start - patternIdx;
121 }
122
123#else
124// At this point, we know:
125// k_pattern_length <= k_corpus_length
126// for all elements of skip, it holds -1 .. k_pattern_length
127//
128// In the loop, we have the following invariants
129// idx is in the range 0 .. k_pattern_length
130// match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
131
132 const difference_type last_match = k_corpus_length - k_pattern_length;
133 difference_type idx = 0; // position in the pattern we're comparing
134
135 while ( match_start <= last_match ) {
136 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
137 if ( ++idx == k_pattern_length )
138 return corpus_first + match_start;
139 }
140 // Figure out where to start searching again
141 // assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
142 match_start += idx - skip_ [ idx ];
143 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
144 // assert ( idx >= 0 && idx < k_pattern_length );
145 }
146#endif
147
148 // We didn't find anything
149 return corpus_last;
150 }
151
152
153 void preKmp ( patIter first, patIter last ) {
154 const /*std::size_t*/ int count = std::distance ( first, last );
155
156 int i, j;
157
158 i = 0;
159 j = skip_[0] = -1;
160 while (i < count) {
161 while (j > -1 && first[i] != first[j])
162 j = skip_[j];
163 i++;
164 j++;
165 if (first[i] == first[j])
166 skip_[i] = skip_[j];
167 else
168 skip_[i] = j;
169 }
170 }
171
172
173 void init_skip_table ( patIter first, patIter last ) {
174 const difference_type count = std::distance ( first, last );
175
176 int j;
177 skip_ [ 0 ] = -1;
178 for ( int i = 1; i <= count; ++i ) {
179 j = skip_ [ i - 1 ];
180 while ( j >= 0 ) {
181 if ( first [ j ] == first [ i - 1 ] )
182 break;
183 j = skip_ [ j ];
184 }
185 skip_ [ i ] = j + 1;
186 }
187 }
188// \endcond
189 };
190
191
192/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
193 Use a bit of TMP to disambiguate the 3-argument templates */
194
195/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
196/// patIter pat_first, patIter pat_last )
197/// \brief Searches the corpus for the pattern.
198///
199/// \param corpus_first The start of the data to search (Random Access Iterator)
200/// \param corpus_last One past the end of the data to search
201/// \param pat_first The start of the pattern to search for (Random Access Iterator)
202/// \param pat_last One past the end of the data to search for
203///
204 template <typename patIter, typename corpusIter>
205 corpusIter knuth_morris_pratt_search (
206 corpusIter corpus_first, corpusIter corpus_last,
207 patIter pat_first, patIter pat_last )
208 {
209 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
210 return kmp ( corpus_first, corpus_last );
211 }
212
213 template <typename PatternRange, typename corpusIter>
214 corpusIter knuth_morris_pratt_search (
215 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
216 {
217 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
218 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
219 return kmp ( corpus_first, corpus_last );
220 }
221
222 template <typename patIter, typename CorpusRange>
223 typename boost::lazy_disable_if_c<
224 boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
225 ::type
226 knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
227 {
228 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
229 return kmp (boost::begin (corpus), boost::end (corpus));
230 }
231
232 template <typename PatternRange, typename CorpusRange>
233 typename boost::range_iterator<CorpusRange>::type
234 knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
235 {
236 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
237 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
238 return kmp (boost::begin (corpus), boost::end (corpus));
239 }
240
241
242 // Creator functions -- take a pattern range, return an object
243 template <typename Range>
244 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
245 make_knuth_morris_pratt ( const Range &r ) {
246 return boost::algorithm::knuth_morris_pratt
247 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
248 }
249
250 template <typename Range>
251 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
252 make_knuth_morris_pratt ( Range &r ) {
253 return boost::algorithm::knuth_morris_pratt
254 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
255 }
256}}
257
258#endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
259

source code of boost/boost/algorithm/searching/knuth_morris_pratt.hpp