1/*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8*/
9
10#ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
11#define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
12
13#include <vector>
14#include <iterator> // for std::iterator_traits
15
16#include <boost/config.hpp>
17#include <boost/assert.hpp>
18#include <boost/static_assert.hpp>
19
20#include <boost/range/begin.hpp>
21#include <boost/range/end.hpp>
22
23#include <boost/core/enable_if.hpp>
24#include <boost/type_traits/is_same.hpp>
25
26#include <boost/algorithm/searching/detail/debugging.hpp>
27
28// #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
29
30namespace boost { namespace algorithm {
31
32// #define NEW_KMP
33
34/*
35 A templated version of the Knuth-Morris-Pratt searching algorithm.
36
37 Requirements:
38 * Random-access iterators
39 * The two iterator types (I1 and I2) must "point to" the same underlying type.
40
41 http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
42 http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
43*/
44
45 template <typename patIter>
46 class knuth_morris_pratt {
47 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
48 public:
49 knuth_morris_pratt ( patIter first, patIter last )
50 : pat_first ( first ), pat_last ( last ),
51 k_pattern_length ( std::distance ( pat_first, pat_last )),
52 skip_ ( k_pattern_length + 1 ) {
53#ifdef NEW_KMP
54 preKmp ( pat_first, pat_last );
55#else
56 init_skip_table ( first: pat_first, last: pat_last );
57#endif
58#ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
59 detail::PrintTable ( skip_.begin (), skip_.end ());
60#endif
61 }
62
63 ~knuth_morris_pratt () {}
64
65 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
66 /// \brief Searches the corpus for the pattern that was passed into the constructor
67 ///
68 /// \param corpus_first The start of the data to search (Random Access Iterator)
69 /// \param corpus_last One past the end of the data to search
70 /// \param p A predicate used for the search comparisons.
71 ///
72 template <typename corpusIter>
73 std::pair<corpusIter, corpusIter>
74 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
75 BOOST_STATIC_ASSERT (( boost::is_same<
76 typename std::iterator_traits<patIter>::value_type,
77 typename std::iterator_traits<corpusIter>::value_type>::value ));
78
79 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it!
80 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
81
82 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
83 // If the pattern is larger than the corpus, we can't find it!
84 if ( k_corpus_length < k_pattern_length )
85 return std::make_pair(corpus_last, corpus_last);
86
87 return do_search ( corpus_first, corpus_last, k_corpus_length );
88 }
89
90 template <typename Range>
91 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
92 operator () ( Range &r ) const {
93 return (*this) (boost::begin(r), boost::end(r));
94 }
95
96 private:
97/// \cond DOXYGEN_HIDE
98 patIter pat_first, pat_last;
99 const difference_type k_pattern_length;
100 std::vector <difference_type> skip_;
101
102 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
103 /// \brief Searches the corpus for the pattern that was passed into the constructor
104 ///
105 /// \param corpus_first The start of the data to search (Random Access Iterator)
106 /// \param corpus_last One past the end of the data to search
107 /// \param p A predicate used for the search comparisons.
108 ///
109 template <typename corpusIter>
110 std::pair<corpusIter, corpusIter>
111 do_search ( corpusIter corpus_first, corpusIter corpus_last,
112 difference_type k_corpus_length ) const {
113 difference_type match_start = 0; // position in the corpus that we're matching
114
115#ifdef NEW_KMP
116 int patternIdx = 0;
117 while ( match_start < k_corpus_length ) {
118 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
119 patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
120
121 patternIdx++;
122 match_start++; //<--- corpus is always increased by 1
123
124 if ( patternIdx >= (int) k_pattern_length )
125 return corpus_first + match_start - patternIdx;
126 }
127
128#else
129// At this point, we know:
130// k_pattern_length <= k_corpus_length
131// for all elements of skip, it holds -1 .. k_pattern_length
132//
133// In the loop, we have the following invariants
134// idx is in the range 0 .. k_pattern_length
135// match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
136
137 const difference_type last_match = k_corpus_length - k_pattern_length;
138 difference_type idx = 0; // position in the pattern we're comparing
139
140 while ( match_start <= last_match ) {
141 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
142 if ( ++idx == k_pattern_length )
143 return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length);
144 }
145 // Figure out where to start searching again
146 // assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
147 match_start += idx - skip_ [ idx ];
148 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
149 // assert ( idx >= 0 && idx < k_pattern_length );
150 }
151#endif
152
153 // We didn't find anything
154 return std::make_pair(corpus_last, corpus_last);
155 }
156
157
158 void preKmp ( patIter first, patIter last ) {
159 const difference_type count = std::distance ( first, last );
160
161 difference_type i, j;
162
163 i = 0;
164 j = skip_[0] = -1;
165 while (i < count) {
166 while (j > -1 && first[i] != first[j])
167 j = skip_[j];
168 i++;
169 j++;
170 if (first[i] == first[j])
171 skip_[i] = skip_[j];
172 else
173 skip_[i] = j;
174 }
175 }
176
177
178 void init_skip_table ( patIter first, patIter last ) {
179 const difference_type count = std::distance ( first, last );
180
181 difference_type j;
182 skip_ [ 0 ] = -1;
183 for ( int i = 1; i <= count; ++i ) {
184 j = skip_ [ i - 1 ];
185 while ( j >= 0 ) {
186 if ( first [ j ] == first [ i - 1 ] )
187 break;
188 j = skip_ [ j ];
189 }
190 skip_ [ i ] = j + 1;
191 }
192 }
193// \endcond
194 };
195
196
197/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
198 Use a bit of TMP to disambiguate the 3-argument templates */
199
200/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
201/// patIter pat_first, patIter pat_last )
202/// \brief Searches the corpus for the pattern.
203///
204/// \param corpus_first The start of the data to search (Random Access Iterator)
205/// \param corpus_last One past the end of the data to search
206/// \param pat_first The start of the pattern to search for (Random Access Iterator)
207/// \param pat_last One past the end of the data to search for
208///
209 template <typename patIter, typename corpusIter>
210 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
211 corpusIter corpus_first, corpusIter corpus_last,
212 patIter pat_first, patIter pat_last )
213 {
214 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
215 return kmp ( corpus_first, corpus_last );
216 }
217
218 template <typename PatternRange, typename corpusIter>
219 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
220 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
221 {
222 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
223 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
224 return kmp ( corpus_first, corpus_last );
225 }
226
227 template <typename patIter, typename CorpusRange>
228 typename boost::disable_if_c<
229 boost::is_same<CorpusRange, patIter>::value,
230 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
231 ::type
232 knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
233 {
234 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
235 return kmp (boost::begin (corpus), boost::end (corpus));
236 }
237
238 template <typename PatternRange, typename CorpusRange>
239 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
240 knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
241 {
242 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
243 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
244 return kmp (boost::begin (corpus), boost::end (corpus));
245 }
246
247
248 // Creator functions -- take a pattern range, return an object
249 template <typename Range>
250 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
251 make_knuth_morris_pratt ( const Range &r ) {
252 return boost::algorithm::knuth_morris_pratt
253 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
254 }
255
256 template <typename Range>
257 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
258 make_knuth_morris_pratt ( Range &r ) {
259 return boost::algorithm::knuth_morris_pratt
260 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
261 }
262}}
263
264#endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
265

source code of boost/libs/algorithm/include/boost/algorithm/searching/knuth_morris_pratt.hpp