1/*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8*/
9
10#include <boost/algorithm/searching/boyer_moore.hpp>
11#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
12#include <boost/algorithm/searching/knuth_morris_pratt.hpp>
13
14#define BOOST_TEST_MAIN
15#include <boost/test/unit_test.hpp>
16
17#include <iostream>
18#include <string>
19#include <vector>
20
21
22namespace ba = boost::algorithm;
23
24template <typename Iter>
25std::string make_str ( Iter first, std::size_t len ) {
26 std::string retVal ( len + 2, '\'' );
27 std::copy ( first, first+len, retVal.begin () + 1);
28 return retVal;
29 }
30
31namespace {
32
33// Check using iterators
34 template<typename Container>
35 void check_one_iter ( const Container &haystack, const std::string &needle, int expected ) {
36 typedef typename Container::const_iterator iter_type;
37 typedef std::string::const_iterator pattern_type;
38
39 iter_type hBeg = haystack.begin ();
40 iter_type hEnd = haystack.end ();
41 pattern_type nBeg = needle.begin ();
42 pattern_type nEnd = needle.end ();
43
44 iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
45 iter_type it1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd);
46 iter_type it1r = ba::boyer_moore_search (haystack, nBeg, nEnd);
47 iter_type it2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd);
48 iter_type it3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd);
49 const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 );
50
51 std::cout << "(Iterators) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
52 try {
53 if ( it0 != it1 ) {
54 throw std::runtime_error (
55 std::string ( "results mismatch between std::search and boyer-moore search" ));
56 }
57
58 if ( it1 != it1r ) {
59 throw std::runtime_error (
60 std::string ( "results mismatch between iterator and range boyer_moore search" ));
61 }
62
63 if ( it1 != it2 ) {
64 throw std::runtime_error (
65 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
66 }
67
68 if ( it1 != it3 )
69 throw std::runtime_error (
70 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
71
72 }
73
74 catch ( ... ) {
75 std::cout << "Searching for: " << needle << std::endl;
76 std::cout << "Expected: " << expected << "\n";
77 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
78 std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n";
79 std::cout << " bm(r): " << std::distance ( hBeg, it1r ) << "\n";
80 std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n";
81 std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n";
82 std::cout << std::flush;
83 throw ;
84 }
85
86 BOOST_CHECK_EQUAL ( dist, expected );
87 }
88
89// Check using pointers
90// We're assuming that the container implements contiguous storage here.
91 template<typename Container>
92 void check_one_pointer ( const Container &haystack, const std::string &needle, int expected ) {
93 typedef const typename Container::value_type *ptr_type;
94 ptr_type hBeg = haystack.size () == 0 ? NULL : &*haystack.begin ();
95 ptr_type hEnd = hBeg + haystack.size ();
96 ptr_type nBeg = needle.size () == 0 ? NULL : &*needle.begin ();
97 ptr_type nEnd = nBeg + needle.size ();
98
99 ptr_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
100 ptr_type it1 = ba::boyer_moore_search (hBeg, hEnd, nBeg, nEnd);
101 ptr_type it2 = ba::boyer_moore_horspool_search (hBeg, hEnd, nBeg, nEnd);
102 ptr_type it3 = ba::knuth_morris_pratt_search (hBeg, hEnd, nBeg, nEnd);
103 const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 );
104
105 std::cout << "(Pointers) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
106 try {
107 if ( it0 != it1 ) {
108 throw std::runtime_error (
109 std::string ( "results mismatch between std::search and boyer-moore search" ));
110 }
111
112 if ( it1 != it2 ) {
113 throw std::runtime_error (
114 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
115 }
116
117 if ( it1 != it3 )
118 throw std::runtime_error (
119 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
120
121 }
122
123 catch ( ... ) {
124 std::cout << "Searching for: " << needle << std::endl;
125 std::cout << "Expected: " << expected << "\n";
126 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
127 std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n";
128 std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n";
129 std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n";
130 std::cout << std::flush;
131 throw ;
132 }
133
134 BOOST_CHECK_EQUAL ( dist, expected );
135 }
136
137// Check using objects
138 template<typename Container>
139 void check_one_object ( const Container &haystack, const std::string &needle, int expected ) {
140 typedef typename Container::const_iterator iter_type;
141 typedef std::string::const_iterator pattern_type;
142
143 iter_type hBeg = haystack.begin ();
144 iter_type hEnd = haystack.end ();
145 pattern_type nBeg = needle.begin ();
146 pattern_type nEnd = needle.end ();
147
148 ba::boyer_moore<pattern_type> bm_r = ba::make_boyer_moore ( needle );
149 ba::boyer_moore<pattern_type> bm ( nBeg, nEnd );
150 ba::boyer_moore_horspool<pattern_type> bmh ( nBeg, nEnd );
151 ba::knuth_morris_pratt<pattern_type> kmp ( nBeg, nEnd );
152
153 iter_type it0 = std::search (hBeg, hEnd, nBeg, nEnd);
154 iter_type it1 = bm (hBeg, hEnd);
155 iter_type it1r = bm (haystack);
156 iter_type rt1 = bm_r (hBeg, hEnd);
157 iter_type rt1r = bm_r (haystack);
158 iter_type it2 = bmh (hBeg, hEnd);
159 iter_type it3 = kmp (hBeg, hEnd);
160 const int dist = it1 == hEnd ? -1 : std::distance ( hBeg, it1 );
161
162 std::cout << "(Objects) Pattern is " << needle.length () << ", haysstack is " << haystack.length () << " chars long; " << std::endl;
163 try {
164 if ( it0 != it1 ) {
165 throw std::runtime_error (
166 std::string ( "results mismatch between std::search and boyer-moore search" ));
167 }
168
169 if ( it1 != it1r ) {
170 throw std::runtime_error (
171 std::string ( "results mismatch between iterator and range boyer_moore search(1)" ));
172 }
173
174 if ( it1 != rt1 ) {
175 throw std::runtime_error (
176 std::string ( "results mismatch between iterator and range boyer_moore search(2)" ));
177 }
178
179 if ( rt1 != rt1r ) {
180 throw std::runtime_error (
181 std::string ( "results mismatch between iterator and range boyer_moore search(3)" ));
182 }
183
184 if ( it1 != it2 ) {
185 throw std::runtime_error (
186 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
187 }
188
189 if ( it1 != it3 )
190 throw std::runtime_error (
191 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
192
193 }
194
195 catch ( ... ) {
196 std::cout << "Searching for: " << needle << std::endl;
197 std::cout << "Expected: " << expected << "\n";
198 std::cout << " std: " << std::distance ( hBeg, it0 ) << "\n";
199 std::cout << " bm: " << std::distance ( hBeg, it1 ) << "\n";
200 std::cout << " bm(r1): " << std::distance ( hBeg, it1r ) << "\n";
201 std::cout << " bm(r2): " << std::distance ( hBeg, rt1 ) << "\n";
202 std::cout << " bm(r3): " << std::distance ( hBeg, rt1r ) << "\n";
203 std::cout << " bmh: " << std::distance ( hBeg, it2 ) << "\n";
204 std::cout << " kpm: " << std::distance ( hBeg, it3 )<< "\n";
205 std::cout << std::flush;
206 throw ;
207 }
208
209 BOOST_CHECK_EQUAL ( dist, expected );
210 }
211
212
213 template<typename Container>
214 void check_one ( const Container &haystack, const std::string &needle, int expected ) {
215 check_one_iter ( haystack, needle, expected );
216 check_one_pointer ( haystack, needle, expected );
217 check_one_object ( haystack, needle, expected );
218 }
219 }
220
221
222BOOST_AUTO_TEST_CASE( test_main )
223{
224 std::string haystack1 ( "NOW AN FOWE\220ER ANNMAN THE ANPANMANEND" );
225 std::string needle1 ( "ANPANMAN" );
226 std::string needle2 ( "MAN THE" );
227 std::string needle3 ( "WE\220ER" );
228 std::string needle4 ( "NOW " ); // At the beginning
229 std::string needle5 ( "NEND" ); // At the end
230 std::string needle6 ( "NOT FOUND" ); // Nowhere
231 std::string needle7 ( "NOT FO\340ND" ); // Nowhere
232
233 std::string haystack2 ( "ABC ABCDAB ABCDABCDABDE" );
234 std::string needle11 ( "ABCDABD" );
235
236 std::string haystack3 ( "abra abracad abracadabra" );
237 std::string needle12 ( "abracadabra" );
238
239 std::string needle13 ( "" );
240 std::string haystack4 ( "" );
241
242 check_one ( haystack1, needle1, 26 );
243 check_one ( haystack1, needle2, 18 );
244 check_one ( haystack1, needle3, 9 );
245 check_one ( haystack1, needle4, 0 );
246 check_one ( haystack1, needle5, 33 );
247 check_one ( haystack1, needle6, -1 );
248 check_one ( haystack1, needle7, -1 );
249
250 check_one ( needle1, haystack1, -1 ); // cant find long pattern in short corpus
251 check_one ( haystack1, haystack1, 0 ); // find something in itself
252 check_one ( haystack2, haystack2, 0 ); // find something in itself
253
254 check_one ( haystack2, needle11, 15 );
255 check_one ( haystack3, needle12, 13 );
256
257 check_one ( haystack1, needle13, 0 ); // find the empty string
258 check_one ( haystack4, needle1, -1 ); // can't find in an empty haystack
259
260// Mikhail Levin <svarneticist@gmail.com> found a problem, and this was the test
261// that triggered it.
262
263 const std::string mikhail_pattern =
264"GATACACCTACCTTCACCAGTTACTCTATGCACTAGGTGCGCCAGGCCCATGCACAAGGGCTTGAGTGGATGGGAAGGA"
265"TGTGCCCTAGTGATGGCAGCATAAGCTACGCAGAGAAGTTCCAGGGCAGAGTCACCATGACCAGGGACACATCCACGAG"
266"CACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCCATGTATTACTGTGGGAGAGATGTCTGGAGTGGT"
267"TATTATTGCCCCGGTAATATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACG"
268;
269 const std::string mikhail_corpus = std::string (8, 'a') + mikhail_pattern;
270
271 check_one ( mikhail_corpus, mikhail_pattern, 8 );
272 }
273