1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0.(See accompanying |
3 | * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.) |
4 | * |
5 | * See http://www.boost.org/libs/iostreams for documentation. |
6 | |
7 | * File: libs/iostreams/test/grep_test.cpp |
8 | * Date: Mon May 26 17:48:45 MDT 2008 |
9 | * Copyright: 2008 CodeRage, LLC |
10 | * Author: Jonathan Turkanis |
11 | * Contact: turkanis at coderage dot com |
12 | * |
13 | * Tests the class template basic_grep_filter. |
14 | */ |
15 | |
16 | #include <iostream> |
17 | |
18 | #include <boost/config.hpp> // Make sure ptrdiff_t is in std. |
19 | #include <algorithm> |
20 | #include <cstddef> // std::ptrdiff_t |
21 | #include <string> |
22 | #include <boost/iostreams/compose.hpp> |
23 | #include <boost/iostreams/copy.hpp> |
24 | #include <boost/iostreams/device/array.hpp> |
25 | #include <boost/iostreams/device/back_inserter.hpp> |
26 | #include <boost/iostreams/filter/grep.hpp> |
27 | #include <boost/iostreams/filter/test.hpp> |
28 | #include <boost/ref.hpp> |
29 | #include <boost/regex.hpp> |
30 | #include <boost/test/test_tools.hpp> |
31 | #include <boost/test/unit_test.hpp> |
32 | |
33 | using namespace boost; |
34 | using namespace boost::iostreams; |
35 | namespace io = boost::iostreams; |
36 | using boost::unit_test::test_suite; |
37 | |
// Input text shared by all test cases: mailing addresses of US Appeals
// Courts (source: uscourts.gov). Each address is three lines long and
// consecutive addresses are separated by one empty line.
std::string addresses(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
71 | |
// Expected output: the lines of the address list that contain the phrase
// "United States Courthouse".
std::string us_courthouse(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
);
81 | |
// Expected output: the lines of the address list (empty lines included)
// that do NOT contain the phrase "United States Courthouse".
std::string us_courthouse_inv(
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
108 | |
// Expected output: the lines of the address list that contain a
// two-letter state code followed by a zip code.
std::string state_and_zip(
    "Boston, MA 02210-3002\n"
    "New York, NY 10007-1501\n"
    "Philadelphia, PA 19106-1729\n"
    "Richmond, VA 23219-3525\n"
    "New Orleans, LA 70130\n"
    "Houston, TX 77002-2600\n"
    "Cincinnati, OH 45202\n"
    "Chicago, IL 60604\n"
);
119 | |
// Expected output: the lines of the address list (empty lines included)
// that do NOT contain a state code and zip code.
std::string state_and_zip_inv(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
);
145 | |
// Expected output: the lines of the address list that contain at least
// three words.
std::string three_words(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
);
164 | |
// Expected output: the lines of the address list that consist of exactly
// three words.
std::string exactly_three_words(
    "One Courthouse Way\n"
    "40 Centre Street\n"
    "601 Market Street\n"
    "515 Rusk Street\n"
);
171 | |
// Expected output: the lines of the address list (empty lines included)
// that do NOT consist of exactly three words.
std::string exactly_three_words_inv(
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n"
);
201 | |
202 | void test_filter( grep_filter grep, |
203 | const std::string& input, |
204 | const std::string& output ); |
205 | |
206 | void grep_filter_test() |
207 | { |
208 | regex match_us_courthouse("\\bUnited States Courthouse\\b" ); |
209 | regex match_state_and_zip("\\b[A-Z]{2}\\s+[0-9]{5}(-[0-9]{4})?\\b" ); |
210 | regex match_three_words("\\b\\w+\\s+\\w+\\s+\\w+\\b" ); |
211 | regex_constants::match_flag_type match_default = |
212 | regex_constants::match_default; |
213 | |
214 | { |
215 | grep_filter grep(match_us_courthouse); |
216 | test_filter(grep, input: addresses, output: us_courthouse); |
217 | } |
218 | |
219 | { |
220 | grep_filter grep(match_us_courthouse, match_default, grep::invert); |
221 | test_filter(grep, input: addresses, output: us_courthouse_inv); |
222 | } |
223 | |
224 | { |
225 | grep_filter grep(match_state_and_zip); |
226 | test_filter(grep, input: addresses, output: state_and_zip); |
227 | } |
228 | |
229 | { |
230 | grep_filter grep(match_state_and_zip, match_default, grep::invert); |
231 | test_filter(grep, input: addresses, output: state_and_zip_inv); |
232 | } |
233 | |
234 | { |
235 | grep_filter grep(match_three_words); |
236 | test_filter(grep, input: addresses, output: three_words); |
237 | } |
238 | |
239 | { |
240 | grep_filter grep(match_three_words, match_default, grep::whole_line); |
241 | test_filter(grep, input: addresses, output: exactly_three_words); |
242 | } |
243 | |
244 | { |
245 | int options = grep::whole_line | grep::invert; |
246 | grep_filter grep(match_three_words, match_default, options); |
247 | test_filter(grep, input: addresses, output: exactly_three_words_inv); |
248 | } |
249 | } |
250 | |
251 | void test_filter( grep_filter grep, |
252 | const std::string& input, |
253 | const std::string& output ) |
254 | { |
255 | // Count lines in output |
256 | std::ptrdiff_t count = std::count(first: output.begin(), last: output.end(), value: '\n'); |
257 | |
258 | // Test as input filter |
259 | { |
260 | array_source src(input.data(), input.data() + input.size()); |
261 | std::string dest; |
262 | io::copy(src: compose(filter: boost::ref(t&: grep), fod: src), snk: io::back_inserter(cnt&: dest)); |
263 | BOOST_CHECK(dest == output); |
264 | BOOST_CHECK(grep.count() == count); |
265 | } |
266 | |
267 | // Test as output filter |
268 | { |
269 | array_source src(input.data(), input.data() + input.size()); |
270 | std::string dest; |
271 | io::copy(src, snk: compose(filter: boost::ref(t&: grep), fod: io::back_inserter(cnt&: dest))); |
272 | BOOST_CHECK(dest == output); |
273 | BOOST_CHECK(grep.count() == count); |
274 | } |
275 | } |
276 | |
277 | test_suite* init_unit_test_suite(int, char* []) |
278 | { |
279 | test_suite* test = BOOST_TEST_SUITE("grep_filter test" ); |
280 | test->add(BOOST_TEST_CASE(&grep_filter_test)); |
281 | return test; |
282 | } |
283 | |