1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // (See accompanying file LICENSE_1_0.txt or copy at |
6 | // http://www.boost.org/LICENSE_1_0.txt) |
7 | // |
8 | // See http://www.boost.org/libs/move for documentation. |
9 | // |
10 | ////////////////////////////////////////////////////////////////////////////// |
11 | |
12 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS |
13 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 |
14 | |
15 | #include <algorithm> //std::inplace_merge |
16 | #include <cstdio> //std::printf |
17 | #include <iostream> //std::cout |
18 | #include <boost/container/vector.hpp> //boost::container::vector |
19 | |
20 | #include <boost/config.hpp> |
21 | #include <cstdlib> |
22 | |
23 | #include <boost/move/unique_ptr.hpp> |
24 | #include <boost/move/detail/nsec_clock.hpp> |
25 | #include <boost/move/detail/force_ptr.hpp> |
26 | |
27 | #include "order_type.hpp" |
28 | #include "random_shuffle.hpp" |
29 | |
30 | using boost::move_detail::cpu_timer; |
31 | using boost::move_detail::nanosecond_type; |
32 | |
33 | void print_stats(const char *str, boost::ulong_long_type element_count) |
34 | { |
35 | std::printf( format: "%sCmp:%8.04f Cpy:%9.04f\n" , str |
36 | , double(order_perf_type::num_compare)/double(element_count) |
37 | , double(order_perf_type::num_copy)/double(element_count)); |
38 | } |
39 | |
40 | #include <boost/move/algo/adaptive_merge.hpp> |
41 | #include <boost/move/algo/detail/merge.hpp> |
42 | #include <boost/move/core.hpp> |
43 | |
44 | template<class T, class Compare> |
45 | std::size_t generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK, Compare comp) |
46 | { |
47 | elements.resize(L); |
48 | boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]); |
49 | |
50 | std::srand(seed: 0); |
51 | for (std::size_t i = 0; i < (NK ? NK : L); ++i) { |
52 | key_reps[i] = 0; |
53 | } |
54 | for (std::size_t i = 0; i < L; ++i) { |
55 | std::size_t key = NK ? (i % NK) : i; |
56 | elements[i].key = key; |
57 | } |
58 | ::random_shuffle(elements.data(), elements.data() + L); |
59 | ::random_shuffle(elements.data(), elements.data() + L); |
60 | |
61 | for (std::size_t i = 0; i < L; ++i) { |
62 | elements[i].val = key_reps[elements[i].key]++; |
63 | } |
64 | std::size_t split_count = L / 2; |
65 | std::stable_sort(elements.data(), elements.data() + split_count, comp); |
66 | std::stable_sort(elements.data() + split_count, elements.data() + L, comp); |
67 | return split_count; |
68 | } |
69 | |
70 | template<class T, class Compare> |
71 | void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) |
72 | { |
73 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); |
74 | boost::movelib::adaptive_merge(elements, mid, last, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
75 | } |
76 | |
77 | template<class T, class Compare> |
78 | void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) |
79 | { |
80 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); |
81 | boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
82 | } |
83 | |
84 | enum AlgoType |
85 | { |
86 | StdMerge, |
87 | AdaptMerge, |
88 | SqrtHAdaptMerge, |
89 | SqrtAdaptMerge, |
90 | Sqrt2AdaptMerge, |
91 | QuartAdaptMerge, |
92 | StdInplaceMerge, |
93 | StdSqrtHAdaptMerge, |
94 | StdSqrtAdaptMerge, |
95 | StdSqrt2AdaptMerge, |
96 | StdQuartAdaptMerge, |
97 | MaxMerge |
98 | }; |
99 | |
100 | const char *AlgoNames [] = { "StdMerge " |
101 | , "AdaptMerge " |
102 | , "SqrtHAdaptMerge " |
103 | , "SqrtAdaptMerge " |
104 | , "Sqrt2AdaptMerge " |
105 | , "QuartAdaptMerge " |
106 | , "StdInplaceMerge " |
107 | , "StdSqrtHAdaptMerge " |
108 | , "StdSqrtAdaptMerge " |
109 | , "StdSqrt2AdaptMerge " |
110 | , "StdQuartAdaptMerge " |
111 | }; |
112 | |
113 | BOOST_MOVE_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); |
114 | |
115 | template<class T> |
116 | bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock) |
117 | { |
118 | std::printf(format: "%s " , AlgoNames[alg]); |
119 | order_perf_type::num_compare=0; |
120 | order_perf_type::num_copy=0; |
121 | order_perf_type::num_elements = element_count; |
122 | cpu_timer timer; |
123 | timer.resume(); |
124 | switch(alg) |
125 | { |
126 | case StdMerge: |
127 | std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
128 | break; |
129 | case AdaptMerge: |
130 | boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
131 | break; |
132 | case SqrtHAdaptMerge: |
133 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
134 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)/2+1); |
135 | break; |
136 | case SqrtAdaptMerge: |
137 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
138 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
139 | break; |
140 | case Sqrt2AdaptMerge: |
141 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
142 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
143 | break; |
144 | case QuartAdaptMerge: |
145 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
146 | , (element_count)/4+1); |
147 | break; |
148 | case StdInplaceMerge: |
149 | boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); |
150 | break; |
151 | case StdSqrtHAdaptMerge: |
152 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
153 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)/2+1); |
154 | break; |
155 | case StdSqrtAdaptMerge: |
156 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
157 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
158 | break; |
159 | case StdSqrt2AdaptMerge: |
160 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
161 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
162 | break; |
163 | case StdQuartAdaptMerge: |
164 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
165 | , (element_count)/4+1); |
166 | break; |
167 | } |
168 | timer.stop(); |
169 | |
170 | if(order_perf_type::num_elements == element_count){ |
171 | std::printf(format: " Tmp Ok " ); |
172 | } else{ |
173 | std::printf(format: " Tmp KO " ); |
174 | } |
175 | nanosecond_type new_clock = timer.elapsed().wall; |
176 | |
177 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
178 | std::printf(format: "Cmp:%8.04f Cpy:%9.04f" , double(order_perf_type::num_compare)/double(element_count), double(order_perf_type::num_copy)/double(element_count) ); |
179 | |
180 | double time = double(new_clock); |
181 | |
182 | const char *units = "ns" ; |
183 | if(time >= 1000000000.0){ |
184 | time /= 1000000000.0; |
185 | units = " s" ; |
186 | } |
187 | else if(time >= 1000000.0){ |
188 | time /= 1000000.0; |
189 | units = "ms" ; |
190 | } |
191 | else if(time >= 1000.0){ |
192 | time /= 1000.0; |
193 | units = "us" ; |
194 | } |
195 | |
196 | std::printf(format: " %6.02f%s (%6.02f)\n" |
197 | , time |
198 | , units |
199 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); |
200 | prev_clock = new_clock; |
201 | bool res = is_order_type_ordered(elements, element_count, true); |
202 | return res; |
203 | } |
204 | |
205 | template<class T> |
206 | bool measure_all(std::size_t L, std::size_t NK) |
207 | { |
208 | boost::container::vector<T> original_elements, elements; |
209 | std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less()); |
210 | std::printf(format: "\n - - N: %u, NK: %u - -\n" , (unsigned)L, (unsigned)NK); |
211 | |
212 | nanosecond_type prev_clock = 0; |
213 | nanosecond_type back_clock; |
214 | bool res = true; |
215 | |
216 | elements = original_elements; |
217 | res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock); |
218 | back_clock = prev_clock; |
219 | // |
220 | |
221 | prev_clock = back_clock; |
222 | elements = original_elements; |
223 | res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock); |
224 | // |
225 | prev_clock = back_clock; |
226 | elements = original_elements; |
227 | res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock); |
228 | // |
229 | prev_clock = back_clock; |
230 | elements = original_elements; |
231 | res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock); |
232 | // |
233 | prev_clock = back_clock; |
234 | elements = original_elements; |
235 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock); |
236 | // |
237 | prev_clock = back_clock; |
238 | elements = original_elements; |
239 | res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock); |
240 | // |
241 | prev_clock = back_clock; |
242 | elements = original_elements; |
243 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock); |
244 | // |
245 | prev_clock = back_clock; |
246 | elements = original_elements; |
247 | res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock); |
248 | // |
249 | prev_clock = back_clock; |
250 | elements = original_elements; |
251 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock); |
252 | // |
253 | prev_clock = back_clock; |
254 | elements = original_elements; |
255 | res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock); |
256 | // |
257 | prev_clock = back_clock; |
258 | elements = original_elements; |
259 | res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock); |
260 | // |
261 | if (!res) |
262 | std::abort(); |
263 | return res; |
264 | } |
265 | |
266 | //Undef it to run the long test |
267 | #define BENCH_MERGE_SHORT |
268 | #define BENCH_SORT_UNIQUE_VALUES |
269 | |
270 | int main() |
271 | { |
272 | #ifndef BENCH_SORT_UNIQUE_VALUES |
273 | measure_all<order_perf_type>(101,1); |
274 | measure_all<order_perf_type>(101,5); |
275 | measure_all<order_perf_type>(101,7); |
276 | measure_all<order_perf_type>(101,31); |
277 | #endif |
278 | measure_all<order_perf_type>(L: 101,NK: 0); |
279 | |
280 | // |
281 | #ifndef BENCH_SORT_UNIQUE_VALUES |
282 | measure_all<order_perf_type>(1101,1); |
283 | measure_all<order_perf_type>(1001,7); |
284 | measure_all<order_perf_type>(1001,31); |
285 | measure_all<order_perf_type>(1001,127); |
286 | measure_all<order_perf_type>(1001,511); |
287 | #endif |
288 | measure_all<order_perf_type>(L: 1001,NK: 0); |
289 | |
290 | // |
291 | #ifndef BENCH_SORT_UNIQUE_VALUES |
292 | measure_all<order_perf_type>(10001,65); |
293 | measure_all<order_perf_type>(10001,255); |
294 | measure_all<order_perf_type>(10001,1023); |
295 | measure_all<order_perf_type>(10001,4095); |
296 | #endif |
297 | measure_all<order_perf_type>(L: 10001,NK: 0); |
298 | |
299 | // |
300 | #if defined(NDEBUG) |
301 | #ifndef BENCH_SORT_UNIQUE_VALUES |
302 | measure_all<order_perf_type>(100001,511); |
303 | measure_all<order_perf_type>(100001,2047); |
304 | measure_all<order_perf_type>(100001,8191); |
305 | measure_all<order_perf_type>(100001,32767); |
306 | #endif |
307 | measure_all<order_perf_type>(L: 100001,NK: 0); |
308 | |
309 | // |
310 | #if !defined(BENCH_MERGE_SHORT) |
311 | #ifndef BENCH_SORT_UNIQUE_VALUES |
312 | measure_all<order_perf_type>(1000001, 8192); |
313 | measure_all<order_perf_type>(1000001, 32768); |
314 | measure_all<order_perf_type>(1000001, 131072); |
315 | measure_all<order_perf_type>(1000001, 524288); |
316 | #endif |
317 | measure_all<order_perf_type>(1000001,0); |
318 | |
319 | #ifndef BENCH_SORT_UNIQUE_VALUES |
320 | measure_all<order_perf_type>(10000001, 65536); |
321 | measure_all<order_perf_type>(10000001, 262144); |
322 | measure_all<order_perf_type>(10000001, 1048576); |
323 | measure_all<order_perf_type>(10000001, 4194304); |
324 | #endif |
325 | measure_all<order_perf_type>(10000001,0); |
326 | #endif //#ifndef BENCH_MERGE_SHORT |
327 | #endif //#ifdef NDEBUG |
328 | |
329 | return 0; |
330 | } |
331 | |
332 | |