1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // (See accompanying file LICENSE_1_0.txt or copy at |
6 | // http://www.boost.org/LICENSE_1_0.txt) |
7 | // |
8 | // See http://www.boost.org/libs/move for documentation. |
9 | // |
10 | ////////////////////////////////////////////////////////////////////////////// |
11 | |
12 | #include <cstdlib> //std::srand |
13 | #include <algorithm> //std::stable_sort, std::make|sort_heap, std::random_shuffle |
14 | #include <cstdio> //std::printf |
15 | #include <iostream> //std::cout |
16 | #include <boost/container/vector.hpp> //boost::container::vector |
17 | |
18 | #include <boost/config.hpp> |
19 | #include <boost/move/unique_ptr.hpp> |
20 | #include <boost/move/detail/nsec_clock.hpp> |
21 | #include <boost/move/detail/force_ptr.hpp> |
22 | #include <cstdlib> |
23 | |
24 | using boost::move_detail::cpu_timer; |
25 | using boost::move_detail::nanosecond_type; |
26 | |
27 | #include "order_type.hpp" |
28 | #include "random_shuffle.hpp" |
29 | |
30 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS |
31 | //#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS |
32 | void print_stats(const char *str, boost::ulong_long_type element_count) |
33 | { |
34 | std::printf( format: "%sCmp:%7.03f Cpy:%8.03f\n" , str |
35 | , double(order_perf_type::num_compare)/double(element_count) |
36 | , double(order_perf_type::num_copy)/double(element_count) ); |
37 | } |
38 | |
39 | |
40 | #include <boost/move/algo/adaptive_sort.hpp> |
41 | #include <boost/move/algo/detail/merge_sort.hpp> |
42 | #include <boost/move/algo/detail/pdqsort.hpp> |
43 | #include <boost/move/algo/detail/heap_sort.hpp> |
44 | #include <boost/move/core.hpp> |
45 | |
46 | template<class T> |
47 | void generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK) |
48 | { |
49 | elements.resize(L); |
50 | boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]); |
51 | |
52 | std::srand(seed: 0); |
53 | for (std::size_t i = 0; i < (NK ? NK : L); ++i) { |
54 | key_reps[i] = 0; |
55 | } |
56 | for (std::size_t i = 0; i < L; ++i) { |
57 | std::size_t key = NK ? (i % NK) : i; |
58 | elements[i].key = key; |
59 | } |
60 | ::random_shuffle(elements.data(), elements.data() + L); |
61 | ::random_shuffle(elements.data(), elements.data() + L); |
62 | |
63 | for (std::size_t i = 0; i < L; ++i) { |
64 | elements[i].val = key_reps[elements[i].key]++; |
65 | } |
66 | } |
67 | |
68 | template<class T, class Compare> |
69 | void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) |
70 | { |
71 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); |
72 | boost::movelib::adaptive_sort(elements, elements + element_count, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
73 | } |
74 | |
75 | template<class T, class Compare> |
76 | void std_like_adaptive_stable_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) |
77 | { |
78 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); |
79 | boost::movelib::stable_sort_adaptive_ONlogN2(elements, elements + element_count, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
80 | } |
81 | |
82 | template<class T, class Compare> |
83 | void merge_sort_buffered(T *elements, std::size_t element_count, Compare comp) |
84 | { |
85 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*((element_count+1)/2)]); |
86 | boost::movelib::merge_sort(elements, elements + element_count, comp, boost::move_detail::force_ptr<T*>(mem.get())); |
87 | } |
88 | |
89 | enum AlgoType |
90 | { |
91 | MergeSort, |
92 | StableSort, |
93 | PdQsort, |
94 | StdSort, |
95 | AdaptiveSort, |
96 | SqrtHAdaptiveSort, |
97 | SqrtAdaptiveSort, |
98 | Sqrt2AdaptiveSort, |
99 | QuartAdaptiveSort, |
100 | InplaceStableSort, |
101 | StdSqrtHAdpSort, |
102 | StdSqrtAdpSort, |
103 | StdSqrt2AdpSort, |
104 | StdQuartAdpSort, |
105 | SlowStableSort, |
106 | HeapSort, |
107 | MaxSort |
108 | }; |
109 | |
110 | const char *AlgoNames [] = { "MergeSort " |
111 | , "StableSort " |
112 | , "PdQsort " |
113 | , "StdSort " |
114 | , "AdaptSort " |
115 | , "SqrtHAdaptSort " |
116 | , "SqrtAdaptSort " |
117 | , "Sqrt2AdaptSort " |
118 | , "QuartAdaptSort " |
119 | , "InplStableSort " |
120 | , "StdSqrtHAdpSort" |
121 | , "StdSqrtAdpSort " |
122 | , "StdSqrt2AdpSort" |
123 | , "StdQuartAdpSort" |
124 | , "SlowSort " |
125 | , "HeapSort " |
126 | }; |
127 | |
128 | BOOST_MOVE_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxSort); |
129 | |
130 | template<class T> |
131 | bool measure_algo(T *elements, std::size_t element_count, std::size_t alg, nanosecond_type &prev_clock) |
132 | { |
133 | std::printf(format: "%s " , AlgoNames[alg]); |
134 | order_perf_type::num_compare=0; |
135 | order_perf_type::num_copy=0; |
136 | order_perf_type::num_elements = element_count; |
137 | cpu_timer timer; |
138 | timer.resume(); |
139 | switch(alg) |
140 | { |
141 | case MergeSort: |
142 | merge_sort_buffered(elements, element_count, order_type_less()); |
143 | break; |
144 | case StableSort: |
145 | std::stable_sort(elements,elements+element_count,order_type_less()); |
146 | break; |
147 | case PdQsort: |
148 | boost::movelib::pdqsort(elements,elements+element_count,order_type_less()); |
149 | break; |
150 | case StdSort: |
151 | std::sort(elements,elements+element_count,order_type_less()); |
152 | break; |
153 | case AdaptiveSort: |
154 | boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less()); |
155 | break; |
156 | case SqrtHAdaptiveSort: |
157 | adaptive_sort_buffered( elements, element_count, order_type_less() |
158 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)/2+1); |
159 | break; |
160 | case SqrtAdaptiveSort: |
161 | adaptive_sort_buffered( elements, element_count, order_type_less() |
162 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
163 | break; |
164 | case Sqrt2AdaptiveSort: |
165 | adaptive_sort_buffered( elements, element_count, order_type_less() |
166 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
167 | break; |
168 | case QuartAdaptiveSort: |
169 | adaptive_sort_buffered( elements, element_count, order_type_less() |
170 | , (element_count-1)/4+1); |
171 | break; |
172 | case InplaceStableSort: |
173 | boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less()); |
174 | break; |
175 | case StdSqrtHAdpSort: |
176 | std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() |
177 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)/2+1); |
178 | break; |
179 | case StdSqrtAdpSort: |
180 | std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() |
181 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
182 | break; |
183 | case StdSqrt2AdpSort: |
184 | std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() |
185 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(n: element_count)); |
186 | break; |
187 | case StdQuartAdpSort: |
188 | std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() |
189 | , (element_count-1)/4+1); |
190 | break; |
191 | case SlowStableSort: |
192 | boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); |
193 | break; |
194 | case HeapSort: |
195 | boost::movelib::heap_sort(elements, elements+element_count, order_type_less()); |
196 | boost::movelib::heap_sort(first: (order_move_type*)0, last: (order_move_type*)0, comp: order_type_less()); |
197 | |
198 | break; |
199 | } |
200 | timer.stop(); |
201 | |
202 | if(order_perf_type::num_elements == element_count){ |
203 | std::printf(format: " Tmp Ok " ); |
204 | } else{ |
205 | std::printf(format: " Tmp KO " ); |
206 | } |
207 | nanosecond_type new_clock = timer.elapsed().wall; |
208 | |
209 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
210 | std::printf(format: "Cmp:%7.03f Cpy:%8.03f" , double(order_perf_type::num_compare)/double(element_count), double(order_perf_type::num_copy)/double(element_count) ); |
211 | |
212 | double time = double(new_clock); |
213 | |
214 | const char *units = "ns" ; |
215 | if(time >= 1000000000.0){ |
216 | time /= 1000000000.0; |
217 | units = " s" ; |
218 | } |
219 | else if(time >= 1000000.0){ |
220 | time /= 1000000.0; |
221 | units = "ms" ; |
222 | } |
223 | else if(time >= 1000.0){ |
224 | time /= 1000.0; |
225 | units = "us" ; |
226 | } |
227 | |
228 | std::printf(format: " %6.02f%s (%6.02f)\n" |
229 | , time |
230 | , units |
231 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); |
232 | prev_clock = new_clock; |
233 | bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != PdQsort && alg != StdSort); |
234 | return res; |
235 | } |
236 | |
237 | template<class T> |
238 | bool measure_all(std::size_t L, std::size_t NK) |
239 | { |
240 | boost::container::vector<T> original_elements, elements; |
241 | generate_elements(original_elements, L, NK); |
242 | std::printf(format: "\n - - N: %u, NK: %u - -\n" , (unsigned)L, (unsigned)NK); |
243 | |
244 | nanosecond_type prev_clock = 0; |
245 | nanosecond_type back_clock; |
246 | bool res = true; |
247 | elements = original_elements; |
248 | res = res && measure_algo(elements.data(), L,MergeSort, prev_clock); |
249 | back_clock = prev_clock; |
250 | // |
251 | prev_clock = back_clock; |
252 | elements = original_elements; |
253 | res = res && measure_algo(elements.data(), L,StableSort, prev_clock); |
254 | // |
255 | prev_clock = back_clock; |
256 | elements = original_elements; |
257 | res = res && measure_algo(elements.data(), L,PdQsort, prev_clock); |
258 | // |
259 | prev_clock = back_clock; |
260 | elements = original_elements; |
261 | res = res && measure_algo(elements.data(), L,StdSort, prev_clock); |
262 | // |
263 | prev_clock = back_clock; |
264 | elements = original_elements; |
265 | res = res && measure_algo(elements.data(), L,HeapSort, prev_clock); |
266 | // |
267 | prev_clock = back_clock; |
268 | elements = original_elements; |
269 | res = res && measure_algo(elements.data(), L,QuartAdaptiveSort, prev_clock); |
270 | // |
271 | prev_clock = back_clock; |
272 | elements = original_elements; |
273 | res = res && measure_algo(elements.data(), L, StdQuartAdpSort, prev_clock); |
274 | // |
275 | prev_clock = back_clock; |
276 | elements = original_elements; |
277 | res = res && measure_algo(elements.data(), L,Sqrt2AdaptiveSort, prev_clock); |
278 | // |
279 | prev_clock = back_clock; |
280 | elements = original_elements; |
281 | res = res && measure_algo(elements.data(), L, StdSqrt2AdpSort, prev_clock); |
282 | // |
283 | prev_clock = back_clock; |
284 | elements = original_elements; |
285 | res = res && measure_algo(elements.data(), L,SqrtAdaptiveSort, prev_clock); |
286 | // |
287 | prev_clock = back_clock; |
288 | elements = original_elements; |
289 | res = res && measure_algo(elements.data(), L, StdSqrtAdpSort, prev_clock); |
290 | // |
291 | prev_clock = back_clock; |
292 | elements = original_elements; |
293 | res = res && measure_algo(elements.data(), L,SqrtHAdaptiveSort, prev_clock); |
294 | // |
295 | prev_clock = back_clock; |
296 | elements = original_elements; |
297 | res = res && measure_algo(elements.data(), L, StdSqrtHAdpSort, prev_clock); |
298 | // |
299 | prev_clock = back_clock; |
300 | elements = original_elements; |
301 | res = res && measure_algo(elements.data(), L,AdaptiveSort, prev_clock); |
302 | // |
303 | prev_clock = back_clock; |
304 | elements = original_elements; |
305 | res = res && measure_algo(elements.data(), L,InplaceStableSort, prev_clock); |
306 | // |
307 | //prev_clock = back_clock; |
308 | //elements = original_elements; |
309 | //res = res && measure_algo(elements.data(), L,SlowStableSort, prev_clock); |
310 | |
311 | if(!res) |
312 | std::abort(); |
313 | return res; |
314 | } |
315 | |
316 | //Undef it to run the long test |
317 | #define BENCH_SORT_SHORT |
318 | #define BENCH_SORT_UNIQUE_VALUES |
319 | |
320 | int main() |
321 | { |
322 | #ifndef BENCH_SORT_UNIQUE_VALUES |
323 | measure_all<order_perf_type>(101,1); |
324 | measure_all<order_perf_type>(101,7); |
325 | measure_all<order_perf_type>(101,31); |
326 | #endif |
327 | measure_all<order_perf_type>(L: 101,NK: 0); |
328 | |
329 | // |
330 | #ifndef BENCH_SORT_UNIQUE_VALUES |
331 | measure_all<order_perf_type>(1101,1); |
332 | measure_all<order_perf_type>(1001,7); |
333 | measure_all<order_perf_type>(1001,31); |
334 | measure_all<order_perf_type>(1001,127); |
335 | measure_all<order_perf_type>(1001,511); |
336 | #endif |
337 | measure_all<order_perf_type>(L: 1001,NK: 0); |
338 | // |
339 | #ifndef BENCH_SORT_UNIQUE_VALUES |
340 | measure_all<order_perf_type>(10001,65); |
341 | measure_all<order_perf_type>(10001,255); |
342 | measure_all<order_perf_type>(10001,1023); |
343 | measure_all<order_perf_type>(10001,4095); |
344 | #endif |
345 | measure_all<order_perf_type>(L: 10001,NK: 0); |
346 | |
347 | // |
348 | #ifdef NDEBUG |
349 | #ifndef BENCH_SORT_UNIQUE_VALUES |
350 | measure_all<order_perf_type>(100001,511); |
351 | measure_all<order_perf_type>(100001,2047); |
352 | measure_all<order_perf_type>(100001,8191); |
353 | measure_all<order_perf_type>(100001,32767); |
354 | #endif |
355 | measure_all<order_perf_type>(L: 100001,NK: 0); |
356 | |
357 | // |
358 | #ifndef BENCH_SORT_SHORT |
359 | #ifndef BENCH_SORT_UNIQUE_VALUES |
360 | measure_all<order_perf_type>(1000001, 8192); |
361 | measure_all<order_perf_type>(1000001, 32768); |
362 | measure_all<order_perf_type>(1000001, 131072); |
363 | measure_all<order_perf_type>(1000001, 524288); |
364 | #endif |
365 | measure_all<order_perf_type>(1000001,0); |
366 | |
367 | #ifndef BENCH_SORT_UNIQUE_VALUES |
368 | measure_all<order_perf_type>(10000001, 65536); |
369 | measure_all<order_perf_type>(10000001, 262144); |
370 | measure_all<order_perf_type>(10000001, 1048576); |
371 | measure_all<order_perf_type>(10000001, 4194304); |
372 | #endif |
373 | measure_all<order_perf_type>(1000001,0); |
374 | #endif //#ifndef BENCH_SORT_SHORT |
375 | #endif //NDEBUG |
376 | |
377 | //measure_all<order_perf_type>(100000001,0); |
378 | |
379 | return 0; |
380 | } |
381 | |