1 | /* Measure lock functions for different threads and critical sections. |
2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #define TEST_MAIN |
20 | #define TIMEOUT (20 * 60) |
21 | |
22 | #include <stdio.h> |
23 | #include <stdlib.h> |
24 | #include <string.h> |
25 | #include <unistd.h> |
26 | #include <math.h> |
27 | #include <pthread.h> |
28 | #include <sys/time.h> |
29 | #include <sys/sysinfo.h> |
30 | #include "bench-timing.h" |
31 | #include "json-lib.h" |
32 | |
33 | static bench_lock_t lock; |
34 | static bench_lock_attr_t attr; |
35 | static pthread_barrier_t barrier; |
36 | |
37 | #define START_ITERS 1000 |
38 | |
39 | #pragma GCC push_options |
40 | #pragma GCC optimize(1) |
41 | |
42 | static int __attribute__ ((noinline)) fibonacci (int i) |
43 | { |
44 | asm("" ); |
45 | if (i > 2) |
46 | return fibonacci (i: i - 1) + fibonacci (i: i - 2); |
47 | return 10 + i; |
48 | } |
49 | |
50 | static void |
51 | do_filler (void) |
52 | { |
53 | char buf1[512], buf2[512]; |
54 | int f = fibonacci (i: 4); |
55 | memcpy (buf1, buf2, f); |
56 | } |
57 | |
58 | static void |
59 | do_filler_shared (void) |
60 | { |
61 | static char buf1[512], buf2[512]; |
62 | int f = fibonacci (i: 4); |
63 | memcpy (buf1, buf2, f); |
64 | } |
65 | |
66 | #pragma GCC pop_options |
67 | |
68 | #define UNIT_WORK_CRT do_filler_shared () |
69 | #define UNIT_WORK_NON_CRT do_filler () |
70 | |
71 | static inline void |
72 | critical_section (int length) |
73 | { |
74 | for (int i = length; i >= 0; i--) |
75 | UNIT_WORK_CRT; |
76 | } |
77 | |
78 | static inline void |
79 | non_critical_section (int length) |
80 | { |
81 | for (int i = length; i >= 0; i--) |
82 | UNIT_WORK_NON_CRT; |
83 | } |
84 | |
85 | typedef struct Worker_Params |
86 | { |
87 | long iters; |
88 | int crt_len; |
89 | int non_crt_len; |
90 | timing_t duration; |
91 | } Worker_Params; |
92 | |
93 | static void * |
94 | worker (void *v) |
95 | { |
96 | timing_t start, stop; |
97 | Worker_Params *p = (Worker_Params *) v; |
98 | long iters = p->iters; |
99 | int crt_len = p->crt_len; |
100 | int non_crt_len = p->non_crt_len; |
101 | |
102 | pthread_barrier_wait (barrier: &barrier); |
103 | TIMING_NOW (start); |
104 | while (iters--) |
105 | { |
106 | LOCK (&lock); |
107 | critical_section (length: crt_len); |
108 | UNLOCK (&lock); |
109 | non_critical_section (length: non_crt_len); |
110 | } |
111 | TIMING_NOW (stop); |
112 | |
113 | TIMING_DIFF (p->duration, start, stop); |
114 | return NULL; |
115 | } |
116 | |
117 | static double |
118 | do_one_test (int num_threads, int crt_len, int non_crt_len, long iters) |
119 | { |
120 | int i; |
121 | timing_t mean; |
122 | Worker_Params *p, params[num_threads]; |
123 | pthread_t threads[num_threads]; |
124 | |
125 | LOCK_INIT (&lock, &attr); |
126 | pthread_barrier_init (barrier: &barrier, NULL, count: num_threads); |
127 | |
128 | for (i = 0; i < num_threads; i++) |
129 | { |
130 | p = ¶ms[i]; |
131 | p->iters = iters; |
132 | p->crt_len = crt_len; |
133 | p->non_crt_len = non_crt_len; |
134 | pthread_create (newthread: &threads[i], NULL, start_routine: worker, arg: (void *) p); |
135 | } |
136 | for (i = 0; i < num_threads; i++) |
137 | pthread_join (th: threads[i], NULL); |
138 | |
139 | LOCK_DESTROY (&lock); |
140 | pthread_barrier_destroy (barrier: &barrier); |
141 | |
142 | mean = 0; |
143 | for (i = 0; i < num_threads; i++) |
144 | mean += params[i].duration; |
145 | mean /= num_threads; |
146 | return mean; |
147 | } |
148 | |
149 | #define RUN_COUNT 10 |
150 | #define MIN_TEST_SEC 0.01 |
151 | |
152 | static void |
153 | do_bench_one (const char *name, int num_threads, int crt_len, int non_crt_len, |
154 | json_ctx_t *js) |
155 | { |
156 | timing_t cur; |
157 | struct timeval ts, te; |
158 | double tsd, ted, td; |
159 | long iters, iters_limit, total_iters; |
160 | timing_t curs[RUN_COUNT + 2]; |
161 | int i, j; |
162 | double mean, stdev; |
163 | |
164 | iters = START_ITERS; |
165 | iters_limit = LONG_MAX / 100; |
166 | |
167 | while (1) |
168 | { |
169 | gettimeofday (tv: &ts, NULL); |
170 | cur = do_one_test (num_threads, crt_len, non_crt_len, iters); |
171 | gettimeofday (tv: &te, NULL); |
172 | /* Make sure the test to run at least MIN_TEST_SEC. */ |
173 | tsd = ts.tv_sec + ts.tv_usec / 1000000.0; |
174 | ted = te.tv_sec + te.tv_usec / 1000000.0; |
175 | td = ted - tsd; |
176 | if (td >= MIN_TEST_SEC || iters >= iters_limit) |
177 | break; |
178 | |
179 | iters *= 10; |
180 | } |
181 | |
182 | curs[0] = cur; |
183 | for (i = 1; i < RUN_COUNT + 2; i++) |
184 | curs[i] = do_one_test (num_threads, crt_len, non_crt_len, iters); |
185 | |
186 | /* Sort the results so we can discard the fastest and slowest |
187 | times as outliers. */ |
188 | for (i = 0; i < RUN_COUNT + 1; i++) |
189 | for (j = i + 1; j < RUN_COUNT + 2; j++) |
190 | if (curs[i] > curs[j]) |
191 | { |
192 | timing_t temp = curs[i]; |
193 | curs[i] = curs[j]; |
194 | curs[j] = temp; |
195 | } |
196 | |
197 | /* Calculate mean and standard deviation. */ |
198 | mean = 0.0; |
199 | total_iters = iters * num_threads; |
200 | for (i = 1; i < RUN_COUNT + 1; i++) |
201 | mean += (double) curs[i] / (double) total_iters; |
202 | mean /= RUN_COUNT; |
203 | |
204 | stdev = 0.0; |
205 | for (i = 1; i < RUN_COUNT + 1; i++) |
206 | { |
207 | double s = (double) curs[i] / (double) total_iters - mean; |
208 | stdev += s * s; |
209 | } |
210 | stdev = sqrt (stdev / (RUN_COUNT - 1)); |
211 | |
212 | char buf[256]; |
213 | snprintf (s: buf, maxlen: sizeof buf, format: "%s,non_crt_len=%d,crt_len=%d,threads=%d" , name, |
214 | non_crt_len, crt_len, num_threads); |
215 | |
216 | json_attr_object_begin (ctx: js, name: buf); |
217 | |
218 | json_attr_double (ctx: js, name: "duration" , d: (double) cur); |
219 | json_attr_double (ctx: js, name: "iterations" , d: (double) total_iters); |
220 | json_attr_double (ctx: js, name: "mean" , d: mean); |
221 | json_attr_double (ctx: js, name: "stdev" , d: stdev); |
222 | json_attr_double (ctx: js, name: "min-outlier" , |
223 | d: (double) curs[0] / (double) total_iters); |
224 | json_attr_double (ctx: js, name: "min" , d: (double) curs[1] / (double) total_iters); |
225 | json_attr_double (ctx: js, name: "max" , |
226 | d: (double) curs[RUN_COUNT] / (double) total_iters); |
227 | json_attr_double (ctx: js, name: "max-outlier" , |
228 | d: (double) curs[RUN_COUNT + 1] / (double) total_iters); |
229 | |
230 | json_attr_object_end (ctx: js); |
231 | } |
232 | |
233 | #define TH_CONF_MAX 10 |
234 | |
235 | int |
236 | do_bench (void) |
237 | { |
238 | int rv = 0; |
239 | json_ctx_t json_ctx; |
240 | int i, j, k; |
241 | int th_num, th_conf, nprocs; |
242 | int threads[TH_CONF_MAX]; |
243 | int crt_lens[] = { 0, 1, 2, 4, 8, 16, 32, 64, 128 }; |
244 | int non_crt_lens[] = { 1, 32, 128 }; |
245 | char name[128]; |
246 | |
247 | json_init (ctx: &json_ctx, indent_level: 2, stdout); |
248 | json_attr_object_begin (ctx: &json_ctx, name: TEST_NAME); |
249 | |
250 | /* The thread config begins from 1, and increases by 2x until nprocs. |
251 | We also wants to test over-saturation case (1.25*nprocs). */ |
252 | nprocs = get_nprocs (); |
253 | th_num = 1; |
254 | for (th_conf = 0; th_conf < (TH_CONF_MAX - 2) && th_num < nprocs; th_conf++) |
255 | { |
256 | threads[th_conf] = th_num; |
257 | th_num <<= 1; |
258 | } |
259 | threads[th_conf++] = nprocs; |
260 | threads[th_conf++] = nprocs + nprocs / 4; |
261 | |
262 | LOCK_ATTR_INIT (&attr); |
263 | snprintf (s: name, maxlen: sizeof name, format: "type=adaptive" ); |
264 | |
265 | for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++) |
266 | { |
267 | int non_crt_len = non_crt_lens[k]; |
268 | for (j = 0; j < (sizeof (crt_lens) / sizeof (int)); j++) |
269 | { |
270 | int crt_len = crt_lens[j]; |
271 | for (i = 0; i < th_conf; i++) |
272 | { |
273 | th_num = threads[i]; |
274 | do_bench_one (name, num_threads: th_num, crt_len, non_crt_len, js: &json_ctx); |
275 | } |
276 | } |
277 | } |
278 | |
279 | json_attr_object_end (ctx: &json_ctx); |
280 | |
281 | return rv; |
282 | } |
283 | |
284 | #define TEST_FUNCTION do_bench () |
285 | |
286 | #include "../test-skeleton.c" |
287 | |