1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
#if KMP_ITT_DEBUG
// Debug tracing helpers. They are used as a pair: KMP_ITT_DEBUG_LOCK()
// acquires __kmp_itt_debug_lock and the following KMP_ITT_DEBUG_PRINT() writes
// "#<gtid>: <message>" to stderr, flushes it and releases the lock again.
// Both are wrapped in do { } while (0) so that they behave as one statement
// (safe inside an unbraced if/else) and require the trailing semicolon.
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  do {                                                                         \
    __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  do {                                                                         \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#else
// Tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
37
// LINKAGE is put in front of the function definitions below. When the
// functions are meant to be inlined they must be `static inline`; otherwise
// they could not be used in more than one file without producing multiple
// definitions. In a KMP_DEBUG build LINKAGE is empty.
#if !KMP_DEBUG
#define LINKAGE static inline
#else
#define LINKAGE
#endif
46
// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
// The overrides below are only installed when USE_ITT_ZCA is set and the
// compiler is the Intel compiler, version 1200 or newer.
#if USE_ITT_ZCA && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
62
// Lock taken by the __kmp_itt_metadata_* functions in this file while they
// create the metadata domain and its string handles on first use.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66#if USE_ITT_NOTIFY
67LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) {
68 return ((addr >> 6) ^ (addr >> 2)) % hsize;
69}
70LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread,
71 kmp_itthash_t *h, ident_t *loc,
72 int team_size) {
73 kmp_itthash_entry_t *entry;
74 size_t bucket = __kmp_itthash_hash(addr: (kmp_intptr_t)loc, hsize: KMP_MAX_FRAME_DOMAINS);
75 for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
76 if (entry->loc == loc && entry->team_size == team_size)
77 break;
78
79 if (entry == NULL) {
80 // two foreign threads could report frames concurrently
81 int cnt = KMP_TEST_THEN_INC32(&h->count);
82 if (cnt >= KMP_MAX_FRAME_DOMAINS) {
83 KMP_TEST_THEN_DEC32(&h->count); // revert the count
84 return entry; // too many entries
85 }
86 // create new entry
87 entry = (kmp_itthash_entry_t *)__kmp_thread_malloc(
88 thread, sizeof(kmp_itthash_entry_t));
89 entry->loc = loc;
90 entry->team_size = team_size;
91 entry->d = NULL;
92 entry->next_in_bucket = h->buckets[bucket];
93 while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket],
94 entry->next_in_bucket, entry)) {
95 KMP_CPU_PAUSE();
96 entry->next_in_bucket = h->buckets[bucket];
97 }
98 }
99#if KMP_DEBUG
100 else {
101 // check the contents of the location info is unique
102 KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource);
103 }
104#endif
105 return entry;
106}
107#endif
108
109/* Parallel region reporting.
110 * __kmp_itt_region_forking should be called by primary thread of a team.
111 Exact moment of call does not matter, but it should be completed before any
112 thread of this team calls __kmp_itt_region_starting.
113 * __kmp_itt_region_starting should be called by each thread of a team just
114 before entering parallel region body.
115 * __kmp_itt_region_finished should be called by each thread of a team right
116 after returning from parallel region body.
117 * __kmp_itt_region_joined should be called by primary thread of a team, after
118 all threads called __kmp_itt_region_finished.
119
120 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
121 execute some more user code -- such a thread can execute tasks.
122
123 Note: The overhead of logging region_starting and region_finished in each
124 thread is too large, so these calls are not used. */
125
126LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
127#if USE_ITT_NOTIFY
128 kmp_team_t *team = __kmp_team_from_gtid(gtid);
129 if (team->t.t_active_level > 1) {
130 // The frame notifications are only supported for the outermost teams.
131 return;
132 }
133 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
134 ident_t *loc = th->th.th_ident;
135 if (!loc) {
136 // no sense to report a region without location info
137 return;
138 }
139 kmp_itthash_entry *e;
140 e = __kmp_itthash_find(thread: th, h: &__kmp_itt_region_domains, loc, team_size);
141 if (e == NULL)
142 return; // too many entries in the hash
143 if (e->d == NULL) {
144 // Transform compiler-generated region location into the format
145 // that the tools more or less standardized on:
146 // "<func>$omp$parallel@[file:]<line>[:<col>]"
147 char *buff = NULL;
148 kmp_str_loc_t str_loc =
149 __kmp_str_loc_init(psource: loc->psource, /* init_fname */ init_fname: false);
150 buff = __kmp_str_format(format: "%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
151 team_size, str_loc.file, str_loc.line, str_loc.col);
152
153 __itt_suppress_push(__itt_suppress_memory_errors);
154 e->d = __itt_domain_create(buff);
155 KMP_ASSERT(e->d != NULL);
156 __itt_suppress_pop();
157
158 __kmp_str_free(str: &buff);
159 if (barriers) {
160 kmp_itthash_entry *e;
161 e = __kmp_itthash_find(thread: th, h: &__kmp_itt_barrier_domains, loc, team_size: 0);
162 if (e != NULL) {
163 KMP_DEBUG_ASSERT(e->d == NULL);
164 char *buff = NULL;
165 buff = __kmp_str_format(format: "%s$omp$barrier@%s:%d", str_loc.func,
166 str_loc.file, str_loc.line);
167 __itt_suppress_push(__itt_suppress_memory_errors);
168 e->d = __itt_domain_create(buff);
169 KMP_ASSERT(e->d != NULL);
170 __itt_suppress_pop();
171 __kmp_str_free(str: &buff);
172 }
173 }
174 __kmp_str_loc_free(loc: &str_loc);
175 }
176 __itt_frame_begin_v3(e->d, NULL);
177 KMP_ITT_DEBUG_LOCK();
178 KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
179 loc);
180#endif
181} // __kmp_itt_region_forking
182
183// -----------------------------------------------------------------------------
184LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
185 __itt_timestamp end, int imbalance,
186 ident_t *loc, int team_size, int region) {
187#if USE_ITT_NOTIFY
188 if (!loc) {
189 // no sense to report a region without location info
190 return;
191 }
192 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
193 if (region) {
194 kmp_team_t *team = __kmp_team_from_gtid(gtid);
195 int serialized = (region == 2 ? 1 : 0);
196 if (team->t.t_active_level + serialized > 1) {
197 // The frame notifications are only supported for the outermost teams.
198 return;
199 }
200 // Check region domain has not been created before.
201 kmp_itthash_entry *e;
202 e = __kmp_itthash_find(thread: th, h: &__kmp_itt_region_domains, loc, team_size);
203 if (e == NULL)
204 return; // too many entries in the hash
205 if (e->d == NULL) { // new entry, need to calculate domain
206 // Transform compiler-generated region location into the format
207 // that the tools more or less standardized on:
208 // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
209 char *buff = NULL;
210 kmp_str_loc_t str_loc =
211 __kmp_str_loc_init(psource: loc->psource, /* init_fname */ init_fname: false);
212 buff =
213 __kmp_str_format(format: "%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
214 team_size, str_loc.file, str_loc.line, str_loc.col);
215 __itt_suppress_push(__itt_suppress_memory_errors);
216 e->d = __itt_domain_create(buff);
217 KMP_ASSERT(e->d != NULL);
218 __itt_suppress_pop();
219
220 __kmp_str_free(str: &buff);
221 __kmp_str_loc_free(loc: &str_loc);
222 }
223 __itt_frame_submit_v3(e->d, NULL, begin, end);
224 KMP_ITT_DEBUG_LOCK();
225 KMP_ITT_DEBUG_PRINT(
226 "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n",
227 gtid, e->d, region, loc, begin, end);
228 return;
229 } else { // called for barrier reporting
230 kmp_itthash_entry *e;
231 e = __kmp_itthash_find(thread: th, h: &__kmp_itt_barrier_domains, loc, team_size: 0);
232 if (e == NULL)
233 return; // too many entries in the hash
234 if (e->d == NULL) { // new entry, need to calculate domain
235 // Transform compiler-generated region location into the format
236 // that the tools more or less standardized on:
237 // "<func>$omp$frame@[file:]<line>[:<col>]"
238 kmp_str_loc_t str_loc =
239 __kmp_str_loc_init(psource: loc->psource, /* init_fname */ init_fname: false);
240 char *buff = NULL;
241 if (imbalance) {
242 buff =
243 __kmp_str_format(format: "%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func,
244 team_size, str_loc.file, str_loc.line);
245 } else {
246 buff = __kmp_str_format(format: "%s$omp$barrier@%s:%d", str_loc.func,
247 str_loc.file, str_loc.line);
248 }
249 __itt_suppress_push(__itt_suppress_memory_errors);
250 e->d = __itt_domain_create(buff);
251 KMP_ASSERT(e->d != NULL);
252 __itt_suppress_pop();
253 __kmp_str_free(str: &buff);
254 __kmp_str_loc_free(loc: &str_loc);
255 }
256 __itt_frame_submit_v3(e->d, NULL, begin, end);
257 KMP_ITT_DEBUG_LOCK();
258 KMP_ITT_DEBUG_PRINT(
259 "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid,
260 e->d, loc, begin, end);
261 }
262#endif
263} // __kmp_itt_frame_submit
264
265// -----------------------------------------------------------------------------
266LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
267 kmp_uint64 end, kmp_uint64 imbalance,
268 kmp_uint64 reduction) {
269#if USE_ITT_NOTIFY
270 if (metadata_domain == NULL) {
271 __kmp_acquire_bootstrap_lock(lck: &metadata_lock);
272 if (metadata_domain == NULL) {
273 __itt_suppress_push(__itt_suppress_memory_errors);
274 metadata_domain = __itt_domain_create("OMP Metadata");
275 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
276 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
277 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
278 __itt_suppress_pop();
279 }
280 __kmp_release_bootstrap_lock(lck: &metadata_lock);
281 }
282
283 kmp_uint64 imbalance_data[4];
284 imbalance_data[0] = begin;
285 imbalance_data[1] = end;
286 imbalance_data[2] = imbalance;
287 imbalance_data[3] = reduction;
288
289 __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
290 __itt_metadata_u64, 4, imbalance_data);
291#endif
292} // __kmp_itt_metadata_imbalance
293
294// -----------------------------------------------------------------------------
295LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
296 kmp_uint64 iterations, kmp_uint64 chunk) {
297#if USE_ITT_NOTIFY
298 if (metadata_domain == NULL) {
299 __kmp_acquire_bootstrap_lock(lck: &metadata_lock);
300 if (metadata_domain == NULL) {
301 __itt_suppress_push(__itt_suppress_memory_errors);
302 metadata_domain = __itt_domain_create("OMP Metadata");
303 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
304 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
305 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
306 __itt_suppress_pop();
307 }
308 __kmp_release_bootstrap_lock(lck: &metadata_lock);
309 }
310
311 // Parse line and column from psource string: ";file;func;line;col;;"
312 KMP_DEBUG_ASSERT(loc->psource);
313 kmp_uint64 loop_data[5];
314 int line, col;
315 __kmp_str_loc_numbers(Psource: loc->psource, Line: &line, Col: &col);
316 loop_data[0] = line;
317 loop_data[1] = col;
318 loop_data[2] = sched_type;
319 loop_data[3] = iterations;
320 loop_data[4] = chunk;
321
322 __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
323 __itt_metadata_u64, 5, loop_data);
324#endif
325} // __kmp_itt_metadata_loop
326
327// -----------------------------------------------------------------------------
328LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
329#if USE_ITT_NOTIFY
330 if (metadata_domain == NULL) {
331 __kmp_acquire_bootstrap_lock(lck: &metadata_lock);
332 if (metadata_domain == NULL) {
333 __itt_suppress_push(__itt_suppress_memory_errors);
334 metadata_domain = __itt_domain_create("OMP Metadata");
335 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
336 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
337 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
338 __itt_suppress_pop();
339 }
340 __kmp_release_bootstrap_lock(lck: &metadata_lock);
341 }
342
343 int line, col;
344 __kmp_str_loc_numbers(Psource: loc->psource, Line: &line, Col: &col);
345 kmp_uint64 single_data[2];
346 single_data[0] = line;
347 single_data[1] = col;
348
349 __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
350 __itt_metadata_u64, 2, single_data);
351#endif
352} // __kmp_itt_metadata_single
353
354// -----------------------------------------------------------------------------
// Empty body: nothing is reported to ITT here (the section comment above
// notes that per-thread region_starting logging is too expensive).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting
359
360// -----------------------------------------------------------------------------
// Empty body: nothing is reported to ITT here (the section comment above
// notes that per-thread region_finished logging is too expensive).
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished
365
366// ----------------------------------------------------------------------------
367LINKAGE void __kmp_itt_region_joined(int gtid) {
368#if USE_ITT_NOTIFY
369 kmp_team_t *team = __kmp_team_from_gtid(gtid);
370 if (team->t.t_active_level > 1) {
371 // The frame notifications are only supported for the outermost teams.
372 return;
373 }
374 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
375 ident_t *loc = th->th.th_ident;
376 if (loc) {
377 kmp_itthash_entry *e = __kmp_itthash_find(thread: th, h: &__kmp_itt_region_domains,
378 loc, team_size: th->th.th_team_nproc);
379 if (e == NULL)
380 return; // too many entries in the hash
381 KMP_DEBUG_ASSERT(e->d);
382 KMP_ITT_DEBUG_LOCK();
383 __itt_frame_end_v3(e->d, NULL);
384 KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
385 loc);
386 }
387#endif
388} // __kmp_itt_region_joined
389
390/* Barriers reporting.
391
392 A barrier consists of two phases:
393 1. Gather -- primary thread waits for all worker threads to arrive; each
394 worker thread registers arrival and goes further.
395 2. Release -- each worker thread waits until primary thread lets it go;
396 primary thread lets worker threads go.
397
398 Function should be called by each thread:
399 * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
400 * __kmp_itt_barrier_middle() -- between gather and release phases.
401 * __kmp_itt_barrier_finished() -- after release phase.
402
403 Note: Call __kmp_itt_barrier_object() before call to
404 __kmp_itt_barrier_starting() and save result in local variable.
405 __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
406 would return itt sync object for the next barrier!
407
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have a barrier object or barrier data structure. A barrier is just
   a counter in team and thread structures. We could use the address of the
   team structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use the team
   address as barrier sync object for the first barrier, then increase it by
   one for the next barrier, and so on (but wrap it so as not to use addresses
   outside of the team structure). */
416
417void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
418 int delta // 0 (current barrier) is default
419 // value; specify -1 to get previous
420 // barrier.
421 ) {
422 void *object = NULL;
423#if USE_ITT_NOTIFY
424 kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
425 kmp_team_t *team = thr->th.th_team;
426
427 // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
428 // be NULL. This "if" helps to avoid crash. However, this is not complete
429 // solution, and reporting fork/join barriers to ITT should be revisited.
430
431 if (team != NULL) {
432 // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
433 // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
434 kmp_uint64 counter =
435 team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
436 // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
437 // barriers of different types do not have the same ids.
438 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
439 // This condition is a must (we would have zero divide otherwise).
440 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
441 // More strong condition: make sure we have room at least for two
442 // different ids (for each barrier type).
443 object = reinterpret_cast<void *>(
444 (kmp_uintptr_t)(team) +
445 (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
446 bs_last_barrier +
447 bt);
448 KMP_ITT_DEBUG_LOCK();
449 KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
450 counter, object);
451
452 if (set_name) {
453 ident_t const *loc = NULL;
454 char const *src = NULL;
455 char const *type = "OMP Barrier";
456 switch (bt) {
457 case bs_plain_barrier: {
458 // For plain barrier compiler calls __kmpc_barrier() function, which
459 // saves location in thr->th.th_ident.
460 loc = thr->th.th_ident;
461 // Get the barrier type from flags provided by compiler.
462 kmp_int32 expl = 0;
463 kmp_uint32 impl = 0;
464 if (loc != NULL) {
465 src = loc->psource;
466 expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
467 impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
468 }
469 if (impl) {
470 switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
471 case KMP_IDENT_BARRIER_IMPL_FOR: {
472 type = "OMP For Barrier";
473 } break;
474 case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
475 type = "OMP Sections Barrier";
476 } break;
477 case KMP_IDENT_BARRIER_IMPL_SINGLE: {
478 type = "OMP Single Barrier";
479 } break;
480 case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
481 type = "OMP Workshare Barrier";
482 } break;
483 default: {
484 type = "OMP Implicit Barrier";
485 KMP_DEBUG_ASSERT(0);
486 }
487 }
488 } else if (expl) {
489 type = "OMP Explicit Barrier";
490 }
491 } break;
492 case bs_forkjoin_barrier: {
493 // In case of fork/join barrier we can read thr->th.th_ident, because it
494 // contains location of last passed construct (while join barrier is not
495 // such one). Use th_ident of primary thread instead --
496 // __kmp_join_call() called by the primary thread saves location.
497 //
498 // AC: cannot read from primary thread because __kmp_join_call may not
499 // be called yet, so we read the location from team. This is the
500 // same location. Team is valid on entry to join barrier where this
501 // happens.
502 loc = team->t.t_ident;
503 if (loc != NULL) {
504 src = loc->psource;
505 }
506 type = "OMP Join Barrier";
507 } break;
508 }
509 KMP_ITT_DEBUG_LOCK();
510 __itt_sync_create(object, type, src, __itt_attr_barrier);
511 KMP_ITT_DEBUG_PRINT(
512 "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
513 type, src);
514 }
515 }
516#endif
517 return object;
518} // __kmp_itt_barrier_object
519
520// -----------------------------------------------------------------------------
// Called before the gather phase. Threads for which KMP_MASTER_GTID(gtid) is
// false first signal "releasing" on the barrier object; every thread then
// signals "prepare".
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  int is_primary = KMP_MASTER_GTID(gtid);
  if (!is_primary) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
533
534// -----------------------------------------------------------------------------
// Called between the gather and release phases. Only the thread for which
// KMP_MASTER_GTID(gtid) holds reports anything: "acquired" followed by
// "releasing" on the barrier object.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid))
    return;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
548
549// -----------------------------------------------------------------------------
// Called after the release phase. Threads for which KMP_MASTER_GTID(gtid) is
// false signal "acquired" on the barrier object; the other thread reports
// nothing here.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid))
    return;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
#endif
} // __kmp_itt_barrier_finished
560
561/* Taskwait reporting.
562 ITT need an address (void *) to be specified as a sync object. OpenMP RTL
563 does not have taskwait structure, so we need to construct something. */
564
565void *__kmp_itt_taskwait_object(int gtid) {
566 void *object = NULL;
567#if USE_ITT_NOTIFY
568 if (UNLIKELY(__itt_sync_create_ptr)) {
569 kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
570 kmp_taskdata_t *taskdata = thread->th.th_current_task;
571 object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
572 taskdata->td_taskwait_counter %
573 sizeof(kmp_taskdata_t));
574 }
575#endif
576 return object;
577} // __kmp_itt_taskwait_object
578
// Registers `object` as an "OMP Taskwait" sync object (named after the
// taskwait location of the current task, if any) and signals "prepare" on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_taskdata_t *taskdata = __kmp_thread_from_gtid(gtid)->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
594
// Signals "acquired" on the taskwait sync object and then destroys it.
// gtid is not used by the body.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
605
606/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the functions must be the
   barrier or taskwait object the thread is waiting at. */
610
// Signals "cancel" on the sync object the thread was waiting at, before it
// starts executing a task. Does nothing when object is NULL.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
622
623// -----------------------------------------------------------------------------
// Signals "prepare" on the sync object again after a task has finished.
// Unlike __kmp_itt_task_starting(), object is not checked for NULL here.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
633
634/* Lock reporting.
635 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
636 operation (set/unset). It is not a real event shown to the user but just
637 setting a name for synchronization object. `lock' is an address of sync
638 object, the same address should be used in all subsequent calls.
639 * __kmp_itt_lock_acquiring() should be called before setting the lock.
640 * __kmp_itt_lock_acquired() should be called after setting the lock.
   * __kmp_itt_lock_releasing() should be called before unsetting the lock.
642 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
643 for the lock.
644 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
645 operation. After __kmp_itt_lock_destroyed() all the references to the same
646 address will be considered as another sync object, not related with the
647 original one. */
648
649#if KMP_USE_DYNAMIC_LOCK
650// Takes location information directly
651__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
652 const ident_t *loc) {
653#if USE_ITT_NOTIFY
654 if (__itt_sync_create_ptr) {
655 char const *src = (loc == NULL ? NULL : loc->psource);
656 KMP_ITT_DEBUG_LOCK();
657 __itt_sync_create(lock, type, src, 0);
658 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
659 src);
660 }
661#endif
662}
663#else // KMP_USE_DYNAMIC_LOCK
664// Internal guts -- common code for locks and critical sections, do not call
665// directly.
666__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
667#if USE_ITT_NOTIFY
668 if (__itt_sync_create_ptr) {
669 ident_t const *loc = NULL;
670 if (__kmp_get_user_lock_location_ != NULL)
671 loc = __kmp_get_user_lock_location_((lock));
672 char const *src = (loc == NULL ? NULL : loc->psource);
673 KMP_ITT_DEBUG_LOCK();
674 __itt_sync_create(lock, type, src, 0);
675 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
676 src);
677 }
678#endif
679} // ___kmp_itt_lock_init
680#endif // KMP_USE_DYNAMIC_LOCK
681
// Internal guts -- common code for locks and critical sections, do not call
// directly. Destroys the ITT sync object associated with `lock`.
// The `type` argument is not used by the body.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini
691
692// -----------------------------------------------------------------------------
693#if KMP_USE_DYNAMIC_LOCK
694void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
695 ___kmp_itt_lock_init(lock, type: "OMP Lock", loc);
696}
697#else
698void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
699 ___kmp_itt_lock_init(lock, "OMP Lock");
700} // __kmp_itt_lock_creating
701#endif
702
703void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
704#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
705 // postpone lock object access
706 if (__itt_sync_prepare_ptr) {
707 if (KMP_EXTRACT_D_TAG(lock) == 0) {
708 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
709 __itt_sync_prepare(ilk->lock);
710 } else {
711 __itt_sync_prepare(lock);
712 }
713 }
714#else
715 __itt_sync_prepare(lock);
716#endif
717} // __kmp_itt_lock_acquiring
718
719void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
720#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
721 // postpone lock object access
722 if (__itt_sync_acquired_ptr) {
723 if (KMP_EXTRACT_D_TAG(lock) == 0) {
724 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
725 __itt_sync_acquired(ilk->lock);
726 } else {
727 __itt_sync_acquired(lock);
728 }
729 }
730#else
731 __itt_sync_acquired(lock);
732#endif
733} // __kmp_itt_lock_acquired
734
735void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
736#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
737 if (__itt_sync_releasing_ptr) {
738 if (KMP_EXTRACT_D_TAG(lock) == 0) {
739 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
740 __itt_sync_releasing(ilk->lock);
741 } else {
742 __itt_sync_releasing(lock);
743 }
744 }
745#else
746 __itt_sync_releasing(lock);
747#endif
748} // __kmp_itt_lock_releasing
749
750void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
751#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
752 if (__itt_sync_cancel_ptr) {
753 if (KMP_EXTRACT_D_TAG(lock) == 0) {
754 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
755 __itt_sync_cancel(ilk->lock);
756 } else {
757 __itt_sync_cancel(lock);
758 }
759 }
760#else
761 __itt_sync_cancel(lock);
762#endif
763} // __kmp_itt_lock_cancelled
764
765void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
766 ___kmp_itt_lock_fini(lock, type: "OMP Lock");
767} // __kmp_itt_lock_destroyed
768
769/* Critical reporting.
770 Critical sections are treated exactly as locks (but have different object
771 type). */
772#if KMP_USE_DYNAMIC_LOCK
773void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
774 ___kmp_itt_lock_init(lock, type: "OMP Critical", loc);
775}
776#else
777void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
778 ___kmp_itt_lock_init(lock, "OMP Critical");
779} // __kmp_itt_critical_creating
780#endif
781
// Signals "prepare" on the critical-section lock object.
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring
785
// Signals "acquired" on the critical-section lock object.
void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired
789
// Signals "releasing" on the critical-section lock object.
void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing
793
794void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
795 ___kmp_itt_lock_fini(lock, type: "OMP Critical");
796} // __kmp_itt_critical_destroyed
797
798/* Single reporting. */
799
// Creates an ITT mark named "OMP Single-<psource>" for the calling thread,
// stores it in th_itt_mark_single and sets the mark. Runs only when
// __itt_mark_create_ptr is set or KMP_ITT_DEBUG is enabled.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // NOTE(review): src is NULL when the thread has no location info and is
    // then passed to a "%s" conversion below -- confirm this is acceptable.
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
821
// Switches off the mark stored in the calling thread's th_itt_mark_single
// (the field written by __kmp_itt_single_start()).
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
830
831/* Ordered reporting.
832 * __kmp_itt_ordered_init is called by each thread *before* first using sync
833 object. ITT team would like it to be called once, but it requires extra
834 synchronization.
835 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
836 section (before synchronization).
837 * __kmp_itt_ordered_start is called just before entering user code (after
838 synchronization).
839 * __kmp_itt_ordered_end is called after returning from user code.
840
841 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
842 Events are not generated in case of serialized team. */
843
// Registers the thread's current shared dispatch buffer
// (th_dispatch->th_dispatch_sh_current) as an "OMP Ordered" sync object, named
// after the thread's current source location when there is one.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;

  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = thr->th.th_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;
  __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                    "OMP Ordered", src, 0);
#endif
} // __kmp_itt_ordered_init
855
// Signals "prepare" on the ordered sync object before the thread synchronizes
// to enter an ordered section. Skipped for serialized teams and when
// __itt_sync_create_ptr is not set.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;

  kmp_team_t *t = __kmp_team_from_gtid(gtid);
  if (t->t.t_serialized)
    return;

  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
867
// Signals "acquired" on the ordered sync object just before user code of the
// ordered section runs. Skipped for serialized teams and when
// __itt_sync_create_ptr is not set.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;

  kmp_team_t *t = __kmp_team_from_gtid(gtid);
  if (t->t.t_serialized)
    return;

  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
879
// Signals "releasing" on the ordered sync object after user code of the
// ordered section has returned. Skipped for serialized teams and when
// __itt_sync_create_ptr is not set.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;

  kmp_team_t *t = __kmp_team_from_gtid(gtid);
  if (t->t.t_serialized)
    return;

  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
891
892/* Threads reporting. */
893
// Thin wrapper: forwards to __itt_thr_ignore().
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
897
// Sets the ITT name of the calling thread to "OMP Primary Thread #<gtid>" or
// "OMP Worker Thread #<gtid>", depending on KMP_MASTER_GTID(gtid). Does
// nothing when __itt_thr_name_set_ptr is not set.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Primary Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name
915
916/* System object reporting.
917 ITT catches operations with system sync objects (like Windows* OS on IA-32
918 architecture API critical sections and events). We only need to specify
919 name ("OMP Scheduler") for the object to let ITT know it is an object used
920 by OpenMP RTL for internal purposes. */
921
// Registers `object` with ITT as a sync object of type "OMP Scheduler" whose
// name is the given `name` string.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
930
931/* Stack stitching api.
932 Primary thread calls "create" and put the stitching id into team structure.
933 Workers read the stitching id and call "enter" / "leave" api.
934 Primary thread calls "destroy" at the end of the parallel region. */
935
936__itt_caller __kmp_itt_stack_caller_create() {
937#if USE_ITT_NOTIFY
938 if (!__itt_stack_caller_create_ptr)
939 return NULL;
940 KMP_ITT_DEBUG_LOCK();
941 __itt_caller id = __itt_stack_caller_create();
942 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
943 return id;
944#endif
945 return NULL;
946}
947
948void __kmp_itt_stack_caller_destroy(__itt_caller id) {
949#if USE_ITT_NOTIFY
950 if (__itt_stack_caller_destroy_ptr) {
951 KMP_ITT_DEBUG_LOCK();
952 __itt_stack_caller_destroy(id);
953 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
954 }
955#endif
956}
957
958void __kmp_itt_stack_callee_enter(__itt_caller id) {
959#if USE_ITT_NOTIFY
960 if (__itt_stack_callee_enter_ptr) {
961 KMP_ITT_DEBUG_LOCK();
962 __itt_stack_callee_enter(id);
963 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
964 }
965#endif
966}
967
968void __kmp_itt_stack_callee_leave(__itt_caller id) {
969#if USE_ITT_NOTIFY
970 if (__itt_stack_callee_leave_ptr) {
971 KMP_ITT_DEBUG_LOCK();
972 __itt_stack_callee_leave(id);
973 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
974 }
975#endif
976}
977
978#endif /* USE_ITT_BUILD */
979

// Source: openmp/runtime/src/kmp_itt.inl