/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <inttypes.h>
#include <iostream>
#include <list>
#include <mutex>
#include <sstream>
#include <string>
#include <sys/resource.h>
#include <unistd.h>
#include <unordered_map>
#include <vector>
#include <dlfcn.h>

#include "omp-tools.h"

// Define an attribute that indicates that the fall-through from the previous
// case label is intentional and should not be diagnosed by the compiler.
// Code from libcxx/include/__config
// Use a function-like macro to imply that it must be followed by a semicolon.
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif
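
// Usage sketch (illustrative, not part of this file's logic): the macro is
// placed where one case intentionally falls into the next and must be
// followed by a semicolon:
//   switch (endpoint) {
//   case ompt_scope_begin:
//     handle_begin();        // hypothetical helper
//     KMP_FALLTHROUGH();
//   case ompt_scope_end:
//     handle_end();          // hypothetical helper
//   }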

static int hasReductionCallback;

namespace {
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};
  std::atomic<int> all_memory{0};

  ArcherFlags(const char *env) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      int tmp_int;
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "report_data_leak=%d", &report_data_leak))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
          continue;
        if (sscanf(it->c_str(), "all_memory=%d", &tmp_int)) {
          all_memory = tmp_int;
          continue;
        }
        // Report the offending token (*it); after the getline loop above,
        // `token` no longer holds the current value.
        std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << *it
                  << std::endl;
      }
    }
  }
};
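
// Example (illustrative): flags are parsed from the space-separated
// ARCHER_OPTIONS environment variable, e.g.
//   ARCHER_OPTIONS="verbose=1 print_max_rss=1 all_memory=1"
// Unrecognized key=value pairs are reported on stderr but do not abort.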

class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string str(env);
      auto end = str.end();
      auto it = str.begin();
      auto is_sep = [](char c) {
        return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
               c == '\r';
      };
      while (it != end) {
        auto next_it = std::find_if(it, end, is_sep);
        tokens.emplace_back(it, next_it);
        it = next_it;
        if (it != end) {
          ++it;
        }
      }

      for (const auto &token : tokens) {
        // We are only interested in ignore_noninstrumented_modules, to print
        // a warning if it is unset.
        if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};
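
// Example (illustrative): TSAN_OPTIONS tokens may be separated by spaces,
// commas, colons, tabs, or newlines, so both of the following parse the
// same way here:
//   TSAN_OPTIONS="ignore_noninstrumented_modules=1 report_bugs=1"
//   TSAN_OPTIONS="ignore_noninstrumented_modules=1,report_bugs=1"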
} // namespace

#if (LLVM_VERSION) >= 40
extern "C" {
int __attribute__((weak)) __archer_get_omp_status();
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
static ArcherFlags *archer_flags;

#ifndef TsanHappensBefore
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// TSan detects these exact functions by name.
extern "C" {
static void (*AnnotateHappensAfter)(const char *, int, const volatile void *);
static void (*AnnotateHappensBefore)(const char *, int, const volatile void *);
static void (*AnnotateIgnoreWritesBegin)(const char *, int);
static void (*AnnotateIgnoreWritesEnd)(const char *, int);
static void (*AnnotateNewMemory)(const char *, int, const volatile void *,
                                 size_t);
static void (*__tsan_func_entry)(const void *);
static void (*__tsan_func_exit)(void);
static int (*RunningOnValgrind)(void);
}

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now.
#define TsanDeleteClock(cv)

// Annotate freshly allocated memory (also used on free to reset the
// memory's shadow state).
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif
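
// Pairing sketch (illustrative): a happens-before arc connects two events
// when both annotations use the same address, e.g. with some clock cv:
//   thread 1:  x = 42; TsanHappensBefore(&cv);
//   thread 2:  TsanHappensAfter(&cv); use(x);  // no race reported on x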

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()

/// Required OMPT inquiry functions.
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_thread_data_t ompt_get_thread_data;

typedef char ompt_tsan_clockid;

static uint64_t my_next_id() {
  static uint64_t ID = 0;
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}

static int pagesize{0};

// Data structure to provide a threadsafe pool of reusable objects.
// DataPool<Type of objects>
namespace {
template <typename T> struct DataPool final {
  static __thread DataPool<T> *ThreadDataPool;
  std::mutex DPMutex{};

  // store unused objects
  std::vector<T *> DataPointer{};
  std::vector<T *> RemoteDataPointer{};

  // store all allocated memory to finally release
  std::list<void *> memory;

  // count remotely returned data (RemoteDataPointer.size())
  std::atomic<int> remote{0};

  // total number of data objects allocated in the pool
  int total{0};
#ifdef DEBUG_DATA
  int remoteReturn{0};
  int localReturn{0};

  int getRemote() { return remoteReturn + remote; }
  int getLocal() { return localReturn; }
#endif
  int getTotal() { return total; }
  int getMissing() {
    return total - DataPointer.size() - RemoteDataPointer.size();
  }

  // fill the pool by allocating a page of memory
  void newDatas() {
    if (remote > 0) {
      const std::lock_guard<std::mutex> lock(DPMutex);
      // DataPointer is empty, so just swap the vectors
      DataPointer.swap(RemoteDataPointer);
      remote = 0;
      return;
    }
    // calculate the size of an object including padding to cacheline size
    size_t elemSize = sizeof(T);
    size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
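    // Worked example (illustrative): for sizeof(T) == 24 and 64-byte cache
    // lines, paddedSize = (((24 - 1) / 64) + 1) * 64 = 64, so each object
    // gets its own cache line and a 4096-byte page holds 64 pool entries.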
    // number of padded elements to allocate
    int ndatas = pagesize / paddedSize;
    char *datas = (char *)malloc(ndatas * paddedSize);
    memory.push_back(datas);
    for (int i = 0; i < ndatas; i++) {
      DataPointer.push_back(new (datas + i * paddedSize) T(this));
    }
    total += ndatas;
  }

  // get data from the pool
  T *getData() {
    T *ret;
    if (DataPointer.empty())
      newDatas();
    ret = DataPointer.back();
    DataPointer.pop_back();
    return ret;
  }

  // accesses to the thread-local datapool don't need locks
  void returnOwnData(T *data) {
    DataPointer.emplace_back(data);
#ifdef DEBUG_DATA
    localReturn++;
#endif
  }

  // returning to a remote datapool using lock
  void returnData(T *data) {
    const std::lock_guard<std::mutex> lock(DPMutex);
    RemoteDataPointer.emplace_back(data);
    remote++;
#ifdef DEBUG_DATA
    remoteReturn++;
#endif
  }

  ~DataPool() {
    // We assume all memory is returned by the time the thread finishes and
    // the destructor is called.
    if (archer_flags->report_data_leak && getMissing() != 0) {
      printf("ERROR: While freeing DataPool (%s) we are missing %i data "
             "objects.\n",
             __PRETTY_FUNCTION__, getMissing());
      exit(-3);
    }
    for (auto i : DataPointer)
      if (i)
        i->~T();
    for (auto i : RemoteDataPointer)
      if (i)
        i->~T();
    for (auto i : memory)
      if (i)
        free(i);
  }
};

template <typename T> struct DataPoolEntry {
  DataPool<T> *owner;

  static T *New() { return DataPool<T>::ThreadDataPool->getData(); }

  void Delete() {
    static_cast<T *>(this)->Reset();
    if (owner == DataPool<T>::ThreadDataPool)
      owner->returnOwnData(static_cast<T *>(this));
    else
      owner->returnData(static_cast<T *>(this));
  }

  DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
};
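
// Lifecycle sketch (illustrative): entries are always allocated from the
// current thread's pool and returned to the pool that created them:
//   T *obj = T::New();  // pops from this thread's ThreadDataPool
//   ...
//   obj->Delete();      // lock-free returnOwnData() on the owning thread,
//                       // locked returnData() hand-off from any other thread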

struct DependencyData;
typedef DataPool<DependencyData> DependencyDataPool;
template <>
__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for task dependency.
struct DependencyData final : DataPoolEntry<DependencyData> {
  ompt_tsan_clockid in;
  ompt_tsan_clockid out;
  ompt_tsan_clockid inoutset;
  void *GetInPtr() { return &in; }
  void *GetOutPtr() { return &out; }
  void *GetInoutsetPtr() { return &inoutset; }

  void Reset() {}

  static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }

  DependencyData(DataPool<DependencyData> *dp)
      : DataPoolEntry<DependencyData>(dp) {}
};

struct TaskDependency {
  void *inPtr;
  void *outPtr;
  void *inoutsetPtr;
  ompt_dependence_type_t type;
  TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
      : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
        inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
  void AnnotateBegin() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
    }
  }
  void AnnotateEnd() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensBefore(outPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensBefore(inPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensBefore(inoutsetPtr);
    }
  }
};
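
// Example (illustrative): for sibling tasks
//   #pragma omp task depend(out: x)  // T1
//   #pragma omp task depend(in: x)   // T2
// T1's AnnotateEnd() publishes on the out slot of x's DependencyData and
// T2's AnnotateBegin() synchronizes with it, so TSan sees T1 ordered
// before T2.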

struct ParallelData;
typedef DataPool<ParallelData> ParallelDataPool;
template <>
__thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for parallel regions.
struct ParallelData final : DataPoolEntry<ParallelData> {

  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers.
  ompt_tsan_clockid Barrier[2];

  const void *codePtr;

  void *GetParallelPtr() { return &(Barrier[1]); }

  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData *Init(const void *codeptr) {
    codePtr = codeptr;
    return this;
  }

  void Reset() {}

  static ParallelData *New(const void *codeptr) {
    return DataPoolEntry<ParallelData>::New()->Init(codeptr);
  }

  ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
};

static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
  return reinterpret_cast<ParallelData *>(parallel_data->ptr);
}

struct Taskgroup;
typedef DataPool<Taskgroup> TaskgroupPool;
template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;

/// Data structure to support stacking of taskgroups and allow synchronization.
struct Taskgroup final : DataPoolEntry<Taskgroup> {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup.
  Taskgroup *Parent;

  void *GetPtr() { return &Ptr; }

  Taskgroup *Init(Taskgroup *parent) {
    Parent = parent;
    return this;
  }

  void Reset() {}

  static Taskgroup *New(Taskgroup *Parent) {
    return DataPoolEntry<Taskgroup>::New()->Init(Parent);
  }

  Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
};
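
// Nesting sketch (illustrative): taskgroups form a parent-linked stack per
// task; ompt_tsan_sync_region pushes on taskgroup begin and pops on end:
//   Data->TaskGroup = Taskgroup::New(Data->TaskGroup);  // begin
//   ...
//   Data->TaskGroup = Data->TaskGroup->Parent;          // end, after Delete()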

enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };

struct TaskData;
typedef DataPool<TaskData> TaskDataPool;
template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for tasks.
struct TaskData final : DataPoolEntry<TaskData> {
  /// Its address is used for relationships of this task.
  ompt_tsan_clockid Task{0};

  /// Child tasks use its address to declare a relationship to a taskwait in
  /// this task.
  ompt_tsan_clockid Taskwait{0};

  /// Child tasks use its address to model omp_all_memory dependencies.
  ompt_tsan_clockid AllMemory[2]{0};

  /// Index of which barrier to use next.
  char BarrierIndex{0};

  /// Whether this task is currently executing a barrier.
  bool InBarrier{false};

  /// The task-type flags reported by OMPT (ompt_task_* bits), plus Archer's
  /// own ArcherTaskFulfilled bit.
  int TaskType{0};

  /// count execution phase
  int execution{0};

  /// Count how often this structure has been put into child tasks + 1.
  std::atomic_int RefCount{1};

  /// Reference to the parent that created this task.
  TaskData *Parent{nullptr};

  /// Reference to the team of this task.
  ParallelData *Team{nullptr};

  /// Reference to the current taskgroup that this task either belongs to or
  /// that it just created.
  Taskgroup *TaskGroup{nullptr};

  /// Dependency information for this task.
  TaskDependency *Dependencies{nullptr};

  /// Number of dependency entries.
  unsigned DependencyCount{0};

  // The dependency-map stores DependencyData objects representing
  // the dependency variables used on the sibling tasks created from
  // this task.
  // We expect a rare need for the dependency-map, so alloc on demand.
  std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};

#ifdef DEBUG
  int freed{0};
#endif

  bool isIncluded() { return TaskType & ompt_task_undeferred; }
  bool isUntied() { return TaskType & ompt_task_untied; }
  bool isFinal() { return TaskType & ompt_task_final; }
  bool isMergable() { return TaskType & ompt_task_mergeable; }
  bool isMerged() { return TaskType & ompt_task_merged; }

  bool isExplicit() { return TaskType & ompt_task_explicit; }
  bool isImplicit() { return TaskType & ompt_task_implicit; }
  bool isInitial() { return TaskType & ompt_task_initial; }
  bool isTarget() { return TaskType & ompt_task_target; }

  bool isFulfilled() { return TaskType & ArcherTaskFulfilled; }
  void setFulfilled() { TaskType |= ArcherTaskFulfilled; }

  void setAllMemoryDep() { AllMemory[0] = 1; }
  bool hasAllMemoryDep() { return AllMemory[0]; }

  void *GetTaskPtr() { return &Task; }

  void *GetTaskwaitPtr() { return &Taskwait; }

  void *GetLastAllMemoryPtr() { return AllMemory; }
  void *GetNextAllMemoryPtr() { return AllMemory + 1; }

  TaskData *Init(TaskData *parent, int taskType) {
    TaskType = taskType;
    Parent = parent;
    if (Parent != nullptr) {
      // Only dereference the parent after the null check.
      Team = Parent->Team;
      BarrierIndex = Parent->BarrierIndex;
      Parent->RefCount++;
      // Copy over pointer to taskgroup. This task may set up its own stack
      // but for now belongs to its parent's taskgroup.
      TaskGroup = Parent->TaskGroup;
    }
    return this;
  }

  TaskData *Init(ParallelData *team, int taskType) {
    TaskType = taskType;
    execution = 1;
    Team = team;
    return this;
  }

  void Reset() {
    InBarrier = false;
    TaskType = 0;
    execution = 0;
    BarrierIndex = 0;
    RefCount = 1;
    Parent = nullptr;
    Team = nullptr;
    TaskGroup = nullptr;
    if (DependencyMap) {
      for (auto i : *DependencyMap)
        i.second->Delete();
      delete DependencyMap;
    }
    DependencyMap = nullptr;
    if (Dependencies)
      free(Dependencies);
    Dependencies = nullptr;
    DependencyCount = 0;
#ifdef DEBUG
    freed = 0;
#endif
  }

  static TaskData *New(TaskData *parent, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
  }

  static TaskData *New(ParallelData *team, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(team, taskType);
  }

  TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
};
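
// Flag sketch (illustrative): TaskType packs the OMPT task-type bits with
// Archer's own bit, so an undeferred explicit task that was fulfilled via
// omp_fulfill_event would carry
//   TaskType == ompt_task_explicit | ompt_task_undeferred | ArcherTaskFulfilled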
} // namespace

static inline TaskData *ToTaskData(ompt_data_t *task_data) {
  if (task_data)
    return reinterpret_cast<TaskData *>(task_data->ptr);
  return nullptr;
}

/// Store a mutex for each wait_id to resolve race condition with callbacks.
static std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
static std::mutex LocksMutex;

static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
                                   ompt_data_t *thread_data) {
  ParallelDataPool::ThreadDataPool = new ParallelDataPool;
  TsanNewMemory(ParallelDataPool::ThreadDataPool,
                sizeof(ParallelDataPool::ThreadDataPool));
  TaskgroupPool::ThreadDataPool = new TaskgroupPool;
  TsanNewMemory(TaskgroupPool::ThreadDataPool,
                sizeof(TaskgroupPool::ThreadDataPool));
  TaskDataPool::ThreadDataPool = new TaskDataPool;
  TsanNewMemory(TaskDataPool::ThreadDataPool,
                sizeof(TaskDataPool::ThreadDataPool));
  DependencyDataPool::ThreadDataPool = new DependencyDataPool;
  TsanNewMemory(DependencyDataPool::ThreadDataPool,
                sizeof(DependencyDataPool::ThreadDataPool));
  thread_data->value = my_next_id();
}

static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  TsanIgnoreWritesBegin();
  delete ParallelDataPool::ThreadDataPool;
  delete TaskgroupPool::ThreadDataPool;
  delete TaskDataPool::ThreadDataPool;
  delete DependencyDataPool::ThreadDataPool;
  TsanIgnoreWritesEnd();
}

/// OMPT event callbacks for handling parallel regions.

static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
                                     const ompt_frame_t *parent_task_frame,
                                     ompt_data_t *parallel_data,
                                     uint32_t requested_team_size, int flag,
                                     const void *codeptr_ra) {
  ParallelData *Data = ParallelData::New(codeptr_ra);
  parallel_data->ptr = Data;

  TsanHappensBefore(Data->GetParallelPtr());
  if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
    TsanIgnoreWritesEnd();
}

static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
                                   ompt_data_t *task_data, int flag,
                                   const void *codeptr_ra) {
  if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
    TsanIgnoreWritesBegin();
  ParallelData *Data = ToParallelData(parallel_data);
  TsanHappensAfter(Data->GetBarrierPtr(0));
  TsanHappensAfter(Data->GetBarrierPtr(1));

  Data->Delete();

#if (LLVM_VERSION >= 40)
  if (&__archer_get_omp_status) {
    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
      __tsan_flush_memory();
  }
#endif
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    if (type & ompt_task_initial) {
      parallel_data->ptr = ParallelData::New(nullptr);
    }
    task_data->ptr = TaskData::New(ToParallelData(parallel_data), type);
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end: {
    TaskData *Data = ToTaskData(task_data);
#ifdef DEBUG
    assert(Data->freed == 0 && "Implicit task end should only be called once!");
    Data->freed = 1;
#endif
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    if (type & ompt_task_initial) {
      Data->Team->Delete();
    }
    Data->Delete();
    TsanFuncExit();
    break;
  }
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1; tested in the OMPT tests.
    break;
  }
}

static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would occur either
        // during 1. reductions performed by the runtime, which are
        // guaranteed to be race-free, or 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    if (endpoint == ompt_scope_begin)
      break;
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // Barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup; all descendant tasks are finished
      // by now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      Data->TaskGroup->Delete();
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1; tested in the OMPT tests.
      break;
    }
    break;
  }
}

static void ompt_tsan_reduction(ompt_sync_region_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  switch (endpoint) {
  case ompt_scope_begin:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesBegin();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_end:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesEnd();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1; tested in the OMPT tests.
    // Would have no implications for data race detection either way.
    break;
  }
}

/// OMPT event callbacks for handling tasks.

static void ompt_tsan_task_create(
    ompt_data_t *parent_task_data,    /* id of parent task */
    const ompt_frame_t *parent_frame, /* frame data for parent task */
    ompt_data_t *new_task_data,       /* id of created task */
    int type, int has_dependences,
    const void *codeptr_ra) /* pointer to outlined function */
{
  TaskData *Data;
  assert(new_task_data->ptr == NULL &&
         "Task data should be initialized to NULL");
  if (type & ompt_task_initial) {
    ompt_data_t *parallel_data;
    int team_size = 1;
    ompt_get_parallel_info(0, &parallel_data, &team_size);
    ParallelData *PData = ParallelData::New(nullptr);
    parallel_data->ptr = PData;

    Data = TaskData::New(PData, type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_undeferred) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_explicit || type & ompt_task_target) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;

    // Use the newly created address. We cannot use a single address from the
    // parent because that would declare wrong relationships with other
    // sibling tasks that may be created before this task is started!
    TsanHappensBefore(Data->GetTaskPtr());
    ToTaskData(parent_task_data)->execution++;
  }
}

static void freeTask(TaskData *task) {
  while (task != nullptr && --task->RefCount == 0) {
    TaskData *Parent = task->Parent;
    task->Delete();
    task = Parent;
  }
}
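
// Example (illustrative): a task whose RefCount is 2 (one for itself, one
// for a live child) is only recycled after both references drop; the loop
// then walks up the Parent chain and may free ancestors whose last
// reference this task held.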

// LastAllMemoryPtr marks the beginning of an all_memory epoch.
// NextAllMemoryPtr marks the end of an all_memory epoch.
// All tasks with a depend clause begin execution after LastAllMemoryPtr
// and end before NextAllMemoryPtr.
static void releaseDependencies(TaskData *task) {
  if (archer_flags->all_memory) {
    if (task->hasAllMemoryDep()) {
      TsanHappensBefore(task->Parent->GetLastAllMemoryPtr());
      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
    } else if (task->DependencyCount)
      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
  }
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateEnd();
  }
}

static void acquireDependencies(TaskData *task) {
  if (archer_flags->all_memory) {
    if (task->hasAllMemoryDep())
      TsanHappensAfter(task->Parent->GetNextAllMemoryPtr());
    else if (task->DependencyCount)
      TsanHappensAfter(task->Parent->GetLastAllMemoryPtr());
  }
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateBegin();
  }
}
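
// Epoch sketch (illustrative): given sibling tasks
//   #pragma omp task depend(inout: omp_all_memory)  // T1
//   #pragma omp task depend(in: x)                  // T2
// T1 releases LastAllMemoryPtr at its end and T2 acquires it at its start,
// so T1 is ordered before T2 even though they share no dependency variable.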

static void completeTask(TaskData *FromTask) {
  if (!FromTask)
    return;
  // Task-end happens after a possible omp_fulfill_event call
  if (FromTask->isFulfilled())
    TsanHappensAfter(FromTask->GetTaskPtr());
  // Included tasks are executed sequentially, no need to track
  // synchronization
  if (!FromTask->isIncluded()) {
    // Task will finish before a barrier in the surrounding parallel region
    // ...
    ParallelData *PData = FromTask->Team;
    TsanHappensBefore(PData->GetBarrierPtr(FromTask->BarrierIndex));

    // ... and before an eventual taskwait by the parent thread.
    TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());

    if (FromTask->TaskGroup != nullptr) {
      // This task is part of a taskgroup, so it will finish before the
      // corresponding taskgroup_end.
      TsanHappensBefore(FromTask->TaskGroup->GetPtr());
    }
  }
  // release dependencies
  releaseDependencies(FromTask);
}

static void suspendTask(TaskData *FromTask) {
  if (!FromTask)
    return;
  // Task may be resumed at a later point in time.
  TsanHappensBefore(FromTask->GetTaskPtr());
}

static void switchTasks(TaskData *FromTask, TaskData *ToTask) {
  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always) {
    if (FromTask && FromTask->InBarrier) {
      // We want to ignore writes in the runtime code during barriers,
      // but not when executing tasks with user code!
      TsanIgnoreWritesEnd();
    }
    if (ToTask && ToTask->InBarrier) {
      // We want to ignore writes in the runtime code during barriers,
      // but not when executing tasks with user code!
      TsanIgnoreWritesBegin();
    }
  }
  //// Not yet used
  // if (FromTask)
  //   FromTask->deactivate();
  // if (ToTask)
  //   ToTask->activate();
}

static void endTask(TaskData *FromTask) {
  if (!FromTask)
    return;
}

static void startTask(TaskData *ToTask) {
  if (!ToTask)
    return;
  // Handle dependencies on first execution of the task
  if (ToTask->execution == 0) {
    ToTask->execution++;
    acquireDependencies(ToTask);
  }
  // 1. Task will begin execution after it has been created.
  // 2. Task will resume after it has been switched away.
  TsanHappensAfter(ToTask->GetTaskPtr());
}

static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  // The necessary action depends on prior_task_status:
  //
  // ompt_task_early_fulfill = 5,
  //  -> ignored
  //
  // ompt_task_late_fulfill = 6,
  //  -> first completed, first freed, second ignored
  //
  // ompt_task_complete = 1,
  // ompt_task_cancel = 3,
  //  -> first completed, first freed, second starts
  //
  // ompt_taskwait_complete = 8,
  //  -> first starts, first completes, first freed, second ignored
  //
  // ompt_task_detach = 4,
  // ompt_task_yield = 2,
  // ompt_task_switch = 7
  //  -> first suspended, second starts
  //

  TaskData *FromTask = ToTaskData(first_task_data);
  TaskData *ToTask = ToTaskData(second_task_data);

  switch (prior_task_status) {
  case ompt_task_early_fulfill:
    TsanHappensBefore(FromTask->GetTaskPtr());
    FromTask->setFulfilled();
    return;
  case ompt_task_late_fulfill:
    TsanHappensAfter(FromTask->GetTaskPtr());
    completeTask(FromTask);
    freeTask(FromTask);
    return;
  case ompt_taskwait_complete:
    acquireDependencies(FromTask);
    freeTask(FromTask);
    return;
  case ompt_task_complete:
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    return;
  case ompt_task_cancel:
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    startTask(ToTask);
    return;
  case ompt_task_detach:
    endTask(FromTask);
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_yield:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_switch:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  }
}

static void ompt_tsan_dependences(ompt_data_t *task_data,
                                  const ompt_dependence_t *deps, int ndeps) {
  if (ndeps > 0) {
    // Copy the data to use it in task_switch and task_end.
    TaskData *Data = ToTaskData(task_data);
    if (!Data->Parent) {
      // Return since doacross dependences are not supported yet.
      return;
    }
    if (!Data->Parent->DependencyMap)
      Data->Parent->DependencyMap =
          new std::unordered_map<void *, DependencyData *>();
    Data->Dependencies =
        (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
    Data->DependencyCount = ndeps;
    for (int i = 0, d = 0; i < ndeps; i++, d++) {
      if (deps[i].dependence_type == ompt_dependence_type_out_all_memory ||
          deps[i].dependence_type == ompt_dependence_type_inout_all_memory) {
        Data->setAllMemoryDep();
        Data->DependencyCount--;
        if (!archer_flags->all_memory) {
          printf("The application uses omp_all_memory, but Archer was\n"
                 "started without omp_all_memory support. This can lead\n"
                 "to false data race alerts.\n"
                 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
                 "omp_all_memory from the beginning.\n");
          archer_flags->all_memory = 1;
        }
        d--;
        continue;
      }
      auto ret = Data->Parent->DependencyMap->insert(
          std::make_pair(deps[i].variable.ptr, nullptr));
      if (ret.second) {
        ret.first->second = DependencyData::New();
      }
      new ((void *)(Data->Dependencies + d))
          TaskDependency(ret.first->second, deps[i].dependence_type);
    }

    // This callback is executed before this task is first started.
    TsanHappensBefore(Data->GetTaskPtr());
  }
}

/// OMPT event callbacks for handling locking.
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {

  // Acquire our own lock to make sure that
  // 1. the previous release has finished.
  // 2. the next acquire doesn't start before we have finished our release.
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();

  Lock.lock();
  TsanHappensAfter(&Lock);
}

static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();
  TsanHappensBefore(&Lock);

  Lock.unlock();
}
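
// Modeling sketch (illustrative): for omp_set_lock/omp_unset_lock on the
// same lock (same wait_id), the releasing thread publishes with
// TsanHappensBefore(&Lock) and the next acquirer synchronizes with
// TsanHappensAfter(&Lock), so accesses protected by the OpenMP lock are
// ordered for TSan.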

// callback, signature, variable to store result, required support level
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level   \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)

#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f)))                            \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

#define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
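
// Expansion example (illustrative):
//   findTsanFunction(__tsan_func_exit, (void (*)(void)));
// expands to a dlsym(RTLD_DEFAULT, "__tsan_func_exit") lookup, casts the
// result to the given signature, and prints a diagnostic if the symbol is
// missing from the process.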

static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
                                ompt_data_t *tool_data) {
  const char *options = getenv("TSAN_OPTIONS");
  TsanFlags tsan_flags(options);

  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback == NULL) {
    std::cerr << "Could not set callback, exiting..." << std::endl;
    std::exit(1);
  }
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

  if (ompt_get_parallel_info == NULL) {
    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
                    "exiting...\n");
    exit(1);
  }

  findTsanFunction(AnnotateHappensAfter,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateHappensBefore,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
  findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
  findTsanFunction(
      AnnotateNewMemory,
      (void (*)(const char *, int, const volatile void *, size_t)));
  findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
  findTsanFunction(__tsan_func_exit, (void (*)(void)));

  SET_CALLBACK(thread_begin);
  SET_CALLBACK(thread_end);
  SET_CALLBACK(parallel_begin);
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
  SET_CALLBACK(dependences);

  SET_CALLBACK_T(mutex_acquired, mutex);
  SET_CALLBACK_T(mutex_released, mutex);
  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
                          ompt_set_never);

  if (!tsan_flags.ignore_noninstrumented_modules)
    fprintf(stderr,
            "Warning: please export "
            "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
            "to avoid false positive reports from the OpenMP runtime!\n");
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesBegin();

  return 1; // success
}

static void ompt_tsan_finalize(ompt_data_t *tool_data) {
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesEnd();
  if (archer_flags->print_max_rss) {
    struct rusage end;
    getrusage(RUSAGE_SELF, &end);
    printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
  }

  if (archer_flags)
    delete archer_flags;
}

extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  const char *options = getenv("ARCHER_OPTIONS");
  archer_flags = new ArcherFlags(options);
  if (!archer_flags->enabled) {
    if (archer_flags->verbose)
      std::cout << "Archer disabled, stopping operation" << std::endl;
    delete archer_flags;
    return NULL;
  }

  pagesize = getpagesize();

  static ompt_start_tool_result_t ompt_start_tool_result = {
      &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};

  // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
  // rely on dlopen to fail if TSan is missing, but would get a runtime error
  // for the first TSan call. We use RunningOnValgrind to detect whether
  // an implementation of the Annotation interface is available in the
  // execution or disable the tool (by returning NULL).

  findTsanFunctionSilent(RunningOnValgrind, (int (*)(void)));
  if (!RunningOnValgrind) {
    // If we are not running under TSan, give a different tool the chance to
    // be loaded.
    if (archer_flags->verbose)
      std::cout << "Archer detected OpenMP application without TSan; "
                   "stopping operation"
                << std::endl;
    delete archer_flags;
    return NULL;
  }

  if (archer_flags->verbose)
    std::cout << "Archer detected OpenMP application with TSan, supplying "
                 "OpenMP synchronization semantics"
              << std::endl;
  return &ompt_start_tool_result;
}
