1/*
2 * kmp_barrier.h
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_BARRIER_H
14#define KMP_BARRIER_H
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18
// Pick an aligned allocate/free pair for this platform. Every branch provides
//   KMP_ALIGNED_ALLOCATE(size, alignment) -> pointer, null on failure
//   KMP_ALIGNED_FREE(ptr)
// Memory obtained from KMP_ALIGNED_ALLOCATE must be released with the
// KMP_ALIGNED_FREE of the same branch. Branches are tried in this order:
// _mm_malloc, aligned_alloc, posix_memalign, _aligned_malloc, and finally the
// runtime's internal allocator.
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// aligned_alloc() expects the size to be a multiple of the alignment, so the
// request is rounded up first. The macro name's spelling is kept as-is so any
// existing user of it keeps working.
#define KMP_ALGIN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
// Function form of KMP_ALIGNED_ALLOCATE built on posix_memalign().
// Returns the allocated pointer, or nullptr when posix_memalign() reports an
// error (non-zero return value).
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Start from null: on failure posix_memalign() may leave the output pointer
  // untouched, so it must never be read (or freed) while uninitialized.
  void *ptr = nullptr;
  if (posix_memalign(&ptr, alignment, size) != 0) {
    // Nothing was allocated on failure, so there is nothing to release.
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
// Last resort: the alignment argument is ignored on this path.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
49
// Use four cache lines: MLC tends to prefetch the next or previous cache line,
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
// The definition is guarded so that an earlier definition of the macro wins.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

// Compile-time switch, set to 0 here. It is not referenced anywhere else in
// this header.
#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
58
59class distributedBarrier {
60 struct flags_s {
61 kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
62 };
63
64 struct go_s {
65 std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
66 };
67
68 struct iter_s {
69 kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
70 };
71
72 struct sleep_s {
73 std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
74 };
75
76 void init(size_t nthr);
77 void resize(size_t nthr);
78 void computeGo(size_t n);
79 void computeVarsForN(size_t n);
80
81public:
82 enum {
83 MAX_ITERS = 3,
84 MAX_GOS = 8,
85 IDEAL_GOS = 4,
86 IDEAL_CONTENTION = 16,
87 };
88
89 flags_s *flags[MAX_ITERS];
90 go_s *go;
91 iter_s *iter;
92 sleep_s *sleep;
93
94 size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
95 size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
96 // number of go signals each requiring one write per iteration
97 size_t KMP_ALIGN_CACHE num_gos;
98 // number of groups of gos
99 size_t KMP_ALIGN_CACHE num_groups;
100 // threads per go signal
101 size_t KMP_ALIGN_CACHE threads_per_go;
102 bool KMP_ALIGN_CACHE fix_threads_per_go;
103 // threads per group
104 size_t KMP_ALIGN_CACHE threads_per_group;
105 // number of go signals in a group
106 size_t KMP_ALIGN_CACHE gos_per_group;
107 void *team_icvs;
108
109 distributedBarrier() = delete;
110 ~distributedBarrier() = delete;
111
112 // Used instead of constructor to create aligned data
113 static distributedBarrier *allocate(int nThreads) {
114 distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
115 sizeof(distributedBarrier), 4 * CACHE_LINE);
116 if (!d) {
117 KMP_FATAL(MemoryAllocFailed);
118 }
119 d->num_threads = 0;
120 d->max_threads = 0;
121 for (int i = 0; i < MAX_ITERS; ++i)
122 d->flags[i] = NULL;
123 d->go = NULL;
124 d->iter = NULL;
125 d->sleep = NULL;
126 d->team_icvs = NULL;
127 d->fix_threads_per_go = false;
128 // calculate gos and groups ONCE on base size
129 d->computeGo(n: nThreads);
130 d->init(nthr: nThreads);
131 return d;
132 }
133
134 static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
135
136 void update_num_threads(size_t nthr) { init(nthr); }
137
138 bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
139 size_t get_num_threads() { return num_threads; }
140 kmp_uint64 go_release();
141 void go_reset();
142};
143
144#endif // KMP_BARRIER_H
145

// Source: openmp/runtime/src/kmp_barrier.h