/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// format specifiers for debug prints ( d, u, lld, llu, ld )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      KMP_DEBUG_ASSERT(i != 0);                                                \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore them)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, so they cannot
  // be seen here, since this code path is only executed for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
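  // Worked example of the trip-count formula: for *plower = 0, *pupper = 9
  // and incr = 3, the iterations are 0, 3, 6, 9, so
  // trip_count = (UT)(9 - 0) / 3 + 1 = 4. The unsigned cast matters when the
  // span (upper - lower) does not fit in the signed type, e.g. a kmp_int32
  // loop running from INT32_MIN to INT32_MAX - 1 with incr = 2.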

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
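  // Example of the two partitionings in the kmp_sch_static case above, for
  // trip_count = 10 and nth = 4:
  //   kmp_sch_static_balanced: small_chunk = 2, extras = 2, so the threads
  //   get 3, 3, 2, 2 iterations and thread nth-1 reports the last iteration.
  //   kmp_sch_static_greedy: big_chunk_inc_count = ceil(10/4) * incr =
  //   3 * incr, so the threads get 3, 3, 3, 1 iterations (the last thread's
  //   upper bound is clipped back to old_upper), and the thread whose chunk
  //   covers old_upper reports the last iteration.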
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
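  // Example of kmp_sch_static_chunked: trip_count = 10, nth = 4, chunk = 2
  // gives nchunks = 5 and span = 2 * incr. Chunks are handed out round-robin
  // (stride = span * nth), so thread 0 owns chunks 0 and 4, threads 1-3 own
  // one chunk each, and the final chunk (index 4) belongs to thread
  // (nchunks - 1) % nth == 0, which is the thread that reports *plastiter.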
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // round span up to a multiple of chunk; the bit trick assumes chunk is a
    // power of two (e.g. a simd width)
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
    break;
  }
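  // Example of kmp_sch_static_balanced_chunked: trip_count = 100, nth = 8,
  // chunk = 4 (e.g. a simd width). span = ceil(100 / 8) = 13 is rounded up to
  // the next multiple of the chunk, 16, so threads 0-5 get 16 iterations,
  // thread 6 gets the remaining 4 (its upper bound is clipped to old_upper),
  // and thread 7 gets none; (trip_count - 1) / chunk = 99 / 16 = 6 makes
  // thread 6 the one that reports *plastiter.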
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of some teams get a single iteration; the
    // other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      KMP_DEBUG_ASSERT(incr != 0);
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        KMP_DEBUG_ASSERT(nth != 0);
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL) {
        KMP_DEBUG_ASSERT(chunk != 0);
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      }
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr)  // where incr<0
      //   for(i=10;i>0;i-=incr)  // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

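As an illustrative sketch (not taken from the runtime itself), a compiler
might lower `#pragma omp for schedule(static)` over `for (int i = 0; i < n;
++i)` into a per-thread call of roughly this shape:

@code
kmp_int32 lower = 0, upper = n - 1, stride = 1, lastiter = 0;
__kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &lastiter, &lower,
                         &upper, &stride, /*incr=*/1, /*chunk=*/0);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
__kmpc_for_static_fini(&loc, gtid);
@endcode

Here `loc`, `gtid`, `n` and `body` stand for values the compiler already has
in hand; the routine rewrites `lower` and `upper` to this thread's subrange.
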
@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

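A hedged sketch of the calling pattern (an assumed lowering, not prescribed by
this file): for a composite distribute parallel loop the compiler passes one
extra out parameter, and each thread receives both its own chunk bounds and
the enclosing team's distribute upper bound:

@code
kmp_int32 lower = 0, upper = n - 1, upperD = n - 1, stride = 1, last = 0;
__kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                              &upper, &upperD, &stride, /*incr=*/1,
                              /*chunk=*/0);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
@endcode

On return, [lower, upper] is the current thread's share and upperD is the
upper bound of the team's dist_chunk, as described above; `loc`, `gtid`, `n`
and `body` are placeholders.
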
@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
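
A minimal sketch of the intended use (assumptions: a dist_schedule(static,
chunk) lowering with values the compiler supplies; `loc`, `gtid` and `n` are
placeholders):

@code
kmp_int32 lb = 0, ub = n - 1, st = 1, last = 0;
__kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, /*incr=*/1,
                          /*chunk=*/4);
// [lb, ub] is this team's first chunk; advancing by st yields its next chunks.
@endcode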
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"
