/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
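  /* The computation below rounds CHUNK_SIZE up to a multiple of the
     vectorization factor; e.g. for VF 8 a chunk of 5 becomes
     (5 + 7) & -8 == 8, so each chunk covers whole SIMD vectors.  */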
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */
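/* For the parallel-for case this yields { (long) n1, (long) n2,
   (long) step } plus the adjusted chunk size if present, which
   expand_parallel_call splices into the GOMP_parallel_loop_* argument
   list; for sections it is just the section count.  */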

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
        {
          tree innerc
            = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
                               OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n1 = OMP_CLAUSE_DECL (innerc);
          innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                    OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n2 = OMP_CLAUSE_DECL (innerc);
        }

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
        {
          t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
          t = omp_adjust_chunk_size (t, fd.simd_schedule);
          ws_args->quick_push (t);
        }

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
         GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
         the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
          && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up on task reductions on the parallel: while they are
     implementable, adding another big set of APIs or slowing down
     the normal paths is not acceptable.  */
  tree pclauses
    = gimple_omp_parallel_clauses (last_nondebug_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_nondebug_stmt (par_entry_bb))
          || (last_and_only_stmt (ws_entry_bb)
              && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_nondebug_stmt (par_entry_bb);
      gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
        {
          /* If this is a combined parallel loop, we need to determine
             whether or not to use the combined library calls.  There
             are two cases where we do not apply the transformation:
             static loops and any kind of ordered loop.  In the first
             case, we already open code the loop so there is no need
             to do anything else.  In the latter case, the combined
             parallel loop call would still need extra synchronization
             to implement ordered semantics, so there would not be any
             gain in using the combined call.  */
          tree clauses = gimple_omp_for_clauses (ws_stmt);
          tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
          if (c == NULL
              || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
                  == OMP_CLAUSE_SCHEDULE_STATIC)
              || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
              || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
              || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
                  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
            return;
        }
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
               && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                    OMP_CLAUSE__REDUCTEMP_)
                   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                       OMP_CLAUSE__CONDTEMP_)))
        return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
           gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
               region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
             region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
                struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
         regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
         regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
                          tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
        entry_stmt = last_nondebug_stmt (region->entry);
        parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
        break;
      case GIMPLE_OMP_TARGET:
        entry_stmt = last_nondebug_stmt (region->entry);
        parent_fndecl
          = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
        break;
      default:
        break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
        {
          DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
          BLOCK_VARS (b) = child_fndecl;
        }
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */
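/* As a sketch of the result, a plain '#pragma omp parallel
   num_threads (4)' ends up as a call along the lines of
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);
   (the child function and data record names here are illustrative),
   while combined regions target one of the GOMP_parallel_loop_* or
   GOMP_parallel_sections entry points instead.  */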

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
                      gomp_parallel *entry_stmt,
                      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
        {
        case GIMPLE_OMP_FOR:
          gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
          switch (region->inner->sched_kind)
            {
            case OMP_CLAUSE_SCHEDULE_RUNTIME:
              /* For lastprivate(conditional:), our implementation
                 requires monotonic behavior.  */
              if (region->inner->has_lastprivate_conditional != 0)
                start_ix2 = 3;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
                start_ix2 = 6;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
                start_ix2 = 7;
              else
                start_ix2 = 3;
              break;
            case OMP_CLAUSE_SCHEDULE_DYNAMIC:
            case OMP_CLAUSE_SCHEDULE_GUIDED:
              if ((region->inner->sched_modifiers
                   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
                  && !region->inner->has_lastprivate_conditional)
                {
                  start_ix2 = 3 + region->inner->sched_kind;
                  break;
                }
              /* FALLTHRU */
            default:
              start_ix2 = region->inner->sched_kind;
              break;
            }
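          /* START_IX2 is an offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC:
             the GOMP_parallel_loop_* builtins are laid out consecutively,
             with offsets 0-3 selecting the monotonic
             static/dynamic/guided/runtime entry points and the higher
             offsets their nonmonotonic variants.  */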
          start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
          start_ix = (enum built_in_function) start_ix2;
          break;
        case GIMPLE_OMP_SECTIONS:
          start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
        val = fold_build2_loc (clause_loc,
                               EQ_EXPR, unsigned_type_node, cond,
                               build_int_cst (TREE_TYPE (cond), 0));
      else
        {
          basic_block cond_bb, then_bb, else_bb;
          edge e, e_then, e_else;
          tree tmp_then, tmp_else, tmp_join, tmp_var;

          tmp_var = create_tmp_var (TREE_TYPE (val));
          if (gimple_in_ssa_p (cfun))
            {
              tmp_then = make_ssa_name (tmp_var);
              tmp_else = make_ssa_name (tmp_var);
              tmp_join = make_ssa_name (tmp_var);
            }
          else
            {
              tmp_then = tmp_var;
              tmp_else = tmp_var;
              tmp_join = tmp_var;
            }

          e = split_block_after_labels (bb);
          cond_bb = e->src;
          bb = e->dest;
          remove_edge (e);

          then_bb = create_empty_bb (cond_bb);
          else_bb = create_empty_bb (then_bb);
          set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
          set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

          stmt = gimple_build_cond_empty (cond);
          gsi = gsi_start_bb (cond_bb);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

          gsi = gsi_start_bb (then_bb);
          expand_omp_build_assign (&gsi, tmp_then, val, true);

          gsi = gsi_start_bb (else_bb);
          expand_omp_build_assign (&gsi, tmp_else,
                                   build_int_cst (unsigned_type_node, 1),
                                   true);

          make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
          make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
          add_bb_to_loop (then_bb, cond_bb->loop_father);
          add_bb_to_loop (else_bb, cond_bb->loop_father);
          e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
          e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

          if (gimple_in_ssa_p (cfun))
            {
              gphi *phi = create_phi_node (tmp_join, bb);
              add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
              add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
            }

          val = tmp_join;
        }

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
                  fold_convert (type,
                                fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */
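/* As a sketch, a plain untied task becomes a call along the lines of
     GOMP_task (foo._omp_fn.1, &.omp_data_o, NULL, arg_size, arg_align,
		true, GOMP_TASK_FLAG_UNTIED, NULL, 0, NULL);
   (names illustrative), while taskloops instead call
   GOMP_taskloop{,_ull} with the loop bounds and step appended.  */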

static void
expand_task_call (struct omp_region *region, basic_block bb,
                  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_nondebug_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
                  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
                                OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
        iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
        {
          if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
            iflags |= GOMP_TASK_FLAG_STRICT;
          num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
        }
      else
        {
          num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
          if (num_tasks)
            {
              iflags |= GOMP_TASK_FLAG_GRAINSIZE;
              if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
                iflags |= GOMP_TASK_FLAG_STRICT;
              num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
            }
          else
            num_tasks = integer_zero_node;
        }
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
        iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
        iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
        iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
        iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
        iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
        {
          tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
          t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                               build_int_cst (unsigned_type_node,
                                              GOMP_TASK_FLAG_IF),
                               build_int_cst (unsigned_type_node, 0));
          flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
                                   flags, t);
        }
      else
        cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                           build_int_cst (unsigned_type_node,
                                          GOMP_TASK_FLAG_FINAL),
                           build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
                             OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
            ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
            : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
                         ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
                         : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
                         11, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), flags,
                         num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
                         10, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), cond, flags,
                         depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  enum built_in_function f = (nowait
                              ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
                              : BUILT_IN_GOMP_TASKWAIT_DEPEND);
  tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  BB is the block where to insert
   the code.  */
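/* A sketch of the emitted call:
     GOMP_teams_reg (foo._omp_fn.2, &.omp_data_o, num_teams,
		     thread_limit, 0);
   (names illustrative) where zero for either numeric argument means
   "choose at run time" and the trailing zero is the currently unused
   flags word.  */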

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
                               args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
        continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
          && !gimple_omp_return_nowait_p (stmt))
        {
          /* OpenMP 3.0 tasks unfortunately prevent this optimization
             in many cases.  If there could be tasks queued, the barrier
             might be needed to let the tasks run before some local
             variable of the parallel that the task uses as shared
             runs out of scope.  The task can be spawned either
             from within current function (this would be easy to check)
             or from some function it calls and gets passed an address
             of such a variable.  */
          if (any_addressable_vars < 0)
            {
              gomp_parallel *parallel_stmt
                = as_a <gomp_parallel *> (last_nondebug_stmt (region->entry));
              tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
              tree local_decls, block, decl;
              unsigned ix;

              any_addressable_vars = 0;
              FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
                if (TREE_ADDRESSABLE (decl))
                  {
                    any_addressable_vars = 1;
                    break;
                  }
              for (block = gimple_block (stmt);
                   !any_addressable_vars
                   && block
                   && TREE_CODE (block) == BLOCK;
                   block = BLOCK_SUPERCONTEXT (block))
                {
                  for (local_decls = BLOCK_VARS (block);
                       local_decls;
                       local_decls = DECL_CHAIN (local_decls))
                    if (TREE_ADDRESSABLE (local_decls))
                      {
                        any_addressable_vars = 1;
                        break;
                      }
                  if (block == gimple_block (parallel_stmt))
                    break;
                }
            }
          if (!any_addressable_vars)
            gimple_omp_return_set_nowait (stmt);
        }
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
        {
          region = region->next;
          remove_exit_barriers (region);
        }
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */
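/* For instance, within '#pragma omp parallel' a call to the global
   omp_get_num_threads is redirected to the const builtin
   __builtin_omp_get_num_threads, letting later passes CSE repeated
   calls inside the parallel body.  */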

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
                      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
                                          OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *call = gsi_stmt (gsi);
        tree decl;

        if (is_gimple_call (call)
            && (decl = gimple_call_fndecl (call))
            && DECL_EXTERNAL (decl)
            && TREE_PUBLIC (decl)
            && DECL_INITIAL (decl) == NULL)
          {
            tree built_in;

            if (DECL_NAME (decl) == thr_num_id)
              {
                /* In #pragma omp task untied omp_get_thread_num () can change
                   during the execution of the task region.  */
                if (untied_task)
                  continue;
                built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
              }
            else if (DECL_NAME (decl) == num_thr_id)
              built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
            else
              continue;

            if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
                || gimple_call_num_args (call) != 0)
              continue;

            if (flag_exceptions && !TREE_NOTHROW (decl))
              continue;

            if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
                || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
                                        TREE_TYPE (TREE_TYPE (built_in))))
              continue;

            gimple_call_set_fndecl (call, built_in);
          }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
                         bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
                                   !after, after ? GSI_CONTINUE_LINKING
                                                 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
                       tree lhs, tree rhs, bool after = false)
{
  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
  if (after)
    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
                 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
                    NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }
  return cond_stmt;
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_nondebug_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
         the region, in which case all we need to do is make the
         sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
        {
          exit_succ_e = single_succ_edge (exit_bb);
          make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
        }
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the parallel body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable, in which case we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
        {
          basic_block entry_succ_bb
            = single_succ_p (entry_bb) ? single_succ (entry_bb)
                                       : FALLTHRU_EDGE (entry_bb)->dest;
          tree arg;
          gimple *parcopy_stmt = NULL;

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gimple *stmt;

              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We ignore the subcode because we're
                     effectively doing a STRIP_NOPS.  */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && (TREE_OPERAND (arg, 0)
                          == gimple_omp_taskreg_data_arg (entry_stmt)))
                    {
                      parcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (parcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          if (!gimple_in_ssa_p (cfun))
            {
              if (gimple_assign_lhs (parcopy_stmt) == arg)
                gsi_remove (&gsi, true);
              else
                {
                  /* ?? Is setting the subcode really necessary ??  */
                  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
                  gimple_assign_set_rhs1 (parcopy_stmt, arg);
                }
            }
          else
            {
              tree lhs = gimple_assign_lhs (parcopy_stmt);
              gcc_assert (SSA_NAME_VAR (lhs) == arg);
              /* We'd like to set the rhs to the default def in the child_fn,
                 but it's too early to create ssa names in the child_fn.
                 Instead, we set the rhs to the parm.  In
                 move_sese_region_to_fn, we introduce a default def for the
                 parm, map the parm to its default def, and once we encounter
                 this stmt, replace the parm with the default def.  */
              gimple_assign_set_rhs1 (parcopy_stmt, arg);
              update_stmt (parcopy_stmt);
            }
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
         rather than in containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
         so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
                           || gimple_code (stmt) == GIMPLE_OMP_TASK
                           || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
        single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
        {
          e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
          gcc_assert (e2->dest == region->exit);
          remove_edge (BRANCH_EDGE (entry_bb));
          set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
          gsi = gsi_last_nondebug_bb (region->exit);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          gsi_remove (&gsi, true);
        }

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_nondebug_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && (gimple_code (gsi_stmt (gsi))
                          == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
        {
          init_tree_ssa (child_cfun);
          init_ssa_operands (child_cfun);
          child_cfun->gimple_df->in_ssa_p = true;
          block = NULL_TREE;
        }
      else
        block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
        {
          basic_block dest_bb = e2->dest;
          if (!exit_bb)
            make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
          remove_edge (e2);
          set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
        }
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_needed (child_fn);

      if (optimize)
        optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (gimple_in_ssa_p (cfun))
        update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
                          as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
                           gimple_stmt_iterator *gsi,
                           oacc_collapse *counts, tree diff_type,
                           tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
        plus_type = sizetype;

      if (tiling)
        {
          tree num = build_int_cst (integer_type_node, fd->collapse);
          tree loop_no = build_int_cst (integer_type_node, ix);
          tree tile = TREE_VALUE (tiling);
          gcall *call
            = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
                                          tile,
                                          /* gwv-outer=*/integer_zero_node,
                                          /* gwv-inner=*/integer_zero_node);

          counts[ix].outer = create_tmp_var (iter_type, ".outer");
          counts[ix].tile = create_tmp_var (diff_type, ".tile");
          gimple_call_set_lhs (call, counts[ix].tile);
          gimple_set_location (call, loc);
          gsi_insert_before (gsi, call, GSI_SAME_STMT);

          tiling = TREE_CHAIN (tiling);
        }
      else
        {
          counts[ix].tile = NULL;
          counts[ix].outer = loop->v;
        }

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
        s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
        s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
                          fold_convert (plus_type, negating ? b : e),
                          fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
        expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
        (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
                                             true, GSI_SAME_STMT);
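      /* E.g. an up-counting loop with b == 0, e == 10 and s == 3 has
         range == 10 and iters == (10 - 1 + 3) / 3 == 4, matching the
         iterations 0, 3, 6 and 9.  */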

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
                           fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
                           gimple_stmt_iterator *gsi,
                           const oacc_collapse *counts, tree ivar,
                           tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
        {
          plus_code = POINTER_PLUS_EXPR;
          plus_type = sizetype;
        }

      expr = ivar;
      if (ix)
        {
          tree mod = fold_convert (ivar_type, collapse->iters);
          ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
          expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
          ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
                                           true, GSI_SAME_STMT);
        }
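      /* EXPR is now this loop's index within its extent and IVAR the
         linearized index left for the outer loops: e.g. for two
         collapsed loops of 4 and 5 iterations, IVAR == 13 gives
         13 % 5 == 3 for the inner loop and 13 / 5 == 2 for the outer.  */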
1697
1698 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1699 fold_convert (diff_type, collapse->step));
1700 expr = fold_build2 (plus_code, iter_type,
1701 inner ? collapse->outer : collapse->base,
1702 fold_convert (plus_type, expr));
1703 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 gassign *ass = gimple_build_assign (v, expr);
1706 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1707 }
1708}
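
/* A small worked instance of the above (illustrative values only):
   with collapse == 2, 5 outer and 3 inner iterations, and logical
   iteration IVAR == 7, the innermost pass (ix == 1) computes
     expr = 7 % 3 = 1,  ivar = 7 / 3 = 2,  V2 = base2 + 1 * step2
   and the outer pass (ix == 0) then computes
     V1 = base1 + 2 * step1
   i.e. logical iteration 7 is outer iteration 2, inner iteration 1.  */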

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula,
   otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	      }
	  }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */
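
/* E.g. (a made-up instance) for the triangular sub-nest
     for (i = 0; i < 4; i++)
       for (j = 0; j <= i; j++)
   the first outer iteration runs the inner loop once
   (first_inner_iterations == 1) and each following outer iteration
   adds one more inner iteration (factor == 1), so the closed form
   used below is
     COUNT = outer_niters * first_inner_iterations
	     + factor * (outer_niters * (outer_niters - 1) / 2)
	   = 4 * 1 + 1 * (4 * 3 / 2) = 10
   which indeed equals 1 + 2 + 3 + 4.  */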

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	{
	  tree c[4];
	  for (i = 0; i < 4; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      c[i] = OMP_CLAUSE_DECL (innerc);
	    }
	  counts[0] = c[0];
	  fd->first_inner_iterations = c[1];
	  fd->factor = c[2];
	  fd->adjn1 = c[3];
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  bool rect_count_seen = false;
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if (fd->non_rect)
	{
	  /* Skip loops that use outer iterators in their expressions
	     during this phase.  */
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    {
	      counts[i] = build_zero_cst (type);
	      continue;
	    }
	}
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
					     n1, n2);
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
	    continue;
	  if (!rect_count_seen)
	    {
	      t = counts[i];
	      rect_count_seen = true;
	    }
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
  if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
    {
      gcc_assert (fd->last_nonrect != -1);

      counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
      expand_omp_build_assign (gsi, counts[fd->last_nonrect],
			       build_zero_cst (type));
      for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
	if (fd->loops[i].m1
	    || fd->loops[i].m2
	    || fd->loops[i].non_rect_referenced)
	  break;
      if (i == fd->last_nonrect
	  && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
	  && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
	{
	  int o = fd->first_nonrect;
	  tree itype = TREE_TYPE (fd->loops[o].v);
	  tree n1o = create_tmp_reg (itype, ".n1o");
	  t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
	  expand_omp_build_assign (gsi, n1o, t);
	  tree n2o = create_tmp_reg (itype, ".n2o");
	  t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
	  expand_omp_build_assign (gsi, n2o, t);
	  if (fd->loops[i].m1 && fd->loops[i].m2)
	    t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
			     unshare_expr (fd->loops[i].m1));
	  else if (fd->loops[i].m1)
	    t = fold_build1 (NEGATE_EXPR, itype,
			     unshare_expr (fd->loops[i].m1));
	  else
	    t = unshare_expr (fd->loops[i].m2);
	  tree m2minusm1
	    = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);

	  gimple_stmt_iterator gsi2 = *gsi;
	  gsi_prev (&gsi2);
	  e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block bb1 = e->src;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  gsi2 = gsi_after_labels (bb1);
	  tree ostep = fold_convert (itype, fd->loops[o].step);
	  t = build_int_cst (itype, (fd->loops[o].cond_code
				     == LT_EXPR ? -1 : 1));
	  t = fold_build2 (PLUS_EXPR, itype, ostep, t);
	  t = fold_build2 (PLUS_EXPR, itype, t, n2o);
	  t = fold_build2 (MINUS_EXPR, itype, t, n1o);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[o].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, ostep));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
	  tree outer_niters
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
			   build_one_cst (itype));
	  t = fold_build2 (MULT_EXPR, itype, t, ostep);
	  t = fold_build2 (PLUS_EXPR, itype, n1o, t);
	  tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
	  tree n1, n2, n1e, n2e;
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  if (fd->loops[i].m1)
	    {
	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
	      n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
	      n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
	    }
	  else
	    n1 = t;
	  n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  if (fd->loops[i].m2)
	    {
	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
	      n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
	      n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
	    }
	  else
	    n2 = t;
	  n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  if (fd->loops[i].m1)
	    {
	      n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
	      n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
	      n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
	    }
	  else
	    n1e = t;
	  n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  if (fd->loops[i].m2)
	    {
	      n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
	      n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
	      n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
	    }
	  else
	    n2e = t;
	  n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  gcond *cond_stmt
	    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
				     n1, n2);
	  e = split_block (bb1, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  basic_block bb2 = e->dest;
	  gsi2 = gsi_after_labels (bb2);

	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1e, n2e);
	  e = split_block (bb2, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  gsi2 = gsi_after_labels (e->dest);

	  tree step = fold_convert (itype, fd->loops[i].step);
	  t = build_int_cst (itype, (fd->loops[i].cond_code
				     == LT_EXPR ? -1 : 1));
	  t = fold_build2 (PLUS_EXPR, itype, step, t);
	  t = fold_build2 (PLUS_EXPR, itype, t, n2);
	  t = fold_build2 (MINUS_EXPR, itype, t, n1);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, step));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
	  tree first_inner_iterations
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, step));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
	  tree factor
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
			   build_one_cst (itype));
	  t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
	  t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
	  t = fold_build2 (MULT_EXPR, itype, factor, t);
	  t = fold_build2 (PLUS_EXPR, itype,
			   fold_build2 (MULT_EXPR, itype, outer_niters,
					first_inner_iterations), t);
	  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
				   fold_convert (type, t));

	  basic_block bb3 = create_empty_bb (bb1);
	  add_bb_to_loop (bb3, bb1->loop_father);

	  e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
	  e->probability = profile_probability::unlikely ().guessed ();

	  gsi2 = gsi_after_labels (bb3);
	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1e, n2e);
	  e = split_block (bb3, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  basic_block bb4 = e->dest;

	  ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
	  ne->probability = e->probability.invert ();

	  basic_block bb5 = create_empty_bb (bb2);
	  add_bb_to_loop (bb5, bb2->loop_father);

	  ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::unlikely ().guessed ();

	  for (int j = 0; j < 2; j++)
	    {
	      gsi2 = gsi_after_labels (j ? bb5 : bb4);
	      t = fold_build2 (MINUS_EXPR, itype,
			       unshare_expr (fd->loops[i].n1),
			       unshare_expr (fd->loops[i].n2));
	      t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
	      tree tem
		= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
	      t = fold_build2 (MINUS_EXPR, itype, tem, t);
	      tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					      true, GSI_SAME_STMT);
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	      if (fd->loops[i].m1)
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
		  n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      else
		n1 = t;
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	      if (fd->loops[i].m2)
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
		  n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      else
		n2 = t;
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);

	      cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
						 n1, n2);
	      e = split_block (gsi_bb (gsi2), cond_stmt);
	      e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
	      e->probability = profile_probability::unlikely ().guessed ();
	      ne = make_edge (e->src, bb1,
			      j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
	      ne->probability = e->probability.invert ();
	      gsi2 = gsi_after_labels (e->dest);

	      t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);

	      make_edge (e->dest, bb1, EDGE_FALLTHRU);
	    }

	  set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
	  set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);

	  if (fd->first_nonrect + 1 == fd->last_nonrect)
	    {
	      fd->first_inner_iterations = first_inner_iterations;
	      fd->factor = factor;
	      fd->adjn1 = n1o;
	    }
	}
      else
	{
	  /* Fallback implementation.  Evaluate the loops with m1/m2
	     non-NULL as well as their outer loops at runtime using temporaries
	     instead of the original iteration variables, and in the
	     body just bump the counter.  */
	  gimple_stmt_iterator gsi2 = *gsi;
	  gsi_prev (&gsi2);
	  e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block cur_bb = e->src;
	  basic_block next_bb = e->dest;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
	  memset (vs, 0, fd->last_nonrect * sizeof (tree));

	  for (i = 0; i <= fd->last_nonrect; i++)
	    {
	      if (fd->loops[i].m1 == NULL_TREE
		  && fd->loops[i].m2 == NULL_TREE
		  && !fd->loops[i].non_rect_referenced)
		continue;

	      tree itype = TREE_TYPE (fd->loops[i].v);

	      gsi2 = gsi_after_labels (cur_bb);
	      tree n1, n2;
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	      if (fd->loops[i].m1 == NULL_TREE)
		n1 = t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[i].m1));
		  t = unshare_expr (fd->loops[i].n1);
		  n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
		}
	      else
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
		  n1 = fold_build2 (MULT_EXPR, itype,
				    vs[i - fd->loops[i].outer], n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (i < fd->last_nonrect)
		{
		  vs[i] = create_tmp_reg (itype, ".it");
		  expand_omp_build_assign (&gsi2, vs[i], n1);
		}
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	      if (fd->loops[i].m2 == NULL_TREE)
		n2 = t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[i].m2));
		  t = unshare_expr (fd->loops[i].n2);
		  n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
		}
	      else
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
		  n2 = fold_build2 (MULT_EXPR, itype,
				    vs[i - fd->loops[i].outer], n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      if (i == fd->last_nonrect)
		{
		  gcond *cond_stmt
		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1, n2);
		  e = split_block (cur_bb, cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
		  e->probability = profile_probability::likely ().guessed ();
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);

		  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
					     ? -1 : 1));
		  t = fold_build2 (PLUS_EXPR, itype,
				   fold_convert (itype, fd->loops[i].step), t);
		  t = fold_build2 (PLUS_EXPR, itype, t,
				   fold_convert (itype, n2));
		  t = fold_build2 (MINUS_EXPR, itype, t,
				   fold_convert (itype, n1));
		  tree step = fold_convert (itype, fd->loops[i].step);
		  if (TYPE_UNSIGNED (itype)
		      && fd->loops[i].cond_code == GT_EXPR)
		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
				     fold_build1 (NEGATE_EXPR, itype, t),
				     fold_build1 (NEGATE_EXPR, itype, step));
		  else
		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
		  t = fold_convert (type, t);
		  t = fold_build2 (PLUS_EXPR, type,
				   counts[fd->last_nonrect], t);
		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
		  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
		  break;
		}
	      e = split_block (cur_bb, last_nondebug_stmt (cur_bb));

	      basic_block new_cur_bb = create_empty_bb (cur_bb);
	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);

	      gsi2 = gsi_after_labels (e->dest);
	      tree step = fold_convert (itype,
					unshare_expr (fd->loops[i].step));
	      if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
		t = fold_build_pointer_plus (vs[i], step);
	      else
		t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, vs[i], t);

	      ne = split_block (e->dest, last_nondebug_stmt (e->dest));
	      gsi2 = gsi_after_labels (ne->dest);

	      expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
	      edge e3, e4;
	      if (next_bb == entry_bb)
		{
		  e3 = find_edge (ne->dest, next_bb);
		  e3->flags = EDGE_FALSE_VALUE;
		}
	      else
		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
	      e4->probability = profile_probability::likely ().guessed ();
	      e3->probability = e4->probability.invert ();
	      basic_block esrc = e->src;
	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
	      cur_bb = new_cur_bb;
	      basic_block latch_bb = next_bb;
	      next_bb = e->dest;
	      remove_edge (e);
	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
	    }
	}
      t = NULL_TREE;
      for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
	if (!fd->loops[i].non_rect_referenced
	    && fd->loops[i].m1 == NULL_TREE
	    && fd->loops[i].m2 == NULL_TREE)
	  {
	    if (t == NULL_TREE)
	      t = counts[i];
	    else
	      t = fold_build2 (MULT_EXPR, type, t, counts[i]);
	  }
      if (t)
	{
	  t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
	  expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
	}
      if (!rect_count_seen)
	t = counts[fd->last_nonrect];
      else
	t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
			 counts[fd->last_nonrect]);
      expand_omp_build_assign (gsi, fd->loop.n2, t);
    }
  else if (fd->non_rect)
    {
      tree t = fd->loop.n2;
      gcc_assert (TREE_CODE (t) == INTEGER_CST);
      int non_rect_referenced = 0, non_rect = 0;
      for (i = 0; i < fd->collapse; i++)
	{
	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
	      && !integer_zerop (counts[i]))
	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
	  if (fd->loops[i].non_rect_referenced)
	    non_rect_referenced++;
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    non_rect++;
	}
      gcc_assert (non_rect == 1 && non_rect_referenced == 1);
      counts[fd->last_nonrect] = t;
    }
}
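
/* Stripped of the CFG plumbing, the per-loop count computed above is
   (a sketch in plain C for signed iteration variables, not the actual
   implementation):

     long long
     count_iters (long long n1, long long n2, long long step, bool lt)
     {
       if (lt ? !(n1 < n2) : !(n1 > n2))
	 return 0;			// the ZERO_ITER_BB path
       long long adj = lt ? step - 1 : step + 1;
       return (adj + n2 - n1) / step;	// rounds towards the bound
     }

   and the total for a rectangular collapsed nest is simply the product
   of the per-loop counts.  */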

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.
   For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
   inclusive), use the count of all those loops together, and either
   find quadratic etc. equation roots, or as a fallback, do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		int temp = (adj + tmpk2 - tmpk1) / STEP3;
		if (COUNT + temp > T)
		  {
		    V1 = tmpi;
		    V2 = tmpj;
		    V3 = tmpk1 + (T - COUNT) * STEP3;
		    goto done;
		  }
		else
		  COUNT += temp;
	      }
	  }
	done:;
   but for optional innermost or outermost rectangular loops that aren't
   referenced by other loop expressions keep doing the division/modulo.  */
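
/* The quadratic-root path below recovers the outer iteration C for a
   given logical iteration T as follows (a derivation using the names
   of the temporaries in the code): with F0 inner iterations in the
   first outer iteration and FACTOR more in each following one, the
   number of logical iterations before outer iteration C is
     D(C) = C * F0 + FACTOR * C * (C - 1) / 2
   and the largest C with D(C) <= T is
     C = floor ((sqrt ((F0 - FACTOR / 2)^2 + 2 * FACTOR * T)
		 - (F0 - FACTOR / 2)) / FACTOR)
   after which the inner iteration is simply T - D(C).  The doubles are
   only used to get at a sqrt instruction; the result is range-checked
   and the code falls back to iterating if the checks fail.  */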

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, tree *nonrect_bounds,
			  gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      int count = 0;
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	count = 4;
      for (i = 0; i < fd->collapse + count; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t;
	      if (i < fd->collapse)
		t = counts[i];
	      else
		switch (i - fd->collapse)
		  {
		  case 0: t = counts[0]; break;
		  case 1: t = fd->first_inner_iterations; break;
		  case 2: t = fd->factor; break;
		  case 3: t = fd->adjn1; break;
		  default: gcc_unreachable ();
		  }
	      t = fold_convert (TREE_TYPE (tem), t);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      if (i == fd->last_nonrect)
	{
	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree stopval = t;
	  tree idx = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, idx,
				   build_zero_cst (type), true);
	  basic_block bb_triang = NULL, bb_triang_dom = NULL;
	  if (fd->first_nonrect + 1 == fd->last_nonrect
	      && (TREE_CODE (fd->loop.n2) == INTEGER_CST
		  || fd->first_inner_iterations)
	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
		  != CODE_FOR_nothing)
	      && !integer_zerop (fd->loop.n2))
	    {
	      tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree first_inner_iterations = fd->first_inner_iterations;
	      tree factor = fd->factor;
	      gcond *cond_stmt
		= expand_omp_build_cond (gsi, NE_EXPR, factor,
					 build_zero_cst (TREE_TYPE (factor)),
					 true);
	      edge e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb0 = e->src;
	      e->flags = EDGE_TRUE_VALUE;
	      e->probability = profile_probability::likely ();
	      bb_triang_dom = bb0;
	      *gsi = gsi_after_labels (e->dest);
	      tree slltype = long_long_integer_type_node;
	      tree ulltype = long_long_unsigned_type_node;
	      tree stopvalull = fold_convert (ulltype, stopval);
	      stopvalull
		= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      first_inner_iterations
		= fold_convert (slltype, first_inner_iterations);
	      first_inner_iterations
		= force_gimple_operand_gsi (gsi, first_inner_iterations, true,
					    NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      factor = fold_convert (slltype, factor);
	      factor
		= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      tree first_inner_iterationsd
		= fold_build1 (FLOAT_EXPR, double_type_node,
			       first_inner_iterations);
	      first_inner_iterationsd
		= force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
					    NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
					  factor);
	      factord = force_gimple_operand_gsi (gsi, factord, true,
						  NULL_TREE, false,
						  GSI_CONTINUE_LINKING);
	      tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
					   stopvalull);
	      stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
						   NULL_TREE, false,
						   GSI_CONTINUE_LINKING);
	      /* Temporarily disable flag_rounding_math, values will be
		 decimal numbers divided by 2 and worst case imprecisions
		 due to too large values ought to be caught later by the
		 checks for fallback.  */
	      int save_flag_rounding_math = flag_rounding_math;
	      flag_rounding_math = 0;
	      t = fold_build2 (RDIV_EXPR, double_type_node, factord,
			       build_real (double_type_node, dconst2));
	      tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
				     first_inner_iterationsd, t);
	      t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      t = fold_build2 (MULT_EXPR, double_type_node, factord,
			       build_real (double_type_node, dconst2));
	      t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
	      t = fold_build2 (PLUS_EXPR, double_type_node, t,
			       fold_build2 (MULT_EXPR, double_type_node,
					    t3, t3));
	      flag_rounding_math = save_flag_rounding_math;
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      if (flag_exceptions
		  && cfun->can_throw_non_call_exceptions
		  && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
		{
		  tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
					  build_zero_cst (double_type_node));
		  tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
						  false, GSI_CONTINUE_LINKING);
		  cond_stmt = gimple_build_cond (NE_EXPR, tem,
						 boolean_false_node,
						 NULL_TREE, NULL_TREE);
		}
	      else
		cond_stmt
		  = gimple_build_cond (LT_EXPR, t,
				       build_zero_cst (double_type_node),
				       NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb1 = e->src;
	      e->flags = EDGE_FALSE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
	      tree sqrtr = create_tmp_var (double_type_node);
	      gimple_call_set_lhs (call, sqrtr);
	      gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
	      t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
	      t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
	      t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
	      tree c = create_tmp_var (ulltype);
	      tree d = create_tmp_var (ulltype);
	      expand_omp_build_assign (gsi, c, t, true);
	      t = fold_build2 (MINUS_EXPR, ulltype, c,
			       build_one_cst (ulltype));
	      t = fold_build2 (MULT_EXPR, ulltype, c, t);
	      t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
	      t = fold_build2 (MULT_EXPR, ulltype,
			       fold_convert (ulltype, fd->factor), t);
	      tree t2
		= fold_build2 (MULT_EXPR, ulltype, c,
			       fold_convert (ulltype,
					     fd->first_inner_iterations));
	      t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
	      expand_omp_build_assign (gsi, d, t, true);
	      t = fold_build2 (MULT_EXPR, ulltype,
			       fold_convert (ulltype, fd->factor), c);
	      t = fold_build2 (PLUS_EXPR, ulltype,
			       t, fold_convert (ulltype,
						fd->first_inner_iterations));
	      t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
					     NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb2 = e->src;
	      e->flags = EDGE_TRUE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
					     NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb3 = e->src;
	      e->flags = EDGE_FALSE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      t = fold_convert (itype, c);
	      t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
	      t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
	      t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
	      t2 = fold_convert (itype, t2);
	      t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
	      t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
	      if (fd->loops[i].m1)
		{
		  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
		  t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
		}
	      expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
	      bb_triang = e->src;
	      *gsi = gsi_after_labels (e->dest);
	      remove_edge (e);
	      e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
	      e->probability = profile_probability::very_unlikely ();
	      e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
	      e->probability = profile_probability::very_unlikely ();
	      e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
	      e->probability = profile_probability::very_unlikely ();

	      basic_block bb4 = create_empty_bb (bb0);
	      add_bb_to_loop (bb4, bb0->loop_father);
	      e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
	      e->probability = profile_probability::unlikely ();
	      make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
	      set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
	      set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
	      gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
	      t2 = fold_build2 (TRUNC_DIV_EXPR, type,
				counts[i], counts[i - 1]);
	      t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
	      t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
	      t = fold_convert (itype, t);
	      t2 = fold_convert (itype, t2);
	      t = fold_build2 (MULT_EXPR, itype, t,
			       fold_convert (itype, fd->loops[i].step));
	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
	      t2 = fold_build2 (MULT_EXPR, itype, t2,
				fold_convert (itype, fd->loops[i - 1].step));
	      t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
	      t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
					     false, GSI_CONTINUE_LINKING);
	      stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
	      if (fd->loops[i].m1)
		{
		  t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
				    fd->loops[i - 1].v);
		  t = fold_build2 (PLUS_EXPR, itype, t, t2);
		}
	      t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      stmt = gimple_build_assign (fd->loops[i].v, t);
	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
	    }
	  /* Fallback implementation.  Evaluate the loops in between
	     (inclusive) fd->first_nonrect and fd->last_nonrect at
	     runtime using temporaries instead of the original iteration
	     variables, in the body just bump the counter and compare
	     with the desired value.  */
	  gimple_stmt_iterator gsi2 = *gsi;
	  basic_block entry_bb = gsi_bb (gsi2);
	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block dom_bb = NULL;
	  basic_block cur_bb = e->src;
	  basic_block next_bb = e->dest;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
	  tree n1 = NULL_TREE, n2 = NULL_TREE;
	  memset (vs, 0, fd->last_nonrect * sizeof (tree));

	  for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
	    {
	      tree itype = TREE_TYPE (fd->loops[j].v);
	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
			     && fd->loops[j].m2 == NULL_TREE
			     && !fd->loops[j].non_rect_referenced);
	      gsi2 = gsi_after_labels (cur_bb);
	      t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
	      if (fd->loops[j].m1 == NULL_TREE)
		n1 = rect_p ? build_zero_cst (type) : t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[j].m1));
		  t = unshare_expr (fd->loops[j].n1);
		  n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
		}
	      else
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
		  n1 = fold_build2 (MULT_EXPR, itype,
				    vs[j - fd->loops[j].outer], n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (j < fd->last_nonrect)
		{
		  vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
		  expand_omp_build_assign (&gsi2, vs[j], n1);
		}
	      t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
	      if (fd->loops[j].m2 == NULL_TREE)
		n2 = rect_p ? counts[j] : t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[j].m2));
		  t = unshare_expr (fd->loops[j].n2);
		  n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
		}
	      else
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
		  n2 = fold_build2 (MULT_EXPR, itype,
				    vs[j - fd->loops[j].outer], n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      if (j == fd->last_nonrect)
		{
		  gcond *cond_stmt
		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1, n2);
		  e = split_block (cur_bb, cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
		  e->probability = profile_probability::likely ().guessed ();
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);

		  t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
					     ? -1 : 1));
		  t = fold_build2 (PLUS_EXPR, itype,
				   fold_convert (itype, fd->loops[j].step), t);
		  t = fold_build2 (PLUS_EXPR, itype, t,
				   fold_convert (itype, n2));
		  t = fold_build2 (MINUS_EXPR, itype, t,
				   fold_convert (itype, n1));
		  tree step = fold_convert (itype, fd->loops[j].step);
		  if (TYPE_UNSIGNED (itype)
		      && fd->loops[j].cond_code == GT_EXPR)
		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
				     fold_build1 (NEGATE_EXPR, itype, t),
				     fold_build1 (NEGATE_EXPR, itype, step));
		  else
		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
		  t = fold_convert (type, t);
		  t = fold_build2 (PLUS_EXPR, type, idx, t);
		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
		  cond_stmt
		    = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
					 NULL_TREE);
		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
		  e = split_block (gsi_bb (gsi2), cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  e->probability = profile_probability::likely ().guessed ();
		  ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);
		  expand_omp_build_assign (&gsi2, idx, t);
		  set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
		  break;
		}
	      e = split_block (cur_bb, last_nondebug_stmt (cur_bb));

	      basic_block new_cur_bb = create_empty_bb (cur_bb);
	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);

	      gsi2 = gsi_after_labels (e->dest);
	      if (rect_p)
		t = fold_build2 (PLUS_EXPR, type, vs[j],
				 build_one_cst (type));
	      else
		{
		  tree step
		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (vs[j], step);
		  else
		    t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
		}
	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, vs[j], t);

	      edge ne = split_block (e->dest, last_nondebug_stmt (e->dest));
	      gsi2 = gsi_after_labels (ne->dest);

	      gcond *cond_stmt;
	      if (next_bb == entry_bb)
		/* No need to actually check the outermost condition.  */
		cond_stmt
		  = gimple_build_cond (EQ_EXPR, boolean_true_node,
				       boolean_true_node,
				       NULL_TREE, NULL_TREE);
	      else
		cond_stmt
		  = gimple_build_cond (rect_p ? LT_EXPR
					      : fd->loops[j].cond_code,
				       vs[j], n2, NULL_TREE, NULL_TREE);
	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
	      edge e3, e4;
	      if (next_bb == entry_bb)
		{
		  e3 = find_edge (ne->dest, next_bb);
		  e3->flags = EDGE_FALSE_VALUE;
		  dom_bb = ne->dest;
		}
	      else
		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
	      e4->probability = profile_probability::likely ().guessed ();
	      e3->probability = e4->probability.invert ();
	      basic_block esrc = e->src;
	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
	      cur_bb = new_cur_bb;
	      basic_block latch_bb = next_bb;
	      next_bb = e->dest;
	      remove_edge (e);
	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
	    }
	  for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
	    {
	      tree vtype = TREE_TYPE (fd->loops[j].v);
	      tree itype = vtype;
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
			     && fd->loops[j].m2 == NULL_TREE
			     && !fd->loops[j].non_rect_referenced);
	      if (j == fd->last_nonrect)
		{
		  t = fold_build2 (MINUS_EXPR, type, stopval, idx);
		  t = fold_convert (itype, t);
		  tree t2
		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
		  t = fold_build2 (MULT_EXPR, itype, t, t2);
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (n1, t);
		  else
		    t = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      else if (rect_p)
		{
		  t = fold_convert (itype, vs[j]);
		  t = fold_build2 (MULT_EXPR, itype, t,
				   fold_convert (itype, fd->loops[j].step));
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (fd->loops[j].n1, t);
		  else
		    t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
		}
	      else
		t = vs[j];
	      t = force_gimple_operand_gsi (gsi, t, false,
					    NULL_TREE, true,
					    GSI_SAME_STMT);
	      stmt = gimple_build_assign (fd->loops[j].v, t);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	  if (gsi_end_p (*gsi))
	    *gsi = gsi_last_bb (gsi_bb (*gsi));
	  else
	    gsi_prev (gsi);
	  if (bb_triang)
	    {
	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
	      make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
	      *gsi = gsi_after_labels (e->dest);
	      if (!gsi_end_p (*gsi))
		gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
	    }
	}
      else
	{
	  t = fold_convert (itype, t);
	  t = fold_build2 (MULT_EXPR, itype, t,
			   fold_convert (itype, fd->loops[i].step));
	  if (POINTER_TYPE_P (vtype))
	    t = fold_build_pointer_plus (fd->loops[i].n1, t);
	  else
	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
	  t = force_gimple_operand_gsi (gsi, t,
					DECL_P (fd->loops[i].v)
					&& TREE_ADDRESSABLE (fd->loops[i].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i].v, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
      if (i == fd->last_nonrect)
	i = fd->first_nonrect;
    }
  if (fd->non_rect)
    for (i = 0; i <= fd->last_nonrect; i++)
      if (fd->loops[i].m2)
	{
	  tree itype = TREE_TYPE (fd->loops[i].v);

	  tree t;
	  if (POINTER_TYPE_P (itype))
	    {
	      gcc_assert (integer_onep (fd->loops[i].m2));
	      t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
					   unshare_expr (fd->loops[i].n2));
	    }
	  else
	    {
	      t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
	      t = fold_build2 (MULT_EXPR, itype,
			       fd->loops[i - fd->loops[i].outer].v, t);
	      t = fold_build2 (PLUS_EXPR, itype, t,
			       fold_convert (itype,
					     unshare_expr (fd->loops[i].n2)));
	    }
	  nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
	  t = force_gimple_operand_gsi (gsi, t, false,
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (nonrect_bounds[i], t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
}

/* Helper function for expand_omp_for_*.  Generate code like:
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;
   For non-rectangular loops, use temporaries stored in nonrect_bounds
   for the upper bounds if M?2 multiplier is present.  Given e.g.
   for (V1 = N11; V1 cond1 N12; V1 += STEP1)
   for (V2 = N21; V2 cond2 N22; V2 += STEP2)
   for (V3 = N31; V3 cond3 N32; V3 += STEP3)
   for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
   do:
    L10:
	V4 += STEP4;
	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
    L11:
	V4 = N41 + M41 * V2; // This can be left out if the loop
			     // refers to the immediate parent loop
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L12;
    L12:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto L120; else goto L13;
    L120:
	V4 = N41 + M41 * V2;
	NONRECT_BOUND4 = N42 + M42 * V2;
	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
    L13:
	V2 = N21;
	V1 += STEP1;
	goto L120;  */
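
/* In plain C the update step amounts to an odometer increment; for a
   rectangular collapse(2) nest the generated blocks behave like (a
   sketch, ignoring the CFG details and the overall exit test):

     v2 += step2;
     if (!(v2 cond2 n22))
       {
	 v2 = n21;
	 v1 += step1;
       }
     goto body;
*/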
3090
3091static basic_block
3092extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3093 basic_block cont_bb, basic_block body_bb)
3094{
3095 basic_block last_bb, bb, collapse_bb = NULL;
3096 int i;
3097 gimple_stmt_iterator gsi;
3098 edge e;
3099 tree t;
3100 gimple *stmt;
3101
3102 last_bb = cont_bb;
3103 for (i = fd->collapse - 1; i >= 0; i--)
3104 {
3105 tree vtype = TREE_TYPE (fd->loops[i].v);
3106
3107 bb = create_empty_bb (last_bb);
3108 add_bb_to_loop (bb, last_bb->loop_father);
3109 gsi = gsi_start_bb (bb);
3110
3111 if (i < fd->collapse - 1)
3112 {
3113 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3114 e->probability = profile_probability::guessed_always () / 8;
3115
3116 struct omp_for_data_loop *l = &fd->loops[i + 1];
3117 if (l->m1 == NULL_TREE || l->outer != 1)
3118 {
3119 t = l->n1;
3120 if (l->m1)
3121 {
3122 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3123 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3124 t);
3125 else
3126 {
3127 tree t2
3128 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3129 fd->loops[i + 1 - l->outer].v, l->m1);
3130 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3131 }
3132 }
3133 t = force_gimple_operand_gsi (&gsi, t,
3134 DECL_P (l->v)
3135 && TREE_ADDRESSABLE (l->v),
3136 NULL_TREE, false,
3137 GSI_CONTINUE_LINKING);
3138 stmt = gimple_build_assign (l->v, t);
3139 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3140 }
3141 }
3142 else
3143 collapse_bb = bb;
3144
3145 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3146
3147 if (POINTER_TYPE_P (vtype))
3148 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3149 else
3150 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3151 t = force_gimple_operand_gsi (&gsi, t,
3152 DECL_P (fd->loops[i].v)
3153 && TREE_ADDRESSABLE (fd->loops[i].v),
3154 NULL_TREE, false, GSI_CONTINUE_LINKING);
3155 stmt = gimple_build_assign (fd->loops[i].v, t);
3156 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3157
3158 if (fd->loops[i].non_rect_referenced)
3159 {
3160 basic_block update_bb = NULL, prev_bb = NULL;
3161 for (int j = i + 1; j <= fd->last_nonrect; j++)
3162 if (j - fd->loops[j].outer == i)
3163 {
3164 tree n1, n2;
3165 struct omp_for_data_loop *l = &fd->loops[j];
3166 basic_block this_bb = create_empty_bb (last_bb);
3167 add_bb_to_loop (this_bb, last_bb->loop_father);
3168 gimple_stmt_iterator gsi2 = gsi_start_bb (bb: this_bb);
3169 if (prev_bb)
3170 {
3171 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3172 e->probability
3173 = profile_probability::guessed_always ().apply_scale (num: 7,
3174 den: 8);
3175 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3176 }
3177 if (l->m1)
3178 {
3179 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3180 t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3181 else
3182 {
3183 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3184 fd->loops[i].v);
3185 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3186 t, l->n1);
3187 }
3188 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3189 false,
3190 GSI_CONTINUE_LINKING);
3191 stmt = gimple_build_assign (l->v, n1);
3192 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3193 n1 = l->v;
3194 }
3195 else
3196 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3197 NULL_TREE, false,
3198 GSI_CONTINUE_LINKING);
3199 if (l->m2)
3200 {
3201 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3202 t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3203 else
3204 {
3205 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3206 fd->loops[i].v);
3207 t = fold_build2 (PLUS_EXPR,
3208 TREE_TYPE (nonrect_bounds[j]),
3209 t, unshare_expr (l->n2));
3210 }
3211 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3212 false,
3213 GSI_CONTINUE_LINKING);
3214 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3215 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3216 n2 = nonrect_bounds[j];
3217 }
3218 else
3219 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3220 true, NULL_TREE, false,
3221 GSI_CONTINUE_LINKING);
3222 gcond *cond_stmt
3223 = gimple_build_cond (l->cond_code, n1, n2,
3224 NULL_TREE, NULL_TREE);
3225 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3226 if (update_bb == NULL)
3227 update_bb = this_bb;
3228 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3229 e->probability = profile_probability::guessed_always () / 8;
3230 if (prev_bb == NULL)
3231 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3232 prev_bb = this_bb;
3233 }
3234 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3235 e->probability
3236 = profile_probability::guessed_always ().apply_scale (num: 7, den: 8);
3237 body_bb = update_bb;
3238 }
3239
3240 if (i > 0)
3241 {
3242 if (fd->loops[i].m2)
3243 t = nonrect_bounds[i];
3244 else
3245 t = unshare_expr (fd->loops[i].n2);
3246 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3247 false, GSI_CONTINUE_LINKING);
3248 tree v = fd->loops[i].v;
3249 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3250 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3251 false, GSI_CONTINUE_LINKING);
3252 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3253 stmt = gimple_build_cond_empty (t);
3254 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3255 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3256 expand_omp_regimplify_p, NULL, NULL)
3257 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3258 expand_omp_regimplify_p, NULL, NULL))
3259 gimple_regimplify_operands (stmt, &gsi);
3260 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3261 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3262 }
3263 else
3264 make_edge (bb, body_bb, EDGE_FALLTHRU);
3265 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3266 last_bb = bb;
3267 }
3268
3269 return collapse_bb;
3270}
3271
3272/* Expand #pragma omp ordered depend(source). */
3273
3274static void
3275expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3276 tree *counts, location_t loc)
3277{
3278 enum built_in_function source_ix
3279 = fd->iter_type == long_integer_type_node
3280 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3281 gimple *g
3282 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3283 build_fold_addr_expr (counts[fd->ordered]));
3284 gimple_set_location (g, loc);
3285 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3286}
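/* For orientation, a sketch of what the expansion above produces (not
   literal compiler output): for

     #pragma omp for ordered(1)
     for (i = 0; i < n; i++)
       {
	 ...
	 #pragma omp ordered depend(source)
	 ...
       }

   the depend(source) directive becomes roughly

     GOMP_doacross_post (&.orditera[0]);

   where .orditera is the counts[fd->ordered] array created in
   expand_omp_ordered_source_sink below and kept up to date with the
   current per-dimension iteration counts by the loop expansion code.
   GOMP_doacross_post and its _ull_ variant are the libgomp entry points
   selected via source_ix above.  */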
3287
3288/* Expand a single depend from #pragma omp ordered depend(sink:...). */
3289
3290static void
3291expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3292 tree *counts, tree c, location_t loc,
3293 basic_block cont_bb)
3294{
3295 auto_vec<tree, 10> args;
3296 enum built_in_function sink_ix
3297 = fd->iter_type == long_integer_type_node
3298 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3299 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3300 int i;
3301 gimple_stmt_iterator gsi2 = *gsi;
3302 bool warned_step = false;
3303
3304 if (deps == NULL)
3305 {
3306 /* Handle doacross(sink: omp_cur_iteration - 1). */
3307 gsi_prev (&gsi2);
3308 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3309 edge e2 = split_block_after_labels (e1->dest);
3310 gsi2 = gsi_after_labels (e1->dest);
3311 *gsi = gsi_last_bb (e1->src);
3312 gimple_stmt_iterator gsi3 = *gsi;
3313
3314 if (counts[fd->collapse - 1])
3315 {
3316 gcc_assert (fd->collapse == 1);
3317 t = counts[fd->collapse - 1];
3318 }
3319 else if (fd->collapse > 1)
3320 t = fd->loop.v;
3321 else
3322 {
3323 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3324 fd->loops[0].v, fd->loops[0].n1);
3325 t = fold_convert (fd->iter_type, t);
3326 }
3327
3328 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
3329 false, GSI_CONTINUE_LINKING);
3330 gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
3331 build_zero_cst (TREE_TYPE (t)),
3332 NULL_TREE, NULL_TREE),
3333 GSI_NEW_STMT);
3334
3335 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3336 build_minus_one_cst (TREE_TYPE (t)));
3337 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3338 true, GSI_SAME_STMT);
3339 args.safe_push (t);
3340 for (i = fd->collapse; i < fd->ordered; i++)
3341 {
3342 t = counts[fd->ordered + 2 + (i - fd->collapse)];
3343 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3344 build_minus_one_cst (TREE_TYPE (t)));
3345 t = fold_convert (fd->iter_type, t);
3346 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348 args.safe_push (t);
3349 }
3350
3351 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
3352 args);
3353 gimple_set_location (g, loc);
3354 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3355
3356 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3357 e3->probability = profile_probability::guessed_always () / 8;
3358 e1->probability = e3->probability.invert ();
3359 e1->flags = EDGE_TRUE_VALUE;
3360 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3361
3362 if (fd->ordered > fd->collapse && cont_bb)
3363 {
3364 if (counts[fd->ordered + 1] == NULL_TREE)
3365 counts[fd->ordered + 1]
3366 = create_tmp_var (boolean_type_node, ".first");
3367
3368 edge e4;
3369 if (gsi_end_p (gsi3))
3370 e4 = split_block_after_labels (e1->src);
3371 else
3372 {
3373 gsi_prev (&gsi3);
3374 e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
3375 }
3376 gsi3 = gsi_last_bb (e4->src);
3377
3378 gsi_insert_after (&gsi3,
3379 gimple_build_cond (NE_EXPR,
3380 counts[fd->ordered + 1],
3381 boolean_false_node,
3382 NULL_TREE, NULL_TREE),
3383 GSI_NEW_STMT);
3384
3385 edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
3386 e4->probability = profile_probability::guessed_always () / 8;
3387 e5->probability = e4->probability.invert ();
3388 e4->flags = EDGE_TRUE_VALUE;
3389 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
3390 }
3391
3392 *gsi = gsi_after_labels (e2->dest);
3393 return;
3394 }
3395 for (i = 0; i < fd->ordered; i++)
3396 {
3397 tree step = NULL_TREE;
3398 off = TREE_PURPOSE (deps);
3399 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3400 {
3401 step = TREE_OPERAND (off, 1);
3402 off = TREE_OPERAND (off, 0);
3403 }
3404 if (!integer_zerop (off))
3405 {
3406 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3407 || fd->loops[i].cond_code == GT_EXPR);
3408 bool forward = fd->loops[i].cond_code == LT_EXPR;
3409 if (step)
3410 {
3411 /* Non-simple Fortran DO loops. If the step is variable,
3412 we don't even know the direction at compile time, so we
3413 can't warn. */
3414 if (TREE_CODE (step) != INTEGER_CST)
3415 break;
3416 forward = tree_int_cst_sgn (step) != -1;
3417 }
3418 if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3419 warning_at (loc, OPT_Wopenmp,
3420 "%qs clause with %<sink%> modifier "
3421 "waiting for lexically later iteration",
3422 OMP_CLAUSE_DOACROSS_DEPEND (c)
3423 ? "depend" : "doacross");
3424 break;
3425 }
3426 deps = TREE_CHAIN (deps);
3427 }
3428 /* If all offsets corresponding to the collapsed loops are zero,
3429 this depend clause can be ignored. FIXME: but there is still a
3430 flush needed. We need to emit one __sync_synchronize () for it
3431 though (perhaps conditionally)? Solve this together with the
3432 conservative dependence folding optimization.
3433 if (i >= fd->collapse)
3434 return; */
3435
3436 deps = OMP_CLAUSE_DECL (c);
3437 gsi_prev (&gsi2);
3438 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3439 edge e2 = split_block_after_labels (e1->dest);
3440
3441 gsi2 = gsi_after_labels (e1->dest);
3442 *gsi = gsi_last_bb (e1->src);
3443 for (i = 0; i < fd->ordered; i++)
3444 {
3445 tree itype = TREE_TYPE (fd->loops[i].v);
3446 tree step = NULL_TREE;
3447 tree orig_off = NULL_TREE;
3448 if (POINTER_TYPE_P (itype))
3449 itype = sizetype;
3450 if (i)
3451 deps = TREE_CHAIN (deps);
3452 off = TREE_PURPOSE (deps);
3453 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3454 {
3455 step = TREE_OPERAND (off, 1);
3456 off = TREE_OPERAND (off, 0);
3457 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3458 && integer_onep (fd->loops[i].step)
3459 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3460 }
3461 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3462 if (step)
3463 {
3464 off = fold_convert_loc (loc, itype, off);
3465 orig_off = off;
3466 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3467 }
3468
3469 if (integer_zerop (off))
3470 t = boolean_true_node;
3471 else
3472 {
3473 tree a;
3474 tree co = fold_convert_loc (loc, itype, off);
3475 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3476 {
3477 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3478 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3479 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3480 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3481 co);
3482 }
3483 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3484 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3485 fd->loops[i].v, co);
3486 else
3487 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3488 fd->loops[i].v, co);
3489 if (step)
3490 {
3491 tree t1, t2;
3492 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3493 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3494 fd->loops[i].n1);
3495 else
3496 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3497 fd->loops[i].n2);
3498 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3499 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3500 fd->loops[i].n2);
3501 else
3502 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3503 fd->loops[i].n1);
3504 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3505 step, build_int_cst (TREE_TYPE (step), 0));
3506 if (TREE_CODE (step) != INTEGER_CST)
3507 {
3508 t1 = unshare_expr (t1);
3509 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3510 false, GSI_CONTINUE_LINKING);
3511 t2 = unshare_expr (t2);
3512 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3513 false, GSI_CONTINUE_LINKING);
3514 }
3515 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3516 t, t2, t1);
3517 }
3518 else if (fd->loops[i].cond_code == LT_EXPR)
3519 {
3520 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3521 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3522 fd->loops[i].n1);
3523 else
3524 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3525 fd->loops[i].n2);
3526 }
3527 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3528 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3529 fd->loops[i].n2);
3530 else
3531 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3532 fd->loops[i].n1);
3533 }
3534 if (cond)
3535 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3536 else
3537 cond = t;
3538
3539 off = fold_convert_loc (loc, itype, off);
3540
3541 if (step
3542 || (fd->loops[i].cond_code == LT_EXPR
3543 ? !integer_onep (fd->loops[i].step)
3544 : !integer_minus_onep (fd->loops[i].step)))
3545 {
3546 if (step == NULL_TREE
3547 && TYPE_UNSIGNED (itype)
3548 && fd->loops[i].cond_code == GT_EXPR)
3549 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3550 fold_build1_loc (loc, NEGATE_EXPR, itype,
3551 s));
3552 else
3553 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3554 orig_off ? orig_off : off, s);
3555 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3556 build_int_cst (itype, 0));
3557 if (integer_zerop (t) && !warned_step)
3558 {
3559 warning_at (loc, OPT_Wopenmp,
3560 "%qs clause with %<sink%> modifier refers to "
3561 "iteration never in the iteration space",
3562 OMP_CLAUSE_DOACROSS_DEPEND (c)
3563 ? "depend" : "doacross");
3564 warned_step = true;
3565 }
3566 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3567 cond, t);
3568 }
3569
3570 if (i <= fd->collapse - 1 && fd->collapse > 1)
3571 t = fd->loop.v;
3572 else if (counts[i])
3573 t = counts[i];
3574 else
3575 {
3576 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3577 fd->loops[i].v, fd->loops[i].n1);
3578 t = fold_convert_loc (loc, fd->iter_type, t);
3579 }
3580 if (step)
3581 /* OFF has already been divided by STEP earlier. */;
3582 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3583 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3584 fold_build1_loc (loc, NEGATE_EXPR, itype,
3585 s));
3586 else
3587 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3588 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3589 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3590 off = fold_convert_loc (loc, fd->iter_type, off);
3591 if (i <= fd->collapse - 1 && fd->collapse > 1)
3592 {
3593 if (i)
3594 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3595 off);
3596 if (i < fd->collapse - 1)
3597 {
3598 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3599 counts[i]);
3600 continue;
3601 }
3602 }
3603 off = unshare_expr (off);
3604 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3605 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3606 true, GSI_SAME_STMT);
3607 args.safe_push (t);
3608 }
3609 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3610 gimple_set_location (g, loc);
3611 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3612
3613 cond = unshare_expr (cond);
3614 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3615 GSI_CONTINUE_LINKING);
3616 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3617 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3618 e3->probability = profile_probability::guessed_always () / 8;
3619 e1->probability = e3->probability.invert ();
3620 e1->flags = EDGE_TRUE_VALUE;
3621 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3622
3623 *gsi = gsi_after_labels (e2->dest);
3624}
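/* An informal example of the sink expansion above: for

     #pragma omp ordered depend(sink: i-1, j+1)

   inside a loop nest with ordered(2) and both loops using <, the
   generated code has the shape

     if (i - 1 >= n1_0 && j + 1 < n2_1)
       GOMP_doacross_wait ((i - 1 - n1_0) / step0, (j + 1 - n1_1) / step1);

   i.e. the wait is skipped entirely when the offset iteration falls
   outside the iteration space, and each offset is normalized to a
   zero-based logical iteration number before calling libgomp.  The exact
   condition depends on the loop directions; the names n1_0 etc. are
   illustrative only.  */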
3625
3626/* Expand all #pragma omp ordered depend(source) and
3627 #pragma omp ordered depend(sink:...) constructs in the current
3628 #pragma omp for ordered(n) region. */
3629
3630static void
3631expand_omp_ordered_source_sink (struct omp_region *region,
3632 struct omp_for_data *fd, tree *counts,
3633 basic_block cont_bb)
3634{
3635 struct omp_region *inner;
3636 int i;
3637 for (i = fd->collapse - 1; i < fd->ordered; i++)
3638 if (i == fd->collapse - 1 && fd->collapse > 1)
3639 counts[i] = NULL_TREE;
3640 else if (i >= fd->collapse && !cont_bb)
3641 counts[i] = build_zero_cst (fd->iter_type);
3642 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3643 && integer_onep (fd->loops[i].step))
3644 counts[i] = NULL_TREE;
3645 else
3646 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3647 tree atype
3648 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3649 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3650 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3651 counts[fd->ordered + 1] = NULL_TREE;
3652
3653 for (inner = region->inner; inner; inner = inner->next)
3654 if (inner->type == GIMPLE_OMP_ORDERED)
3655 {
3656 gomp_ordered *ord_stmt = inner->ord_stmt;
3657 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3658 location_t loc = gimple_location (ord_stmt);
3659 tree c;
3660 for (c = gimple_omp_ordered_clauses (ord_stmt);
3661 c; c = OMP_CLAUSE_CHAIN (c))
3662 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3663 break;
3664 if (c)
3665 expand_omp_ordered_source (&gsi, fd, counts, loc);
3666 for (c = gimple_omp_ordered_clauses (ord_stmt);
3667 c; c = OMP_CLAUSE_CHAIN (c))
3668 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3669 expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3670 gsi_remove (&gsi, true);
3671 }
3672}
3673
3674/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3675 collapsed. */
3676
3677static basic_block
3678expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3679 basic_block cont_bb, basic_block body_bb,
3680 basic_block l0_bb, bool ordered_lastprivate)
3681{
3682 if (fd->ordered == fd->collapse)
3683 return cont_bb;
3684
3685 if (!cont_bb)
3686 {
3687 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3688 for (int i = fd->collapse; i < fd->ordered; i++)
3689 {
3690 tree type = TREE_TYPE (fd->loops[i].v);
3691 tree n1 = fold_convert (type, fd->loops[i].n1);
3692 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3693 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3694 size_int (i - fd->collapse + 1),
3695 NULL_TREE, NULL_TREE);
3696 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3697 }
3698 return NULL;
3699 }
3700
3701 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3702 {
3703 tree t, type = TREE_TYPE (fd->loops[i].v);
3704 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3705 if (counts[fd->ordered + 1] && i == fd->collapse)
3706 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3707 boolean_true_node);
3708 expand_omp_build_assign (&gsi, fd->loops[i].v,
3709 fold_convert (type, fd->loops[i].n1));
3710 if (counts[i])
3711 expand_omp_build_assign (&gsi, counts[i],
3712 build_zero_cst (fd->iter_type));
3713 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3714 size_int (i - fd->collapse + 1),
3715 NULL_TREE, NULL_TREE);
3716 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3717 if (!gsi_end_p (gsi))
3718 gsi_prev (&gsi);
3719 else
3720 gsi = gsi_last_bb (body_bb);
3721 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3722 basic_block new_body = e1->dest;
3723 if (body_bb == cont_bb)
3724 cont_bb = new_body;
3725 edge e2 = NULL;
3726 basic_block new_header;
3727 if (EDGE_COUNT (cont_bb->preds) > 0)
3728 {
3729 gsi = gsi_last_bb (cont_bb);
3730 if (POINTER_TYPE_P (type))
3731 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3732 else
3733 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3734 fold_convert (type, fd->loops[i].step));
3735 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3736 if (counts[i])
3737 {
3738 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3739 build_int_cst (fd->iter_type, 1));
3740 expand_omp_build_assign (&gsi, counts[i], t);
3741 t = counts[i];
3742 }
3743 else
3744 {
3745 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3746 fd->loops[i].v, fd->loops[i].n1);
3747 t = fold_convert (fd->iter_type, t);
3748 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3749 true, GSI_SAME_STMT);
3750 }
3751 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3752 size_int (i - fd->collapse + 1),
3753 NULL_TREE, NULL_TREE);
3754 expand_omp_build_assign (&gsi, aref, t);
3755 if (counts[fd->ordered + 1] && i == fd->ordered - 1)
3756 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3757 boolean_false_node);
3758 gsi_prev (&gsi);
3759 e2 = split_block (cont_bb, gsi_stmt (gsi));
3760 new_header = e2->dest;
3761 }
3762 else
3763 new_header = cont_bb;
3764 gsi = gsi_after_labels (new_header);
3765 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3766 true, GSI_SAME_STMT);
3767 tree n2
3768 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3769 true, NULL_TREE, true, GSI_SAME_STMT);
3770 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3771 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3772 edge e3 = split_block (new_header, gsi_stmt (gsi));
3773 cont_bb = e3->dest;
3774 remove_edge (e1);
3775 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3776 e3->flags = EDGE_FALSE_VALUE;
3777 e3->probability = profile_probability::guessed_always () / 8;
3778 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3779 e1->probability = e3->probability.invert ();
3780
3781 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3782 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3783
3784 if (e2)
3785 {
3786 class loop *loop = alloc_loop ();
3787 loop->header = new_header;
3788 loop->latch = e2->src;
3789 add_loop (loop, l0_bb->loop_father);
3790 }
3791 }
3792
3793 /* If there are any lastprivate clauses and it is possible some loops
3794 might have zero iterations, ensure all the decls are initialized,
3795 otherwise we could crash evaluating C++ class iterators with lastprivate
3796 clauses. */
3797 bool need_inits = false;
3798 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3799 if (need_inits)
3800 {
3801 tree type = TREE_TYPE (fd->loops[i].v);
3802 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3803 expand_omp_build_assign (&gsi, fd->loops[i].v,
3804 fold_convert (type, fd->loops[i].n1));
3805 }
3806 else
3807 {
3808 tree type = TREE_TYPE (fd->loops[i].v);
3809 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3810 boolean_type_node,
3811 fold_convert (type, fd->loops[i].n1),
3812 fold_convert (type, fd->loops[i].n2));
3813 if (!integer_onep (this_cond))
3814 need_inits = true;
3815 }
3816
3817 return cont_bb;
3818}
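/* A sketch of the result (informal; the real code splits blocks and
   builds the condition separately): with collapse(1) ordered(2), the
   body is wrapped into one explicit loop for the second, non-collapsed
   dimension, roughly

     V2 = N21;
     .orditer1 = 0;
     .orditera[1] = 0;
   hdr:
     if (V2 cond2 N22)
       {
	 BODY;
	 V2 += STEP2;
	 .orditera[1] = ++.orditer1;
	 goto hdr;
       }

   where the .orditer1 temporary exists only when the loop does not have
   literal unit step (otherwise V2 - N21 is stored directly).  */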
3819
3820/* A subroutine of expand_omp_for. Generate code for a parallel
3821 loop with any schedule. Given parameters:
3822
3823 for (V = N1; V cond N2; V += STEP) BODY;
3824
3825 where COND is "<" or ">", we generate pseudocode
3826
3827 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3828 if (more) goto L0; else goto L3;
3829 L0:
3830 V = istart0;
3831 iend = iend0;
3832 L1:
3833 BODY;
3834 V += STEP;
3835 if (V cond iend) goto L1; else goto L2;
3836 L2:
3837 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3838 L3:
3839
3840 If this is a combined omp parallel loop, instead of the call to
3841 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3842 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3843 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3844 inner GIMPLE_OMP_FOR and V += STEP; and
3845 if (V cond iend) goto L1; else goto L2; are removed.
3846
3847 For collapsed loops, given parameters:
3848 collapse(3)
3849 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3850 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3851 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3852 BODY;
3853
3854 we generate pseudocode
3855
3856 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3857 if (cond3 is <)
3858 adj = STEP3 - 1;
3859 else
3860 adj = STEP3 + 1;
3861 count3 = (adj + N32 - N31) / STEP3;
3862 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3863 if (cond2 is <)
3864 adj = STEP2 - 1;
3865 else
3866 adj = STEP2 + 1;
3867 count2 = (adj + N22 - N21) / STEP2;
3868 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3869 if (cond1 is <)
3870 adj = STEP1 - 1;
3871 else
3872 adj = STEP1 + 1;
3873 count1 = (adj + N12 - N11) / STEP1;
3874 count = count1 * count2 * count3;
3875 goto Z1;
3876 Z0:
3877 count = 0;
3878 Z1:
3879 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3880 if (more) goto L0; else goto L3;
3881 L0:
3882 V = istart0;
3883 T = V;
3884 V3 = N31 + (T % count3) * STEP3;
3885 T = T / count3;
3886 V2 = N21 + (T % count2) * STEP2;
3887 T = T / count2;
3888 V1 = N11 + T * STEP1;
3889 iend = iend0;
3890 L1:
3891 BODY;
3892 V += 1;
3893 if (V < iend) goto L10; else goto L2;
3894 L10:
3895 V3 += STEP3;
3896 if (V3 cond3 N32) goto L1; else goto L11;
3897 L11:
3898 V3 = N31;
3899 V2 += STEP2;
3900 if (V2 cond2 N22) goto L1; else goto L12;
3901 L12:
3902 V2 = N21;
3903 V1 += STEP1;
3904 goto L1;
3905 L2:
3906 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3907 L3:
3908
3909 */
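/* In expand_omp_for_generic below, L0/L1/L2/L3 from the pseudocode above
   correspond to l0_bb, l1_bb, l2_bb and l3_bb, and GOMP_loop_foo_start
   and GOMP_loop_foo_next stand for the START_FN and NEXT_FN built-ins
   chosen by the caller, e.g. BUILT_IN_GOMP_LOOP_DYNAMIC_START and
   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT for schedule(dynamic).  */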
3910
3911static void
3912expand_omp_for_generic (struct omp_region *region,
3913 struct omp_for_data *fd,
3914 enum built_in_function start_fn,
3915 enum built_in_function next_fn,
3916 tree sched_arg,
3917 gimple *inner_stmt)
3918{
3919 tree type, istart0, iend0, iend;
3920 tree t, vmain, vback, bias = NULL_TREE;
3921 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3922 basic_block l2_bb = NULL, l3_bb = NULL;
3923 gimple_stmt_iterator gsi;
3924 gassign *assign_stmt;
3925 bool in_combined_parallel = is_combined_parallel (region);
3926 bool broken_loop = region->cont == NULL;
3927 edge e, ne;
3928 tree *counts = NULL;
3929 int i;
3930 bool ordered_lastprivate = false;
3931
3932 gcc_assert (!broken_loop || !in_combined_parallel);
3933 gcc_assert (fd->iter_type == long_integer_type_node
3934 || !in_combined_parallel);
3935
3936 entry_bb = region->entry;
3937 cont_bb = region->cont;
3938 collapse_bb = NULL;
3939 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3940 gcc_assert (broken_loop
3941 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3942 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3943 l1_bb = single_succ (l0_bb);
3944 if (!broken_loop)
3945 {
3946 l2_bb = create_empty_bb (cont_bb);
3947 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3948 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3949 == l1_bb));
3950 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3951 }
3952 else
3953 l2_bb = NULL;
3954 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3955 exit_bb = region->exit;
3956
3957 gsi = gsi_last_nondebug_bb (entry_bb);
3958
3959 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3960 if (fd->ordered
3961 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3962 OMP_CLAUSE_LASTPRIVATE))
3963 ordered_lastprivate = true;
3964 tree reductions = NULL_TREE;
3965 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3966 tree memv = NULL_TREE;
3967 if (fd->lastprivate_conditional)
3968 {
3969 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3970 OMP_CLAUSE__CONDTEMP_);
3971 if (fd->have_pointer_condtemp)
3972 condtemp = OMP_CLAUSE_DECL (c);
3973 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3974 cond_var = OMP_CLAUSE_DECL (c);
3975 }
3976 if (sched_arg)
3977 {
3978 if (fd->have_reductemp)
3979 {
3980 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3981 OMP_CLAUSE__REDUCTEMP_);
3982 reductions = OMP_CLAUSE_DECL (c);
3983 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3984 gimple *g = SSA_NAME_DEF_STMT (reductions);
3985 reductions = gimple_assign_rhs1 (g);
3986 OMP_CLAUSE_DECL (c) = reductions;
3987 entry_bb = gimple_bb (g);
3988 edge e = split_block (entry_bb, g);
3989 if (region->entry == entry_bb)
3990 region->entry = e->dest;
3991 gsi = gsi_last_bb (entry_bb);
3992 }
3993 else
3994 reductions = null_pointer_node;
3995 if (fd->have_pointer_condtemp)
3996 {
3997 tree type = TREE_TYPE (condtemp);
3998 memv = create_tmp_var (type);
3999 TREE_ADDRESSABLE (memv) = 1;
4000 unsigned HOST_WIDE_INT sz
4001 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4002 sz *= fd->lastprivate_conditional;
4003 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4004 false);
4005 mem = build_fold_addr_expr (memv);
4006 }
4007 else
4008 mem = null_pointer_node;
4009 }
4010 if (fd->collapse > 1 || fd->ordered)
4011 {
4012 int first_zero_iter1 = -1, first_zero_iter2 = -1;
4013 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4014
4015 counts = XALLOCAVEC (tree, fd->ordered
4016 ? fd->ordered + 2
4017 + (fd->ordered - fd->collapse)
4018 : fd->collapse);
4019 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4020 zero_iter1_bb, first_zero_iter1,
4021 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4022
4023 if (zero_iter1_bb)
4024 {
4025 /* Some counts[i] vars might be uninitialized if
4026 some loop has zero iterations. But the body shouldn't
4027 be executed in that case, so just avoid uninit warnings. */
4028 for (i = first_zero_iter1;
4029 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4030 if (SSA_VAR_P (counts[i]))
4031 suppress_warning (counts[i], OPT_Wuninitialized);
4032 gsi_prev (&gsi);
4033 e = split_block (entry_bb, gsi_stmt (gsi));
4034 entry_bb = e->dest;
4035 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4036 gsi = gsi_last_nondebug_bb (entry_bb);
4037 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4038 get_immediate_dominator (CDI_DOMINATORS,
4039 zero_iter1_bb));
4040 }
4041 if (zero_iter2_bb)
4042 {
4043 /* Some counts[i] vars might be uninitialized if
4044 some loop has zero iterations. But the body shouldn't
4045 be executed in that case, so just avoid uninit warnings. */
4046 for (i = first_zero_iter2; i < fd->ordered; i++)
4047 if (SSA_VAR_P (counts[i]))
4048 suppress_warning (counts[i], OPT_Wuninitialized);
4049 if (zero_iter1_bb)
4050 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4051 else
4052 {
4053 gsi_prev (&gsi);
4054 e = split_block (entry_bb, gsi_stmt (gsi));
4055 entry_bb = e->dest;
4056 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4057 gsi = gsi_last_nondebug_bb (entry_bb);
4058 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4059 get_immediate_dominator
4060 (CDI_DOMINATORS, zero_iter2_bb));
4061 }
4062 }
4063 if (fd->collapse == 1)
4064 {
4065 counts[0] = fd->loop.n2;
4066 fd->loop = fd->loops[0];
4067 }
4068 }
4069
4070 type = TREE_TYPE (fd->loop.v);
4071 istart0 = create_tmp_var (fd->iter_type, ".istart0");
4072 iend0 = create_tmp_var (fd->iter_type, ".iend0");
4073 TREE_ADDRESSABLE (istart0) = 1;
4074 TREE_ADDRESSABLE (iend0) = 1;
4075
4076 /* See if we need to bias by LLONG_MIN. */
4077 if (fd->iter_type == long_long_unsigned_type_node
4078 && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
4079 && !TYPE_UNSIGNED (type)
4080 && fd->ordered == 0)
4081 {
4082 tree n1, n2;
4083
4084 if (fd->loop.cond_code == LT_EXPR)
4085 {
4086 n1 = fd->loop.n1;
4087 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4088 }
4089 else
4090 {
4091 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4092 n2 = fd->loop.n1;
4093 }
4094 if (TREE_CODE (n1) != INTEGER_CST
4095 || TREE_CODE (n2) != INTEGER_CST
4096 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4097 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4098 }
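/* A worked example of the bias (informal): with signed long long V and
   step 1, the values -2, -1, 0, 1, 2 are not increasing when simply
   reinterpreted as unsigned, since the negative ones wrap to huge
   numbers.  Adding bias = LLONG_MIN, i.e. 2^63 in unsigned arithmetic,
   maps them to 2^63-2, 2^63-1, 2^63, 2^63+1, 2^63+2, which is increasing
   again, so the unsigned long long runtime entry points can be used; the
   bias is subtracted when istart0/iend0 are read back below.  */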
4099
4100 gimple_stmt_iterator gsif = gsi;
4101 gsi_prev (&gsif);
4102
4103 tree arr = NULL_TREE;
4104 if (in_combined_parallel)
4105 {
4106 gcc_assert (fd->ordered == 0);
4107 /* In a combined parallel loop, emit a call to
4108 GOMP_loop_foo_next. */
4109 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4110 build_fold_addr_expr (istart0),
4111 build_fold_addr_expr (iend0));
4112 }
4113 else
4114 {
4115 tree t0, t1, t2, t3, t4;
4116 /* If this is not a combined parallel loop, emit a call to
4117 GOMP_loop_foo_start in ENTRY_BB. */
4118 t4 = build_fold_addr_expr (iend0);
4119 t3 = build_fold_addr_expr (istart0);
4120 if (fd->ordered)
4121 {
4122 t0 = build_int_cst (unsigned_type_node,
4123 fd->ordered - fd->collapse + 1);
4124 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4125 fd->ordered
4126 - fd->collapse + 1),
4127 ".omp_counts");
4128 DECL_NAMELESS (arr) = 1;
4129 TREE_ADDRESSABLE (arr) = 1;
4130 TREE_STATIC (arr) = 1;
4131 vec<constructor_elt, va_gc> *v;
4132 vec_alloc (v, fd->ordered - fd->collapse + 1);
4133 int idx;
4134
4135 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4136 {
4137 tree c;
4138 if (idx == 0 && fd->collapse > 1)
4139 c = fd->loop.n2;
4140 else
4141 c = counts[idx + fd->collapse - 1];
4142 tree purpose = size_int (idx);
4143 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4144 if (TREE_CODE (c) != INTEGER_CST)
4145 TREE_STATIC (arr) = 0;
4146 }
4147
4148 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4149 if (!TREE_STATIC (arr))
4150 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4151 void_type_node, arr),
4152 true, NULL_TREE, true, GSI_SAME_STMT);
4153 t1 = build_fold_addr_expr (arr);
4154 t2 = NULL_TREE;
4155 }
4156 else
4157 {
4158 t2 = fold_convert (fd->iter_type, fd->loop.step);
4159 t1 = fd->loop.n2;
4160 t0 = fd->loop.n1;
4161 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4162 {
4163 tree innerc
4164 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4165 OMP_CLAUSE__LOOPTEMP_);
4166 gcc_assert (innerc);
4167 t0 = OMP_CLAUSE_DECL (innerc);
4168 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4169 OMP_CLAUSE__LOOPTEMP_);
4170 gcc_assert (innerc);
4171 t1 = OMP_CLAUSE_DECL (innerc);
4172 }
4173 if (POINTER_TYPE_P (TREE_TYPE (t0))
4174 && TYPE_PRECISION (TREE_TYPE (t0))
4175 != TYPE_PRECISION (fd->iter_type))
4176 {
4177 /* Avoid casting pointers to integer of a different size. */
4178 tree itype = signed_type_for (type);
4179 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4180 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4181 }
4182 else
4183 {
4184 t1 = fold_convert (fd->iter_type, t1);
4185 t0 = fold_convert (fd->iter_type, t0);
4186 }
4187 if (bias)
4188 {
4189 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4190 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4191 }
4192 }
4193 if (fd->iter_type == long_integer_type_node || fd->ordered)
4194 {
4195 if (fd->chunk_size)
4196 {
4197 t = fold_convert (fd->iter_type, fd->chunk_size);
4198 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4199 if (sched_arg)
4200 {
4201 if (fd->ordered)
4202 t = build_call_expr (builtin_decl_explicit (start_fn),
4203 8, t0, t1, sched_arg, t, t3, t4,
4204 reductions, mem);
4205 else
4206 t = build_call_expr (builtin_decl_explicit (start_fn),
4207 9, t0, t1, t2, sched_arg, t, t3, t4,
4208 reductions, mem);
4209 }
4210 else if (fd->ordered)
4211 t = build_call_expr (builtin_decl_explicit (start_fn),
4212 5, t0, t1, t, t3, t4);
4213 else
4214 t = build_call_expr (builtin_decl_explicit (start_fn),
4215 6, t0, t1, t2, t, t3, t4);
4216 }
4217 else if (fd->ordered)
4218 t = build_call_expr (builtin_decl_explicit (start_fn),
4219 4, t0, t1, t3, t4);
4220 else
4221 t = build_call_expr (builtin_decl_explicit (start_fn),
4222 5, t0, t1, t2, t3, t4);
4223 }
4224 else
4225 {
4226 tree t5;
4227 tree c_bool_type;
4228 tree bfn_decl;
4229
4230 /* The GOMP_loop_ull_*start functions have additional boolean
4231 argument, true for < loops and false for > loops.
4232 In Fortran, the C bool type can be different from
4233 boolean_type_node. */
4234 bfn_decl = builtin_decl_explicit (start_fn);
4235 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4236 t5 = build_int_cst (c_bool_type,
4237 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4238 if (fd->chunk_size)
4239 {
4240 tree bfn_decl = builtin_decl_explicit (start_fn);
4241 t = fold_convert (fd->iter_type, fd->chunk_size);
4242 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4243 if (sched_arg)
4244 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4245 t, t3, t4, reductions, mem);
4246 else
4247 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4248 }
4249 else
4250 t = build_call_expr (builtin_decl_explicit (start_fn),
4251 6, t5, t0, t1, t2, t3, t4);
4252 }
4253 }
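/* To make the call shapes above concrete (a sketch; the authoritative
   prototypes are in libgomp): for schedule(dynamic,4) on a long iterator
   without reductions the simple 6-argument form is essentially

     more = GOMP_loop_dynamic_start (n1, n2, step, 4, &istart0, &iend0);

   the doacross form passes the dimension count and the .omp_counts array
   instead of n1/n2/step, and on the unsigned long long path the call
   gains the leading direction flag computed into t5:

     more = GOMP_loop_ull_dynamic_start (up, n1, n2, step, 4,
					 &istart0, &iend0);  */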
4254 if (TREE_TYPE (t) != boolean_type_node)
4255 t = fold_build2 (NE_EXPR, boolean_type_node,
4256 t, build_int_cst (TREE_TYPE (t), 0));
4257 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4258 true, GSI_SAME_STMT);
4259 if (arr && !TREE_STATIC (arr))
4260 {
4261 tree clobber = build_clobber (TREE_TYPE (arr));
4262 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4263 GSI_SAME_STMT);
4264 }
4265 if (fd->have_pointer_condtemp)
4266 expand_omp_build_assign (&gsi, condtemp, memv, false);
4267 if (fd->have_reductemp)
4268 {
4269 gimple *g = gsi_stmt (gsi);
4270 gsi_remove (&gsi, true);
4271 release_ssa_name (gimple_assign_lhs (g));
4272
4273 entry_bb = region->entry;
4274 gsi = gsi_last_nondebug_bb (entry_bb);
4275
4276 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4277 }
4278 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4279
4280 /* Remove the GIMPLE_OMP_FOR statement. */
4281 gsi_remove (&gsi, true);
4282
4283 if (gsi_end_p (gsif))
4284 gsif = gsi_after_labels (gsi_bb (gsif));
4285 gsi_next (&gsif);
4286
4287 /* Iteration setup for sequential loop goes in L0_BB. */
4288 tree startvar = fd->loop.v;
4289 tree endvar = NULL_TREE;
4290
4291 if (gimple_omp_for_combined_p (fd->for_stmt))
4292 {
4293 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4294 && gimple_omp_for_kind (inner_stmt)
4295 == GF_OMP_FOR_KIND_SIMD);
4296 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4297 OMP_CLAUSE__LOOPTEMP_);
4298 gcc_assert (innerc);
4299 startvar = OMP_CLAUSE_DECL (innerc);
4300 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4301 OMP_CLAUSE__LOOPTEMP_);
4302 gcc_assert (innerc);
4303 endvar = OMP_CLAUSE_DECL (innerc);
4304 }
4305
4306 gsi = gsi_start_bb (l0_bb);
4307 t = istart0;
4308 if (fd->ordered && fd->collapse == 1)
4309 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4310 fold_convert (fd->iter_type, fd->loop.step));
4311 else if (bias)
4312 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4313 if (fd->ordered && fd->collapse == 1)
4314 {
4315 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4316 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4317 fd->loop.n1, fold_convert (sizetype, t));
4318 else
4319 {
4320 t = fold_convert (TREE_TYPE (startvar), t);
4321 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4322 fd->loop.n1, t);
4323 }
4324 }
4325 else
4326 {
4327 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4328 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4329 t = fold_convert (TREE_TYPE (startvar), t);
4330 }
4331 t = force_gimple_operand_gsi (&gsi, t,
4332 DECL_P (startvar)
4333 && TREE_ADDRESSABLE (startvar),
4334 NULL_TREE, false, GSI_CONTINUE_LINKING);
4335 assign_stmt = gimple_build_assign (startvar, t);
4336 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4337 if (cond_var)
4338 {
4339 tree itype = TREE_TYPE (cond_var);
4340 /* For lastprivate(conditional:) itervar, we need some iteration
4341 counter that starts at unsigned non-zero and increases.
4342 Prefer as few IVs as possible, so if we can use startvar
4343 itself, use that, or startvar + constant (those would be
4344 incremented with step), and as a last resort use s0 + 1,
4345 incremented by 1. */
4346 if ((fd->ordered && fd->collapse == 1)
4347 || bias
4348 || POINTER_TYPE_P (type)
4349 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4350 || fd->loop.cond_code != LT_EXPR)
4351 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4352 build_int_cst (itype, 1));
4353 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4354 t = fold_convert (itype, t);
4355 else
4356 {
4357 tree c = fold_convert (itype, fd->loop.n1);
4358 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4359 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4360 }
4361 t = force_gimple_operand_gsi (&gsi, t, false,
4362 NULL_TREE, false, GSI_CONTINUE_LINKING);
4363 assign_stmt = gimple_build_assign (cond_var, t);
4364 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4365 }
4366
4367 t = iend0;
4368 if (fd->ordered && fd->collapse == 1)
4369 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4370 fold_convert (fd->iter_type, fd->loop.step));
4371 else if (bias)
4372 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4373 if (fd->ordered && fd->collapse == 1)
4374 {
4375 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4376 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4377 fd->loop.n1, fold_convert (sizetype, t));
4378 else
4379 {
4380 t = fold_convert (TREE_TYPE (startvar), t);
4381 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4382 fd->loop.n1, t);
4383 }
4384 }
4385 else
4386 {
4387 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4388 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4389 t = fold_convert (TREE_TYPE (startvar), t);
4390 }
4391 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4392 false, GSI_CONTINUE_LINKING);
4393 if (endvar)
4394 {
4395 assign_stmt = gimple_build_assign (endvar, iend);
4396 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4397 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4398 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4399 else
4400 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4401 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4402 }
4403 /* Handle linear clause adjustments. */
4404 tree itercnt = NULL_TREE;
4405 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4406 for (tree c = gimple_omp_for_clauses (gs: fd->for_stmt);
4407 c; c = OMP_CLAUSE_CHAIN (c))
4408 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4409 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4410 {
4411 tree d = OMP_CLAUSE_DECL (c);
4412 tree t = d, a, dest;
4413 if (omp_privatize_by_reference (t))
4414 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4415 tree type = TREE_TYPE (t);
4416 if (POINTER_TYPE_P (type))
4417 type = sizetype;
4418 dest = unshare_expr (t);
4419 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4420 expand_omp_build_assign (&gsif, v, t);
4421 if (itercnt == NULL_TREE)
4422 {
4423 itercnt = startvar;
4424 tree n1 = fd->loop.n1;
4425 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4426 {
4427 itercnt
4428 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4429 itercnt);
4430 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4431 }
4432 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4433 itercnt, n1);
4434 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4435 itercnt, fd->loop.step);
4436 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4437 NULL_TREE, false,
4438 GSI_CONTINUE_LINKING);
4439 }
4440 a = fold_build2 (MULT_EXPR, type,
4441 fold_convert (type, itercnt),
4442 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4443 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4444 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4445 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4446 false, GSI_CONTINUE_LINKING);
4447 expand_omp_build_assign (&gsi, dest, t, true);
4448 }
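/* An informal example of the linear adjustment above: given

     #pragma omp parallel for linear(x:2)

   a thread starting its subchunk at logical iteration itercnt executes

     x = x_entry + itercnt * 2;

   where x_entry (the temporary V above) holds the value of x on entry,
   so x is exactly what it would have been had the preceding iterations
   run sequentially.  */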
4449 if (fd->collapse > 1)
4450 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4451
4452 if (fd->ordered)
4453 {
4454 /* Until now, counts array contained number of iterations or
4455 variable containing it for ith loop. From now on, we usually need
4456 those counts only for collapsed loops, and only for the 2nd
4457 till the last collapsed one. Move those one element earlier,
4458 we'll use counts[fd->collapse - 1] for the first source/sink
4459 iteration counter and so on and counts[fd->ordered]
4460 as the array holding the current counter values for
4461 depend(source). For doacross(sink:omp_cur_iteration - 1) we need
4462 the counts from fd->collapse to fd->ordered - 1; make a copy of
4463 those to counts[fd->ordered + 2] and onwards.
4464 counts[fd->ordered + 1] can be a flag whether it is the first
4465 iteration with a new collapsed counter (used only if
4466 fd->ordered > fd->collapse). */
4467 if (fd->ordered > fd->collapse)
4468 memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4469 (fd->ordered - fd->collapse) * sizeof (counts[0]));
4470 if (fd->collapse > 1)
4471 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4472 if (broken_loop)
4473 {
4474 int i;
4475 for (i = fd->collapse; i < fd->ordered; i++)
4476 {
4477 tree type = TREE_TYPE (fd->loops[i].v);
4478 tree this_cond
4479 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4480 fold_convert (type, fd->loops[i].n1),
4481 fold_convert (type, fd->loops[i].n2));
4482 if (!integer_onep (this_cond))
4483 break;
4484 }
4485 if (i < fd->ordered)
4486 {
4487 if (entry_bb->loop_father != l0_bb->loop_father)
4488 {
4489 remove_bb_from_loops (l0_bb);
4490 add_bb_to_loop (l0_bb, entry_bb->loop_father);
4491 gcc_assert (single_succ (l0_bb) == l1_bb);
4492 }
4493 cont_bb
4494 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4495 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4496 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4497 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4498 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4499 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4500 make_edge (cont_bb, l1_bb, 0);
4501 l2_bb = create_empty_bb (cont_bb);
4502 broken_loop = false;
4503 }
4504 }
4505 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4506 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4507 l0_bb, ordered_lastprivate);
4508 if (counts[fd->collapse - 1])
4509 {
4510 gcc_assert (fd->collapse == 1);
4511 gsi = gsi_last_bb (l0_bb);
4512 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4513 istart0, true);
4514 if (cont_bb)
4515 {
4516 gsi = gsi_last_bb (cont_bb);
4517 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4518 counts[fd->collapse - 1],
4519 build_int_cst (fd->iter_type, 1));
4520 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4521 tree aref = build4 (ARRAY_REF, fd->iter_type,
4522 counts[fd->ordered], size_zero_node,
4523 NULL_TREE, NULL_TREE);
4524 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4525 }
4526 t = counts[fd->collapse - 1];
4527 }
4528 else if (fd->collapse > 1)
4529 t = fd->loop.v;
4530 else
4531 {
4532 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4533 fd->loops[0].v, fd->loops[0].n1);
4534 t = fold_convert (fd->iter_type, t);
4535 }
4536 gsi = gsi_last_bb (l0_bb);
4537 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4538 size_zero_node, NULL_TREE, NULL_TREE);
4539 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4540 false, GSI_CONTINUE_LINKING);
4541 expand_omp_build_assign (&gsi, aref, t, true);
4542 }
4543
4544 if (!broken_loop)
4545 {
4546 /* Code to control the increment and predicate for the sequential
4547 loop goes in the CONT_BB. */
4548 gsi = gsi_last_nondebug_bb (cont_bb);
4549 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4550 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4551 vmain = gimple_omp_continue_control_use (cont_stmt);
4552 vback = gimple_omp_continue_control_def (cont_stmt);
4553
4554 if (cond_var)
4555 {
4556 tree itype = TREE_TYPE (cond_var);
4557 tree t2;
4558 if ((fd->ordered && fd->collapse == 1)
4559 || bias
4560 || POINTER_TYPE_P (type)
4561 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4562 || fd->loop.cond_code != LT_EXPR)
4563 t2 = build_int_cst (itype, 1);
4564 else
4565 t2 = fold_convert (itype, fd->loop.step);
4566 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4567 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4568 NULL_TREE, true, GSI_SAME_STMT);
4569 assign_stmt = gimple_build_assign (cond_var, t2);
4570 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4571 }
4572
4573 if (!gimple_omp_for_combined_p (fd->for_stmt))
4574 {
4575 if (POINTER_TYPE_P (type))
4576 t = fold_build_pointer_plus (vmain, fd->loop.step);
4577 else
4578 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4579 t = force_gimple_operand_gsi (&gsi, t,
4580 DECL_P (vback)
4581 && TREE_ADDRESSABLE (vback),
4582 NULL_TREE, true, GSI_SAME_STMT);
4583 assign_stmt = gimple_build_assign (vback, t);
4584 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4585
4586 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4587 {
4588 tree tem;
4589 if (fd->collapse > 1)
4590 tem = fd->loop.v;
4591 else
4592 {
4593 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4594 fd->loops[0].v, fd->loops[0].n1);
4595 tem = fold_convert (fd->iter_type, tem);
4596 }
4597 tree aref = build4 (ARRAY_REF, fd->iter_type,
4598 counts[fd->ordered], size_zero_node,
4599 NULL_TREE, NULL_TREE);
4600 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4601 true, GSI_SAME_STMT);
4602 expand_omp_build_assign (&gsi, aref, tem);
4603 }
4604
4605 t = build2 (fd->loop.cond_code, boolean_type_node,
4606 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4607 iend);
4608 gcond *cond_stmt = gimple_build_cond_empty (t);
4609 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4610 }
4611
4612 /* Remove GIMPLE_OMP_CONTINUE. */
4613 gsi_remove (&gsi, true);
4614
4615 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4616 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4617
4618 /* Emit code to get the next parallel iteration in L2_BB. */
4619 gsi = gsi_start_bb (l2_bb);
4620
4621 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4622 build_fold_addr_expr (istart0),
4623 build_fold_addr_expr (iend0));
4624 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4625 false, GSI_CONTINUE_LINKING);
4626 if (TREE_TYPE (t) != boolean_type_node)
4627 t = fold_build2 (NE_EXPR, boolean_type_node,
4628 t, build_int_cst (TREE_TYPE (t), 0));
4629 gcond *cond_stmt = gimple_build_cond_empty (t);
4630 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4631 }
4632
4633 /* Add the loop cleanup function. */
4634 gsi = gsi_last_nondebug_bb (exit_bb);
4635 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4636 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4637 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4638 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4639 else
4640 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4641 gcall *call_stmt = gimple_build_call (t, 0);
4642 if (fd->ordered)
4643 {
4644 tree arr = counts[fd->ordered];
4645 tree clobber = build_clobber (TREE_TYPE (arr));
4646 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4647 GSI_SAME_STMT);
4648 }
4649 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4650 {
4651 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4652 if (fd->have_reductemp)
4653 {
4654 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4655 gimple_call_lhs (call_stmt));
4656 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4657 }
4658 }
4659 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4660 gsi_remove (&gsi, true);
4661
4662 /* Connect the new blocks. */
4663 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4664 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4665
4666 if (!broken_loop)
4667 {
4668 gimple_seq phis;
4669
4670 e = find_edge (cont_bb, l3_bb);
4671 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4672
4673 phis = phi_nodes (l3_bb);
4674 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4675 {
4676 gimple *phi = gsi_stmt (gsi);
4677 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4678 PHI_ARG_DEF_FROM_EDGE (phi, e));
4679 }
4680 remove_edge (e);
4681
4682 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4683 e = find_edge (cont_bb, l1_bb);
4684 if (e == NULL)
4685 {
4686 e = BRANCH_EDGE (cont_bb);
4687 gcc_assert (single_succ (e->dest) == l1_bb);
4688 }
4689 if (gimple_omp_for_combined_p (fd->for_stmt))
4690 {
4691 remove_edge (e);
4692 e = NULL;
4693 }
4694 else if (fd->collapse > 1)
4695 {
4696 remove_edge (e);
4697 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4698 }
4699 else
4700 e->flags = EDGE_TRUE_VALUE;
4701 if (e)
4702 {
4703 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4704 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4705 }
4706 else
4707 {
4708 e = find_edge (cont_bb, l2_bb);
4709 e->flags = EDGE_FALLTHRU;
4710 }
4711 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4712
4713 if (gimple_in_ssa_p (cfun))
4714 {
4715 /* Add phis to the outer loop that connect to the phis in the inner,
4716 original loop, and move the loop entry value of the inner phi to
4717 the loop entry value of the outer phi. */
4718 gphi_iterator psi;
4719 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4720 {
4721 location_t locus;
4722 gphi *nphi;
4723 gphi *exit_phi = psi.phi ();
4724
4725 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4726 continue;
4727
4728 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4729 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4730
4731 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4732 edge latch_to_l1 = find_edge (latch, l1_bb);
4733 gphi *inner_phi
4734 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4735
4736 tree t = gimple_phi_result (exit_phi);
4737 tree new_res = copy_ssa_name (t, NULL);
4738 nphi = create_phi_node (new_res, l0_bb);
4739
4740 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4741 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4742 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4743 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4744 add_phi_arg (nphi, t, entry_to_l0, locus);
4745
4746 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4747 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4748
4749 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4750 }
4751 }
4752
4753 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4754 recompute_dominator (CDI_DOMINATORS, l2_bb));
4755 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4756 recompute_dominator (CDI_DOMINATORS, l3_bb));
4757 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4758 recompute_dominator (CDI_DOMINATORS, l0_bb));
4759 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4760 recompute_dominator (CDI_DOMINATORS, l1_bb));
4761
4762 /* We enter expand_omp_for_generic with a loop. This original loop may
4763 have its own loop struct, or it may be part of an outer loop struct
4764 (which may be the fake loop). */
4765 class loop *outer_loop = entry_bb->loop_father;
4766 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4767
4768 add_bb_to_loop (l2_bb, outer_loop);
4769
4770 /* We've added a new loop around the original loop. Allocate the
4771 corresponding loop struct. */
4772 class loop *new_loop = alloc_loop ();
4773 new_loop->header = l0_bb;
4774 new_loop->latch = l2_bb;
4775 add_loop (new_loop, outer_loop);
4776
4777 /* Allocate a loop structure for the original loop unless we already
4778 had one. */
4779 if (!orig_loop_has_loop_struct
4780 && !gimple_omp_for_combined_p (fd->for_stmt))
4781 {
4782 class loop *orig_loop = alloc_loop ();
4783 orig_loop->header = l1_bb;
4784 /* The loop may have multiple latches. */
4785 add_loop (orig_loop, new_loop);
4786 }
4787 }
4788}
4789
4790/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4791 compute the needed allocation size: for !ALLOC this covers the team
4792 allocations, for ALLOC the thread allocations. SZ is the initial size
4793 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4794 allocation in bytes, and CNT the number of elements of each array: for
4795 !ALLOC this is omp_get_num_threads (), for ALLOC the number of
4796 iterations handled by the current thread. If PTR is non-NULL, it is
4797 the start of the allocation, and this routine shall assign pointers to
4798 the corresponding arrays to OMP_CLAUSE_DECL (c) of the _scantemp_ clauses. */
4799
4800static tree
4801expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4802 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4803 gimple_stmt_iterator *gsi, bool alloc)
4804{
4805 tree eltsz = NULL_TREE;
4806 unsigned HOST_WIDE_INT preval = 0;
4807 if (ptr && sz)
4808 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4809 ptr, size_int (sz));
4810 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4811 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4812 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4813 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4814 {
4815 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4816 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4817 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4818 {
4819 unsigned HOST_WIDE_INT szl
4820 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4821 szl = least_bit_hwi (szl);
4822 if (szl)
4823 al = MIN (al, szl);
4824 }
4825 if (ptr == NULL_TREE)
4826 {
4827 if (eltsz == NULL_TREE)
4828 eltsz = TYPE_SIZE_UNIT (pointee_type);
4829 else
4830 eltsz = size_binop (PLUS_EXPR, eltsz,
4831 TYPE_SIZE_UNIT (pointee_type));
4832 }
4833 if (preval == 0 && al <= alloc_align)
4834 {
4835 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4836 sz += diff;
4837 if (diff && ptr)
4838 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4839 ptr, size_int (diff));
4840 }
4841 else if (al > preval)
4842 {
4843 if (ptr)
4844 {
4845 ptr = fold_convert (pointer_sized_int_node, ptr);
4846 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4847 build_int_cst (pointer_sized_int_node,
4848 al - 1));
4849 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4850 build_int_cst (pointer_sized_int_node,
4851 -(HOST_WIDE_INT) al));
4852 ptr = fold_convert (ptr_type_node, ptr);
4853 }
4854 else
4855 sz += al - 1;
4856 }
4857 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4858 preval = al;
4859 else
4860 preval = 1;
4861 if (ptr)
4862 {
4863 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4864 ptr = OMP_CLAUSE_DECL (c);
4865 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4866 size_binop (MULT_EXPR, cnt,
4867 TYPE_SIZE_UNIT (pointee_type)));
4868 }
4869 }
4870
4871 if (ptr == NULL_TREE)
4872 {
4873 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4874 if (sz)
4875 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4876 return eltsz;
4877 }
4878 else
4879 return ptr;
4880}
4881
/* Return the last _looptemp_ clause if one has been created for
   lastprivate on distribute parallel for{, simd} or taskloop.
   FD is the loop data and INNERC should be the second _looptemp_
   clause (the one holding the end of the range).
   This is followed by collapse - 1 _looptemp_ clauses for the
   counts[1] and up, and for triangular loops by 4 further _looptemp_
   clauses (one for counts[0], one for first_inner_iterations, one for
   factor and one for adjn1).  After this there is optionally one
   _looptemp_ clause that this function returns.  */

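/* For example (a sketch of the layout described above), for a
   rectangular collapse(3) distribute parallel for with lastprivate:
   clause 1 holds the start of the range, INNERC (clause 2) the end,
   clauses 3 and 4 counts[1] and counts[2], so the loop below advances
   INNERC twice and the optional clause 5, if present, is what gets
   returned.  */
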
static tree
find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
{
  gcc_assert (innerc);
  int count = fd->collapse - 1;
  if (fd->non_rect
      && fd->last_nonrect == fd->first_nonrect + 1
      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
    count += 4;
  for (int i = 0; i < count; i++)
    {
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
    }
  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                          OMP_CLAUSE__LOOPTEMP_);
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

        for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

        if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
        if (cond is <)
          adj = STEP - 1;
        else
          adj = STEP + 1;
        if ((__typeof (V)) -1 > 0 && cond is >)
          n = -(adj + N2 - N1) / -STEP;
        else
          n = (adj + N2 - N1) / STEP;
        q = n / nthreads;
        tt = n % nthreads;
        if (threadid < tt) goto L3; else goto L4;
    L3:
        tt = 0;
        q = q + 1;
    L4:
        s0 = q * threadid + tt;
        e0 = s0 + q;
        V = s0 * STEP + N1;
        if (s0 >= e0) goto L2; else goto L0;
    L0:
        e = e0 * STEP + N1;
    L1:
        BODY;
        V += STEP;
        if (V cond e) goto L1;
    L2:
*/

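/* As a concrete illustration of the partitioning above (numbers chosen
   for the example only): with n = 10 and nthreads = 4, q = 2 and
   tt = 2, so threads 0 and 1 take q + 1 = 3 iterations each ([0, 3)
   and [3, 6)) while threads 2 and 3 take 2 each ([6, 8) and [8, 10)).
   Every thread gets one contiguous range and the range sizes differ
   by at most 1.  */
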
static void
expand_omp_for_static_nochunk (struct omp_region *region,
                               struct omp_for_data *fd,
                               gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
              || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
                  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
                                  fin_bb, first_zero_iter,
                                  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
                     fold_convert (type, fd->loop.n1),
                     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
                                                n1, n2);
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
        {
          int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
          for (gphi_iterator gpi = gsi_start_phis (fin_bb);
               !gsi_end_p (gpi); gsi_next (&gpi))
            {
              gphi *phi = gpi.phi ();
              add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
                           ep, UNKNOWN_LOCATION);
            }
        }
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
        condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
         second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
                                (HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
        {
          gcc_assert (!fd->have_nonctrl_scantemp);
          tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
          reductions = OMP_CLAUSE_DECL (c);
          gcc_assert (TREE_CODE (reductions) == SSA_NAME);
          g = SSA_NAME_DEF_STMT (reductions);
          reductions = gimple_assign_rhs1 (g);
          OMP_CLAUSE_DECL (c) = reductions;
          gsi2 = gsi_for_stmt (g);
        }
      else
        {
          if (gsi_end_p (gsip))
            gsi2 = gsi_after_labels (region->entry);
          else
            gsi2 = gsip;
          reductions = null_pointer_node;
        }
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
        {
          tree type;
          if (fd->have_pointer_condtemp)
            type = TREE_TYPE (condtemp);
          else
            type = ptr_type_node;
          memv = create_tmp_var (type);
          TREE_ADDRESSABLE (memv) = 1;
          unsigned HOST_WIDE_INT sz = 0;
          tree size = NULL_TREE;
          if (fd->have_pointer_condtemp)
            {
              sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
              sz *= fd->lastprivate_conditional;
              condtemp_sz = sz;
            }
          if (fd->have_nonctrl_scantemp)
            {
              nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
              gimple *g = gimple_build_call (nthreads, 0);
              nthreads = create_tmp_var (integer_type_node);
              gimple_call_set_lhs (g, nthreads);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              nthreads = fold_convert (sizetype, nthreads);
              alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
              size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
                                                alloc_align, nthreads, NULL,
                                                false);
              size = fold_convert (type, size);
            }
          else
            size = build_int_cst (type, sz);
          expand_omp_build_assign (&gsi2, memv, size, false);
          mem = build_fold_addr_expr (memv);
        }
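      /* The GOMP_loop_start call built below passes a degenerate
         (0, 1, 1) iteration range with chunk 0; as far as its arguments
         show, it is emitted purely for its side effects, registering
         REDUCTIONS with the runtime and obtaining the MEM allocation.
         Its boolean result is deliberately unused.  */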
      tree t
        = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
                           9, t1, t2, t2, t3, t1, null_pointer_node,
                           null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
        expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
        {
          tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
          expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
                                     alloc_align, nthreads, &gsi2, false);
        }
      if (fd->have_reductemp)
        {
          gsi_remove (&gsi2, true);
          release_ssa_name (gimple_assign_lhs (g));
        }
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
                                       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
                                   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
                     fold_build1 (NEGATE_EXPR, itype, t),
                     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
                     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
            && OMP_CLAUSE__SCANTEMP__CONTROL (c))
          {
            if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
              controlb = OMP_CLAUSE_DECL (c);
            else
              controlp = OMP_CLAUSE_DECL (c);
            if (controlb && controlp)
              break;
          }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
                                           alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
                              size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
                             NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
                                 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
            && OMP_CLAUSE__SCANTEMP__ALLOC (c))
          {
            tree tmp = create_tmp_var (sizetype);
            tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
            g = gimple_build_assign (tmp, MULT_EXPR, cnt,
                                     TYPE_SIZE_UNIT (pointee_type));
            gsi_insert_before (&gsi, g, GSI_SAME_STMT);
            g = gimple_build_call (alloca_decl, 2, tmp,
                                   size_int (TYPE_ALIGN (pointee_type)));
            gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
            gsi_insert_before (&gsi, g, GSI_SAME_STMT);
          }

      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
                     ? gimple_omp_parallel_clauses (inner_stmt)
                     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
          && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
        {
          innerc = find_lastprivate_looptemp (fd, innerc);
          if (innerc)
            {
              /* If needed (distribute parallel for with lastprivate),
                 propagate down the total number of iterations.  */
              tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
                                     fd->loop.n2);
              t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
                                            GSI_CONTINUE_LINKING);
              assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
              gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
            }
        }
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
          && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
        t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
                                DECL_P (startvar)
                                && TREE_ADDRESSABLE (startvar),
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
         counter that starts at unsigned non-zero and increases.
         Prefer as few IVs as possible, so if we can use startvar
         itself, use that, or startvar + constant (those would be
         incremented with step), and as a last resort use s0 + 1,
         incremented by 1.  */
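      /* For instance (an illustrative sketch, not from any testcase):
         for (i = 5; i < N; i++) can use i itself, since n1 = 5 is
         already positive, while for (i = -3; i < N; i++) uses i + 4,
         which equals 1 on the first iteration and keeps growing with
         the step.  */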
      if (POINTER_TYPE_P (type)
          || TREE_CODE (n1) != INTEGER_CST
          || fd->loop.cond_code != LT_EXPR)
        t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
                         build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
        t = fold_convert (itype, t);
      else
        {
          tree c = fold_convert (itype, n1);
          c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
          t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
        }
      t = force_gimple_operand_gsi (&gsi, t, false,
                                    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
          && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
        t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
        assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
        assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  tree *nonrect_bounds = NULL;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
         c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
          && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
        {
          tree d = OMP_CLAUSE_DECL (c);
          tree t = d, a, dest;
          if (omp_privatize_by_reference (t))
            t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
          if (itercnt == NULL_TREE)
            {
              if (gimple_omp_for_combined_into_p (fd->for_stmt))
                {
                  itercnt = fold_build2 (MINUS_EXPR, itype,
                                         fold_convert (itype, n1),
                                         fold_convert (itype, fd->loop.n1));
                  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
                  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
                  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
                                                      NULL_TREE, false,
                                                      GSI_CONTINUE_LINKING);
                }
              else
                itercnt = s0;
            }
          tree type = TREE_TYPE (t);
          if (POINTER_TYPE_P (type))
            type = sizetype;
          a = fold_build2 (MULT_EXPR, type,
                           fold_convert (type, itercnt),
                           fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
          dest = unshare_expr (t);
          t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
                           : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
          expand_omp_build_assign (&gsi, dest, t, true);
        }
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
        {
          nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
          memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
        }
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
                                startvar);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
         GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
        {
          tree itype = TREE_TYPE (cond_var);
          tree t2;
          if (POINTER_TYPE_P (type)
              || TREE_CODE (n1) != INTEGER_CST
              || fd->loop.cond_code != LT_EXPR)
            t2 = build_int_cst (itype, 1);
          else
            t2 = fold_convert (itype, step);
          t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
          t2 = force_gimple_operand_gsi (&gsi, t2, false,
                                         NULL_TREE, true, GSI_SAME_STMT);
          assign_stmt = gimple_build_assign (cond_var, t2);
          gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
        }

      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
            t = fold_build_pointer_plus (vmain, step);
          else
            t = fold_build2 (PLUS_EXPR, type, vmain, step);
          t = force_gimple_operand_gsi (&gsi, t,
                                        DECL_P (vback)
                                        && TREE_ADDRESSABLE (vback),
                                        NULL_TREE, true, GSI_SAME_STMT);
          assign_stmt = gimple_build_assign (vback, t);
          gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

          t = build2 (fd->loop.cond_code, boolean_type_node,
                      DECL_P (vback) && TREE_ADDRESSABLE (vback)
                      ? t : vback, e);
          gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
        }

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
        collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
                                                   cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
          || ((fd->have_pointer_condtemp || fd->have_scantemp)
              && !fd->have_nonctrl_scantemp))
        {
          tree fn;
          if (t)
            fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
          else
            fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
          gcall *g = gimple_build_call (fn, 0);
          if (t)
            {
              gimple_call_set_lhs (g, t);
              if (fd->have_reductemp)
                gsi_insert_after (&gsi, gimple_build_assign (reductions,
                                                             NOP_EXPR, t),
                                  GSI_SAME_STMT);
            }
          gsi_insert_after (&gsi, g, GSI_SAME_STMT);
        }
      else
        gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
           && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
            && OMP_CLAUSE__SCANTEMP__CONTROL (c))
          {
            if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
              controlb = OMP_CLAUSE_DECL (c);
            else
              controlp = OMP_CLAUSE_DECL (c);
            if (controlb && controlp)
              break;
          }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
                                     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
                             controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
                             controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always () / 4;
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
        {
          ep = BRANCH_EDGE (cont_bb);
          gcc_assert (single_succ (ep->dest) == body_bb);
        }
      if (gimple_omp_for_combined_p (fd->for_stmt))
        {
          remove_edge (ep);
          ep = NULL;
        }
      else if (fd->collapse > 1)
        {
          remove_edge (ep);
          ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
        }
      else
        ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
        = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                           recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
                  || loop->latch == region->cont
                  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
        loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
        return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

        for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

        if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
        if (cond is <)
          adj = STEP - 1;
        else
          adj = STEP + 1;
        if ((__typeof (V)) -1 > 0 && cond is >)
          n = -(adj + N2 - N1) / -STEP;
        else
          n = (adj + N2 - N1) / STEP;
        trip = 0;
        V = threadid * CHUNK * STEP + N1;  -- this extra definition of V
                                              is here so that V is defined
                                              if the loop is not entered
    L0:
        s0 = (trip * nthreads + threadid) * CHUNK;
        e0 = min (s0 + CHUNK, n);
        if (s0 < n) goto L1; else goto L4;
    L1:
        V = s0 * STEP + N1;
        e = e0 * STEP + N1;
    L2:
        BODY;
        V += STEP;
        if (V cond e) goto L2; else goto L3;
    L3:
        trip += 1;
        goto L0;
    L4:
*/

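/* A concrete illustration of the chunked round-robin schedule above
   (example numbers only): with n = 10, CHUNK = 2 and nthreads = 2,
   thread 0 executes chunks [0, 2), [4, 6) and [8, 10) on trips 0, 1
   and 2, while thread 1 executes [2, 4) and [6, 8); on trip 2 thread 1
   computes s0 = 10, fails the s0 < n test and exits via L4.  */
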
static void
expand_omp_for_static_chunk (struct omp_region *region,
                             struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi, gsip;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_nondebug_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
              || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
                  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
                                  fin_bb, first_zero_iter,
                                  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
                     fold_convert (type, fd->loop.n1),
                     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
                                                n1, n2);
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = profile_probability::very_likely ();
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
        {
          int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
          for (gphi_iterator gpi = gsi_start_phis (fin_bb);
               !gsi_end_p (gpi); gsi_next (&gpi))
            {
              gphi *phi = gpi.phi ();
              add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
                           se, UNKNOWN_LOCATION);
            }
        }
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
        condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp || fd->have_pointer_condtemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
                                (HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      if (fd->have_reductemp)
        {
          tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
          reductions = OMP_CLAUSE_DECL (c);
          gcc_assert (TREE_CODE (reductions) == SSA_NAME);
          g = SSA_NAME_DEF_STMT (reductions);
          reductions = gimple_assign_rhs1 (g);
          OMP_CLAUSE_DECL (c) = reductions;
          gsi2 = gsi_for_stmt (g);
        }
      else
        {
          if (gsi_end_p (gsip))
            gsi2 = gsi_after_labels (region->entry);
          else
            gsi2 = gsip;
          reductions = null_pointer_node;
        }
      if (fd->have_pointer_condtemp)
        {
          tree type = TREE_TYPE (condtemp);
          memv = create_tmp_var (type);
          TREE_ADDRESSABLE (memv) = 1;
          unsigned HOST_WIDE_INT sz
            = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
          sz *= fd->lastprivate_conditional;
          expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
                                   false);
          mem = build_fold_addr_expr (memv);
        }
      tree t
        = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
                           9, t1, t2, t2, t3, t1, null_pointer_node,
                           null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
        expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_reductemp)
        {
          gsi_remove (&gsi2, true);
          release_ssa_name (gimple_assign_lhs (g));
        }
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
                                       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
                                   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
                                GSI_SAME_STMT);

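  /* Note: the omp_adjust_chunk_size call above is what distinguishes
     schedule(simd:static, n) from plain schedule(static, n); for a simd
     schedule it rounds the chunk size up so that chunks stay multiples
     of the simd width (see its definition in omp-general.cc).  */
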
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
                     fold_build1 (NEGATE_EXPR, itype, t),
                     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);

  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
                     ? gimple_omp_parallel_clauses (inner_stmt)
                     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
          && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
        {
          innerc = find_lastprivate_looptemp (fd, innerc);
          if (innerc)
            {
              /* If needed (distribute parallel for with lastprivate),
                 propagate down the total number of iterations.  */
              tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
                                     fd->loop.n2);
              t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
                                            GSI_CONTINUE_LINKING);
              assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
              gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
            }
        }
    }

  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
          && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
        t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
                                DECL_P (startvar)
                                && TREE_ADDRESSABLE (startvar),
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
         counter that starts at unsigned non-zero and increases.
         Prefer as few IVs as possible, so if we can use startvar
         itself, use that, or startvar + constant (those would be
         incremented with step), and as a last resort use s0 + 1,
         incremented by 1.  */
      if (POINTER_TYPE_P (type)
          || TREE_CODE (n1) != INTEGER_CST
          || fd->loop.cond_code != LT_EXPR)
        t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
                         build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
        t = fold_convert (itype, t);
      else
        {
          tree c = fold_convert (itype, n1);
          c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
          t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
        }
      t = force_gimple_operand_gsi (&gsi, t, false,
                                    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
          && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
        t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
        assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
        assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
         c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
          && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
        {
          tree d = OMP_CLAUSE_DECL (c);
          tree t = d, a, dest;
          if (omp_privatize_by_reference (t))
            t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
          tree type = TREE_TYPE (t);
          if (POINTER_TYPE_P (type))
            type = sizetype;
          dest = unshare_expr (t);
          tree v = create_tmp_var (TREE_TYPE (t), NULL);
          expand_omp_build_assign (&gsif, v, t);
          if (itercnt == NULL_TREE)
            {
              if (gimple_omp_for_combined_into_p (fd->for_stmt))
                {
                  itercntbias
                    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
                                   fold_convert (itype, fd->loop.n1));
                  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
                                             itercntbias, step);
                  itercntbias
                    = force_gimple_operand_gsi (&gsif, itercntbias, true,
                                                NULL_TREE, true,
                                                GSI_SAME_STMT);
                  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
                  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
                                                      NULL_TREE, false,
                                                      GSI_CONTINUE_LINKING);
                }
              else
                itercnt = s0;
            }
          a = fold_build2 (MULT_EXPR, type,
                           fold_convert (type, itercnt),
                           fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
          t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
                           : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
          expand_omp_build_assign (&gsi, dest, t, true);
        }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
         replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
        {
          tree itype = TREE_TYPE (cond_var);
          tree t2;
          if (POINTER_TYPE_P (type)
              || TREE_CODE (n1) != INTEGER_CST
              || fd->loop.cond_code != LT_EXPR)
            t2 = build_int_cst (itype, 1);
          else
            t2 = fold_convert (itype, step);
          t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
          t2 = force_gimple_operand_gsi (&gsi, t2, false,
                                         NULL_TREE, true, GSI_SAME_STMT);
          assign_stmt = gimple_build_assign (cond_var, t2);
          gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
        }

      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
            t = fold_build_pointer_plus (vmain, step);
          else
            t = fold_build2 (PLUS_EXPR, type, vmain, step);
          if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
            t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                          true, GSI_SAME_STMT);
          assign_stmt = gimple_build_assign (vback, t);
          gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

          if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
            t = build2 (EQ_EXPR, boolean_type_node,
                        build_int_cst (itype, 0),
                        build_int_cst (itype, 1));
          else
            t = build2 (fd->loop.cond_code, boolean_type_node,
                        DECL_P (vback) && TREE_ADDRESSABLE (vback)
                        ? t : vback, e);
          gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
        }

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
        collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB.  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp || fd->have_pointer_condtemp)
        {
          tree fn;
          if (t)
            fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
          else
            fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
          gcall *g = gimple_build_call (fn, 0);
          if (t)
            {
              gimple_call_set_lhs (g, t);
              if (fd->have_reductemp)
                gsi_insert_after (&gsi, gimple_build_assign (reductions,
                                                             NOP_EXPR, t),
                                  GSI_SAME_STMT);
            }
          gsi_insert_after (&gsi, g, GSI_SAME_STMT);
        }
      else
        gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if (fd->have_pointer_condtemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
        {
          se = BRANCH_EDGE (cont_bb);
          gcc_assert (single_succ (se->dest) == body_bb);
        }
      if (gimple_omp_for_combined_p (fd->for_stmt))
        {
          remove_edge (se);
          se = NULL;
        }
      else if (fd->collapse > 1)
        {
          remove_edge (se);
          se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
        }
      else
        se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
        = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
                                iter_part_bb);
    }

  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
         remove arguments of the phi nodes in fin_bb.  We need to create
         appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
           gsi_next (&psi), ++i)
        {
          gphi *nphi;
          location_t locus;

          phi = psi.phi ();
          if (operand_equal_p (gimple_phi_arg_def (phi, 0),
                               redirect_edge_var_map_def (vm), 0))
            continue;

          t = gimple_phi_result (phi);
          gcc_assert (t == redirect_edge_var_map_result (vm));

          if (!single_pred_p (fin_bb))
            t = copy_ssa_name (t, phi);

          nphi = create_phi_node (t, iter_part_bb);

          t = PHI_ARG_DEF_FROM_EDGE (phi, se);
          locus = gimple_phi_arg_location_from_edge (phi, se);

          /* A special case -- fd->loop.v is not yet computed in
             iter_part_bb, we need to use vextra instead.  */
          if (t == fd->loop.v)
            t = vextra;
          add_phi_arg (nphi, t, ene, locus);
          locus = redirect_edge_var_map_location (vm);
          tree back_arg = redirect_edge_var_map_def (vm);
          add_phi_arg (nphi, back_arg, re, locus);
          edge ce = find_edge (cont_bb, body_bb);
          if (ce == NULL)
            {
              ce = BRANCH_EDGE (cont_bb);
              gcc_assert (single_succ (ce->dest) == body_bb);
              ce = single_succ_edge (ce->dest);
            }
          gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
          gcc_assert (inner_loop_phi != NULL);
          add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
                       find_edge (seq_start_bb, body_bb), locus);

          if (!single_pred_p (fin_bb))
            add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
        }
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      if (single_pred_p (fin_bb))
        while (1)
          {
            psi = gsi_start_phis (fin_bb);
            if (gsi_end_p (psi))
              break;
            remove_phi_node (&psi, false);
          }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
                   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
                   UNKNOWN_LOCATION);
    }

  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
                           recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                           recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
                           recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));

  if (!broken_loop)
    {
      class loop *loop = body_bb->loop_father;
      class loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
        {
          gcc_assert (loop->header == body_bb);
          gcc_assert (loop->latch == region->cont
                      || single_pred (loop->latch) == region->cont);
          trip_loop->inner = loop;
          return;
        }

      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          loop = alloc_loop ();
          loop->header = body_bb;
          if (collapse_bb == NULL)
            loop->latch = cont_bb;
          add_loop (loop, trip_loop);
        }
    }
}

6415/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6416 loop. Given parameters:
6417
6418 for (V = N1; V cond N2; V += STEP) BODY;
6419
6420 where COND is "<" or ">", we generate pseudocode
6421
6422 V = N1;
6423 goto L1;
6424 L0:
6425 BODY;
6426 V += STEP;
6427 L1:
6428 if (V cond N2) goto L0; else goto L2;
6429 L2:
6430
6431 For collapsed loops, emit the outer loops as scalar
6432 and only try to vectorize the innermost loop. */
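
/* For example (an illustrative instance of the pseudocode above, not taken
   from a real dump), a simple non-collapsed

       #pragma omp simd
       for (V = 0; V < N; V++) BODY;

   is expanded as

       V = 0;
       goto L1;
     L0:
       BODY;
       V += 1;
     L1:
       if (V < N) goto L0; else goto L2;
     L2:

   and the resulting loop is then annotated (safelen, simduid, possibly
   force_vectorize) at the end of this function so the vectorizer can pick
   it up.  */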

static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  int safelen_int = INT_MAX;
  bool dont_vectorize = false;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE__SIMDUID_);
  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                              OMP_CLAUSE_IF);
  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE_SIMDLEN);
  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                   OMP_CLAUSE__CONDTEMP_);
  tree n1, n2;
  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;

  if (safelen)
    {
      poly_uint64 val;
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (!poly_int_tree_p (safelen, &val))
        safelen_int = 0;
      else
        safelen_int = MIN (constant_lower_bound (val), INT_MAX);
      if (safelen_int == 1)
        safelen_int = 0;
    }
  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
    {
      safelen_int = 0;
      dont_vectorize = true;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
              || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_nondebug_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1
      && (gimple_omp_for_combined_into_p (fd->for_stmt)
          || broken_loop))
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
                                  zero_iter_bb, first_zero_iter,
                                  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;
  tree orig_step = step; /* May be different from step if is_simt.  */

  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
                                 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
        n1 = fold_build_pointer_plus (n1, offset);
      else
        n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
        simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
        simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
        = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
                                        unsigned_type_node, 0);
      if (simt_maxlane)
        vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  tree n2var = NULL_TREE;
  tree n2v = NULL_TREE;
  tree *nonrect_bounds = NULL;
  tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
  if (fd->collapse > 1)
    {
      if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
        {
          if (fd->non_rect)
            {
              nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
              memset (nonrect_bounds, 0,
                      sizeof (tree) * (fd->last_nonrect + 1));
            }
          expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
          gcc_assert (entry_bb == gsi_bb (gsi));
          gcc_assert (fd->for_stmt == gsi_stmt (gsi));
          gsi_prev (&gsi);
          entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
          expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
                                    NULL, n1);
          gsi = gsi_for_stmt (fd->for_stmt);
        }
      if (broken_loop)
        ;
      else if (gimple_omp_for_combined_into_p (fd->for_stmt))
        {
          /* Compute in n2var the limit for the first innermost loop,
             i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
             where cnt is how many iterations the loop would have if
             all further iterations were assigned to the current task.  */
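          /* E.g. with v = 0, n2 = 10 and step = 3 under LT_EXPR this gives
             cnt = (3 - 1 + 10 - 0) / 3 = 4, the rounded-up remaining
             iteration count, computed below with the usual
             (step - 1 + n2 - v) / step trick.  */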
          n2var = create_tmp_var (type);
          i = fd->collapse - 1;
          tree itype = TREE_TYPE (fd->loops[i].v);
          if (POINTER_TYPE_P (itype))
            itype = signed_type_for (itype);
          t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
                                     ? -1 : 1));
          t = fold_build2 (PLUS_EXPR, itype,
                           fold_convert (itype, fd->loops[i].step), t);
          t = fold_build2 (PLUS_EXPR, itype, t,
                           fold_convert (itype, fd->loops[i].n2));
          if (fd->loops[i].m2)
            {
              tree t2 = fold_convert (itype,
                                      fd->loops[i - fd->loops[i].outer].v);
              tree t3 = fold_convert (itype, fd->loops[i].m2);
              t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
              t = fold_build2 (PLUS_EXPR, itype, t, t2);
            }
          t = fold_build2 (MINUS_EXPR, itype, t,
                           fold_convert (itype, fd->loops[i].v));
          if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
            t = fold_build2 (TRUNC_DIV_EXPR, itype,
                             fold_build1 (NEGATE_EXPR, itype, t),
                             fold_build1 (NEGATE_EXPR, itype,
                                          fold_convert (itype,
                                                        fd->loops[i].step)));
          else
            t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
                             fold_convert (itype, fd->loops[i].step));
          t = fold_convert (type, t);
          tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
          min_arg1 = create_tmp_var (type);
          expand_omp_build_assign (&gsi, min_arg1, t2);
          min_arg2 = create_tmp_var (type);
          expand_omp_build_assign (&gsi, min_arg2, t);
        }
      else
        {
          if (TREE_CODE (n2) == INTEGER_CST)
            {
              /* Indicate for lastprivate handling that at least one iteration
                 has been performed, without wasting runtime.  */
              if (integer_nonzerop (n2))
                expand_omp_build_assign (&gsi, fd->loop.v,
                                         fold_convert (type, n2));
              else
                /* Indicate that no iteration has been performed.  */
                expand_omp_build_assign (&gsi, fd->loop.v,
                                         build_one_cst (type));
            }
          else
            {
              expand_omp_build_assign (&gsi, fd->loop.v,
                                       build_zero_cst (type));
              expand_omp_build_assign (&gsi, n2, build_one_cst (type));
            }
          for (i = 0; i < fd->collapse; i++)
            {
              t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
              if (fd->loops[i].m1)
                {
                  tree t2
                    = fold_convert (TREE_TYPE (t),
                                    fd->loops[i - fd->loops[i].outer].v);
                  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
                  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
                  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
                }
              expand_omp_build_assign (&gsi, fd->loops[i].v, t);
              /* For normal non-combined collapsed loops just initialize
                 the outermost iterator in the entry_bb.  */
              if (!broken_loop)
                break;
            }
        }
    }
  else
    expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  tree altv = NULL_TREE, altn2 = NULL_TREE;
  if (fd->collapse == 1
      && !broken_loop
      && TREE_CODE (orig_step) != INTEGER_CST)
    {
      /* The vectorizer currently punts on loops with non-constant steps
         for the main IV (it can't compute the number of iterations and
         gives up because of that).  Since for OpenMP loops it is always
         possible to compute the number of iterations upfront, use an
         alternate IV as the loop iterator:
             altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
             for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
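      /* For instance, with n1 = 0, n2 = 10 and step = 3 this computes
         altn2 = (10 - 0 + 3 - 1) / 3 = 4, so altv runs 0..3 while i takes
         the values 0, 3, 6 and 9.  */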
      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
      expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
      tree itype = TREE_TYPE (fd->loop.v);
      if (POINTER_TYPE_P (itype))
        itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
                       fold_convert (itype, step), t);
      t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
                       fold_convert (itype, fd->loop.v));
      if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype,
                                      fold_convert (itype, step)));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
                         fold_convert (itype, step));
      t = fold_convert (TREE_TYPE (altv), t);
      altn2 = create_tmp_var (TREE_TYPE (altv));
      expand_omp_build_assign (&gsi, altn2, t);
      tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
      t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
      gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
                                        build_zero_cst (TREE_TYPE (altv)));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  else if (fd->collapse > 1
           && !broken_loop
           && !gimple_omp_for_combined_into_p (fd->for_stmt)
           && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
    {
      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
      altn2 = create_tmp_var (TREE_TYPE (altv));
    }
  if (cond_var)
    {
      if (POINTER_TYPE_P (type)
          || TREE_CODE (n1) != INTEGER_CST
          || fd->loop.cond_code != LT_EXPR
          || tree_int_cst_sgn (n1) != 1)
        expand_omp_build_assign (&gsi, cond_var,
                                 build_one_cst (TREE_TYPE (cond_var)));
      else
        expand_omp_build_assign (&gsi, cond_var,
                                 fold_convert (TREE_TYPE (cond_var), n1));
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (fd->collapse == 1
          || gimple_omp_for_combined_into_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
            t = fold_build_pointer_plus (fd->loop.v, step);
          else
            t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
          expand_omp_build_assign (&gsi, fd->loop.v, t);
        }
      else if (TREE_CODE (n2) != INTEGER_CST)
        expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
      if (altv)
        {
          t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
                           build_one_cst (TREE_TYPE (altv)));
          expand_omp_build_assign (&gsi, altv, t);
        }

      if (fd->collapse > 1)
        {
          i = fd->collapse - 1;
          if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
            t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
          else
            {
              t = fold_convert (TREE_TYPE (fd->loops[i].v),
                                fd->loops[i].step);
              t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
                               fd->loops[i].v, t);
            }
          expand_omp_build_assign (&gsi, fd->loops[i].v, t);
        }
      if (cond_var)
        {
          if (POINTER_TYPE_P (type)
              || TREE_CODE (n1) != INTEGER_CST
              || fd->loop.cond_code != LT_EXPR
              || tree_int_cst_sgn (n1) != 1)
            t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
                             build_one_cst (TREE_TYPE (cond_var)));
          else
            t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
                             fold_convert (TREE_TYPE (cond_var), step));
          expand_omp_build_assign (&gsi, cond_var, t);
        }

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  if (altv)
    t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
  else if (fd->collapse > 1
           && !gimple_omp_for_combined_into_p (fd->for_stmt)
           && !broken_loop)
    {
      i = fd->collapse - 1;
      tree itype = TREE_TYPE (fd->loops[i].v);
      if (fd->loops[i].m2)
        t = n2v = create_tmp_var (itype);
      else
        t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                    false, GSI_CONTINUE_LINKING);
      tree v = fd->loops[i].v;
      if (DECL_P (v) && TREE_ADDRESSABLE (v))
        v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
    }
  else
    {
      if (fd->collapse > 1 && !broken_loop)
        t = n2var;
      else
        t = fold_convert (type, unshare_expr (n2));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                    false, GSI_CONTINUE_LINKING);
      tree v = fd->loop.v;
      if (DECL_P (v) && TREE_ADDRESSABLE (v))
        v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
      t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
    }
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
                 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
                    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
      if (POINTER_TYPE_P (type))
        t = fold_build_pointer_plus (fd->loop.v, step);
      else
        t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
                                     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
        = profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
        = FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop && fd->collapse > 1)
    {
      basic_block last_bb = l1_bb;
      basic_block init_bb = NULL;
      for (i = fd->collapse - 2; i >= 0; i--)
        {
          tree nextn2v = NULL_TREE;
          if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
            e = EDGE_SUCC (last_bb, 0);
          else
            e = EDGE_SUCC (last_bb, 1);
          basic_block bb = split_edge (e);
          if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
            t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
          else
            {
              t = fold_convert (TREE_TYPE (fd->loops[i].v),
                                fd->loops[i].step);
              t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
                               fd->loops[i].v, t);
            }
          gsi = gsi_after_labels (bb);
          expand_omp_build_assign (&gsi, fd->loops[i].v, t);

          bb = split_block (bb, last_nondebug_stmt (bb))->dest;
          gsi = gsi_start_bb (bb);
          tree itype = TREE_TYPE (fd->loops[i].v);
          if (fd->loops[i].m2)
            t = nextn2v = create_tmp_var (itype);
          else
            t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
          tree v = fd->loops[i].v;
          if (DECL_P (v) && TREE_ADDRESSABLE (v))
            v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
                                          false, GSI_CONTINUE_LINKING);
          t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
          cond_stmt = gimple_build_cond_empty (t);
          gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
          if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
                         expand_omp_regimplify_p, NULL, NULL)
              || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
                            expand_omp_regimplify_p, NULL, NULL))
            {
              gsi = gsi_for_stmt (cond_stmt);
              gimple_regimplify_operands (cond_stmt, &gsi);
            }
          ne = single_succ_edge (bb);
          ne->flags = EDGE_FALSE_VALUE;

          init_bb = create_empty_bb (bb);
          set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
          add_bb_to_loop (init_bb, bb->loop_father);
          e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
          e->probability
            = profile_probability::guessed_always ().apply_scale (7, 8);
          ne->probability = e->probability.invert ();

          gsi = gsi_after_labels (init_bb);
          if (fd->loops[i + 1].m1)
            {
              tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                                      fd->loops[i + 1
                                                - fd->loops[i + 1].outer].v);
              if (POINTER_TYPE_P (TREE_TYPE (t2)))
                t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
              else
                {
                  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                                    fd->loops[i + 1].n1);
                  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
                  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
                  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
                }
            }
          else
            t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                              fd->loops[i + 1].n1);
          expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
          if (fd->loops[i + 1].m2)
            {
              if (i + 2 == fd->collapse && (n2var || altv))
                {
                  gcc_assert (n2v == NULL_TREE);
                  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
                }
              tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                                      fd->loops[i + 1
                                                - fd->loops[i + 1].outer].v);
              if (POINTER_TYPE_P (TREE_TYPE (t2)))
                t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
              else
                {
                  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                                    fd->loops[i + 1].n2);
                  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
                  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
                  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
                }
              expand_omp_build_assign (&gsi, n2v, t);
            }
          if (i + 2 == fd->collapse && n2var)
            {
              /* For composite simd, n2 is the first iteration the current
                 task shouldn't already handle, so we effectively want to use
                 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
                 as the vectorized loop.  Except the vectorizer will not
                 vectorize that, so instead compute N2VAR as
                 N2VAR = V + MIN (N2 - V, COUNTS3) and use
                 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
                 as the loop to vectorize.  */
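              /* E.g. if V = 37, N2 = 100 and COUNTS3 = 16, then
                 N2VAR = 37 + MIN (63, 16) = 53 and the vectorized loop
                 runs 53 - 37 = 16 iterations before the outer iterators
                 are advanced.  */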
              tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
              if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
                {
                  tree itype = TREE_TYPE (fd->loops[i].v);
                  if (POINTER_TYPE_P (itype))
                    itype = signed_type_for (itype);
                  t = build_int_cst (itype, (fd->loops[i + 1].cond_code
                                             == LT_EXPR ? -1 : 1));
                  t = fold_build2 (PLUS_EXPR, itype,
                                   fold_convert (itype,
                                                 fd->loops[i + 1].step), t);
                  if (fd->loops[i + 1].m2 == NULL_TREE)
                    t = fold_build2 (PLUS_EXPR, itype, t,
                                     fold_convert (itype,
                                                   fd->loops[i + 1].n2));
                  else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
                    {
                      t = fold_build_pointer_plus (n2v, t);
                      t = fold_convert (itype, t);
                    }
                  else
                    t = fold_build2 (PLUS_EXPR, itype, t, n2v);
                  t = fold_build2 (MINUS_EXPR, itype, t,
                                   fold_convert (itype, fd->loops[i + 1].v));
                  tree step = fold_convert (itype, fd->loops[i + 1].step);
                  if (TYPE_UNSIGNED (itype)
                      && fd->loops[i + 1].cond_code == GT_EXPR)
                    t = fold_build2 (TRUNC_DIV_EXPR, itype,
                                     fold_build1 (NEGATE_EXPR, itype, t),
                                     fold_build1 (NEGATE_EXPR, itype, step));
                  else
                    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
                  t = fold_convert (type, t);
                }
              else
                t = counts[i + 1];
              expand_omp_build_assign (&gsi, min_arg1, t2);
              expand_omp_build_assign (&gsi, min_arg2, t);
              e = split_block (init_bb, last_nondebug_stmt (init_bb));
              gsi = gsi_after_labels (e->dest);
              init_bb = e->dest;
              remove_edge (FALLTHRU_EDGE (entry_bb));
              make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
              set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
              set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
              t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
              t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
              expand_omp_build_assign (&gsi, n2var, t);
            }
          if (i + 2 == fd->collapse && altv)
            {
              /* The vectorizer currently punts on loops with non-constant
                 steps for the main IV (it can't compute the number of
                 iterations and gives up because of that).  Since for OpenMP
                 loops it is always possible to compute the number of
                 iterations upfront, use an alternate IV as the loop
                 iterator.  */
              expand_omp_build_assign (&gsi, altv,
                                       build_zero_cst (TREE_TYPE (altv)));
              tree itype = TREE_TYPE (fd->loops[i + 1].v);
              if (POINTER_TYPE_P (itype))
                itype = signed_type_for (itype);
              t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
                                         ? -1 : 1));
              t = fold_build2 (PLUS_EXPR, itype,
                               fold_convert (itype, fd->loops[i + 1].step), t);
              t = fold_build2 (PLUS_EXPR, itype, t,
                               fold_convert (itype,
                                             fd->loops[i + 1].m2
                                             ? n2v : fd->loops[i + 1].n2));
              t = fold_build2 (MINUS_EXPR, itype, t,
                               fold_convert (itype, fd->loops[i + 1].v));
              tree step = fold_convert (itype, fd->loops[i + 1].step);
              if (TYPE_UNSIGNED (itype)
                  && fd->loops[i + 1].cond_code == GT_EXPR)
                t = fold_build2 (TRUNC_DIV_EXPR, itype,
                                 fold_build1 (NEGATE_EXPR, itype, t),
                                 fold_build1 (NEGATE_EXPR, itype, step));
              else
                t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
              t = fold_convert (TREE_TYPE (altv), t);
              expand_omp_build_assign (&gsi, altn2, t);
              tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
                                      fd->loops[i + 1].m2
                                      ? n2v : fd->loops[i + 1].n2);
              t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
                                fd->loops[i + 1].v, t2);
              t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
                                             true, GSI_SAME_STMT);
              gassign *g
                = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
                                       build_zero_cst (TREE_TYPE (altv)));
              gsi_insert_before (&gsi, g, GSI_SAME_STMT);
            }
          n2v = nextn2v;

          make_edge (init_bb, last_bb, EDGE_FALLTHRU);
          if (!gimple_omp_for_combined_into_p (fd->for_stmt))
            {
              e = find_edge (entry_bb, last_bb);
              redirect_edge_succ (e, bb);
              set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
              set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
            }

          last_bb = bb;
        }
    }
  if (!broken_loop)
    {
      class loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
        {
          loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
          cfun->has_simduid_loops = true;
        }
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
         the loop.  */
      if ((flag_tree_loop_vectorize
           || !OPTION_SET_P (flag_tree_loop_vectorize))
          && flag_tree_loop_optimize
          && loop->safelen > 1)
        {
          loop->force_vectorize = true;
          if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
            {
              unsigned HOST_WIDE_INT v
                = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
              if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
                loop->simdlen = v;
            }
          cfun->has_force_vectorize_loops = true;
        }
      else if (dont_vectorize)
        loop->dont_vectorize = true;
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}

/* A taskloop construct is represented after gimplification as two
   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched in between
   them.  This routine expands the outer GIMPLE_OMP_FOR, which should just
   compute all the needed loop temporaries for GIMPLE_OMP_TASK.  */
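
/* Schematically, for

       #pragma omp taskloop
       for (V = N1; V < N2; V += STEP) BODY;

   the gimplified IL is nested as

       GIMPLE_OMP_FOR        <- expanded here; just computes the
         GIMPLE_OMP_TASK        _looptemp_ temporaries
           GIMPLE_OMP_FOR    <- expanded by expand_omp_taskloop_for_inner
             BODY

   (a sketch of the nesting described above, not an exact IL dump).  */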

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
                               struct omp_for_data *fd,
                               gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
              && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
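  /* The bias makes the signed iteration space order-isomorphic to the
     unsigned long long space the runtime library computes in: e.g. for a
     64-bit signed V, adding LLONG_MIN modulo 2^64 maps the range [-5, 5)
     to the unsigned range [2^63 - 5, 2^63 + 5), so comparisons on the
     biased values still order the iterations correctly.  */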
  if (fd->iter_type == long_long_unsigned_type_node
      && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
        {
          n1 = fd->loop.n1;
          n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
        }
      else
        {
          n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
          n2 = fd->loop.n1;
        }
      if (TREE_CODE (n1) != INTEGER_CST
          || TREE_CODE (n2) != INTEGER_CST
          || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
        bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
                                  zero_iter_bb, first_zero_iter,
                                  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
        {
          /* Some counts[i] vars might be uninitialized if
             some loop has zero iterations.  But the body shouldn't
             be executed in that case, so just avoid uninit warnings.  */
          for (i = first_zero_iter; i < fd->collapse; i++)
            if (SSA_VAR_P (counts[i]))
              suppress_warning (counts[i], OPT_Wuninitialized);
          gsi_prev (&gsi);
          edge e = split_block (entry_bb, gsi_stmt (gsi));
          entry_bb = e->dest;
          make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
          gsi = gsi_last_bb (entry_bb);
          set_immediate_dominator (CDI_DOMINATORS, entry_bb,
                                   get_immediate_dominator (CDI_DOMINATORS,
                                                            zero_iter_bb));
        }
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
         != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
                                 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      innerc = find_lastprivate_looptemp (fd, innerc);
      if (innerc)
        {
          /* If needed (inner taskloop has lastprivate clause), propagate
             down the total number of iterations.  */
          tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
                                             NULL_TREE, false,
                                             GSI_CONTINUE_LINKING);
          assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
          gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
        }
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
                                 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
                                 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
                           recompute_dominator (CDI_DOMINATORS,
                                                region->entry));
}

/* A taskloop construct is represented after gimplification as two
   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched in between
   them.  This routine expands the inner GIMPLE_OMP_FOR.  The
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.  */
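
/* In other words (sketch only), each task created by GOMP_taskloop
   receives its half-open iteration range in the two _LOOPTEMP_
   temporaries read below as N1 and N2, and the expansion reduces to the
   plain sequential loop

       for (V = N1; V cond N2; V += STEP) BODY;

   over just that range.  */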

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
                               struct omp_for_data *fd,
                               gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
        {
          n1 = fd->loop.n1;
          n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
        }
      else
        {
          n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
          n2 = fd->loop.n1;
        }
      if (TREE_CODE (n1) != INTEGER_CST
          || TREE_CODE (n2) != INTEGER_CST
          || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
        bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
              || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
                                  fin_bb, first_zero_iter,
                                  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
                                   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
                                DECL_P (startvar)
                                && TREE_ADDRESSABLE (startvar),
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
        assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
        assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  tree *nonrect_bounds = NULL;
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
        {
          nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
          memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
        }
      gcc_assert (gsi_bb (gsi) == entry_bb);
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
                                startvar);
      entry_bb = gsi_bb (gsi);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
         GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
            t = fold_build_pointer_plus (vmain, step);
          else
            t = fold_build2 (PLUS_EXPR, type, vmain, step);
          t = force_gimple_operand_gsi (&gsi, t,
                                        DECL_P (vback)
                                        && TREE_ADDRESSABLE (vback),
                                        NULL_TREE, true, GSI_SAME_STMT);
          assign_stmt = gimple_build_assign (vback, t);
          gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

          t = build2 (fd->loop.cond_code, boolean_type_node,
                      DECL_P (vback) && TREE_ADDRESSABLE (vback)
                      ? t : vback, e);
          gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
        }

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
        collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
                                                   cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
        {
          remove_edge (ep);
          ep = NULL;
        }
      else if (fd->collapse > 1)
        {
          remove_edge (ep);
          ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
        }
      else
        ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
        = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                             recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      class loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
        loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk_no < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */
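
/* As an illustration, for

       #pragma acc parallel loop gang
       for (V = 0; V < N; V++) BODY;

   GWV would have the gang dimension bit set, and the GOACC_LOOP_*
   internal calls emitted below simply carry DIR, range, S, CHUNK_SIZE
   and GWV through to oacc_device_lower, which turns them into
   target-specific chunk/step/offset/bound computations (an illustrative
   sketch of the scheme, not the output of an actual compilation).  */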

static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  {
    bool is_oacc_kernels
      = (lookup_attribute ("oacc kernels",
                           DECL_ATTRIBUTES (current_function_decl)) != NULL);
    if (is_oacc_kernels_parallelized)
      gcc_checking_assert (is_oacc_kernels);
  }
  gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
  /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
     for SSA specifics, and some are for 'parloops' OpenACC
     'kernels'-parallelized specifics.  */

  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
                       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  for (int ix = fd->collapse; ix--;)
    {
      tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
        diff_type = diff_type2;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);
  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
    diff_type = integer_type_node;

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two successors; the branch edge is to the exit
     block, fallthrough edge to body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
              && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or to a block whose only successor is the body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));

  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* Tiling vars.  */
  tree tile_size = NULL_TREE;
  tree element_s = NULL_TREE;
  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
  basic_block elem_body_bb = NULL;
  basic_block elem_cont_bb = NULL;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_nondebug_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1 || fd->tiling)
    {
      gcc_assert (!gimple_in_ssa_p (cfun) && up);
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
                                              TREE_TYPE (fd->loop.n2), loc);

      if (SSA_VAR_P (fd->loop.n2))
        {
          total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
                                            true, GSI_SAME_STMT);
          ass = gimple_build_assign (fd->loop.n2, total);
          gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
        }
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
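  /* When the loop runs downwards and S has an unsigned type, S holds the
     wrapped-around image of a negative value; negating it first in the
     unsigned type (well defined modulo 2^N), converting the now
     nonnegative value to DIFF_TYPE and negating again avoids converting
     a value that does not fit in the signed type.  */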
  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
  if (negating)
    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
  s = fold_convert (diff_type, s);
  if (negating)
    s = fold_build1 (NEGATE_EXPR, diff_type, s);
  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);

  if (!chunking)
    chunk_size = integer_zero_node;
  expr = fold_convert (diff_type, chunk_size);
  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
                                         NULL_TREE, true, GSI_SAME_STMT);

  if (fd->tiling)
    {
      /* Determine the tile size and element step,
         modify the outer loop step size.  */
      tile_size = create_tmp_var (diff_type, ".tile_size");
      expr = build_int_cst (diff_type, 1);
      for (int ix = 0; ix < fd->collapse; ix++)
        expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
      expr = force_gimple_operand_gsi (&gsi, expr, true,
                                       NULL_TREE, true, GSI_SAME_STMT);
      ass = gimple_build_assign (tile_size, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      element_s = create_tmp_var (diff_type, ".element_s");
      ass = gimple_build_assign (element_s, s);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
      s = force_gimple_operand_gsi (&gsi, expr, true,
                                    NULL_TREE, true, GSI_SAME_STMT);
    }

  /* Determine the range, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (iter_type);
  expr = fold_build2 (MINUS_EXPR, plus_type,
                      fold_convert (plus_type, negating ? b : e),
                      fold_convert (plus_type, negating ? e : b));
  expr = fold_convert (diff_type, expr);
  if (negating)
    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
  tree range = force_gimple_operand_gsi (&gsi, expr, true,
                                         NULL_TREE, true, GSI_SAME_STMT);

  chunk_no = build_int_cst (diff_type, 0);
  if (chunking)
    {
      gcc_assert (!gimple_in_ssa_p (cfun));

      expr = chunk_no;
      chunk_max = create_tmp_var (diff_type, ".chunk_max");
      chunk_no = create_tmp_var (diff_type, ".chunk_no");

      ass = gimple_build_assign (chunk_no, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
                                         build_int_cst (integer_type_node,
                                                        IFN_GOACC_LOOP_CHUNKS),
                                         dir, range, s, chunk_size, gwv);
      gimple_call_set_lhs (call, chunk_max);
      gimple_set_location (call, loc);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    }
  else
    chunk_size = chunk_no;

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
                                     build_int_cst (integer_type_node,
                                                    IFN_GOACC_LOOP_STEP),
                                     dir, range, s, chunk_size, gwv);
  gimple_call_set_lhs (call, step);
  gimple_set_location (call, loc);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  /* Fixup edges from head_bb.  */
  be = BRANCH_EDGE (head_bb);
  fte = FALLTHRU_EDGE (head_bb);
  be->flags |= EDGE_FALSE_VALUE;
  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

  basic_block body_bb = fte->dest;

  if (gimple_in_ssa_p (cfun))
    {
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));

      offset = gimple_omp_continue_control_use (cont_stmt);
      offset_incr = gimple_omp_continue_control_def (cont_stmt);
    }
  else
    {
      offset = create_tmp_var (diff_type, ".offset");
      offset_init = offset_incr = offset;
    }
  bound = create_tmp_var (TREE_TYPE (offset), ".bound");

  /* Loop offset & bound go into head_bb.  */
  gsi = gsi_start_bb (head_bb);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
                                     build_int_cst (integer_type_node,
                                                    IFN_GOACC_LOOP_OFFSET),
                                     dir, range, s,
                                     chunk_size, gwv, chunk_no);
  gimple_call_set_lhs (call, offset_init);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
                                     build_int_cst (integer_type_node,
                                                    IFN_GOACC_LOOP_BOUND),
                                     dir, range, s,
                                     chunk_size, gwv, offset_init);
  gimple_call_set_lhs (call, bound);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
                    GSI_CONTINUE_LINKING);

  /* V assignment goes into body_bb.  */
  if (!gimple_in_ssa_p (cfun))
    {
      gsi = gsi_start_bb (body_bb);

      expr = build2 (plus_code, iter_type, b,
                     fold_convert (plus_type, offset));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      if (fd->collapse > 1 || fd->tiling)
        expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);

      if (fd->tiling)
        {
          /* Determine the range of the element loop -- usually simply
             the tile_size, but could be smaller if the final
             iteration of the outer loop is a partial tile.  */
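          /* E.g. with tile_size = 4 and element_s = 1, a remaining
             bound - offset of 3 gives e_range = MIN (3, 4 * 1) = 3, i.e. a
             final partial tile of three elements.  */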
          tree e_range = create_tmp_var (diff_type, ".e_range");

          expr = build2 (MIN_EXPR, diff_type,
                         build2 (MINUS_EXPR, diff_type, bound, offset),
                         build2 (MULT_EXPR, diff_type, tile_size,
                                 element_s));
          expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                           true, GSI_SAME_STMT);
          ass = gimple_build_assign (e_range, expr);
          gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

          /* Determine bound, offset & step of inner loop.  */
          e_bound = create_tmp_var (diff_type, ".e_bound");
          e_offset = create_tmp_var (diff_type, ".e_offset");
          e_step = create_tmp_var (diff_type, ".e_step");

          /* Mark these as element loops.  */
          tree t, e_gwv = integer_minus_one_node;
          tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */

          t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
          call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
                                             element_s, chunk, e_gwv, chunk);
          gimple_call_set_lhs (call, e_offset);
          gimple_set_location (call, loc);
          gsi_insert_before (&gsi, call, GSI_SAME_STMT);

          t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
          call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
                                             element_s, chunk, e_gwv, e_offset);
          gimple_call_set_lhs (call, e_bound);
          gimple_set_location (call, loc);
          gsi_insert_before (&gsi, call, GSI_SAME_STMT);

          t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
          call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
                                             element_s, chunk, e_gwv);
          gimple_call_set_lhs (call, e_step);
          gimple_set_location (call, loc);
          gsi_insert_before (&gsi, call, GSI_SAME_STMT);

          /* Add test and split block.  */
          expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
          stmt = gimple_build_cond_empty (expr);
          gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          split = split_block (body_bb, stmt);
          elem_body_bb = split->dest;
          if (cont_bb == body_bb)
            cont_bb = elem_body_bb;
          body_bb = split->src;

          split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

          /* Add a dummy exit for the tiled block when cont_bb is missing.  */
          if (cont_bb == NULL)
            {
              edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
              e->probability = profile_probability::even ();
              split->probability = profile_probability::even ();
            }

          /* Initialize the user's loop vars.  */
          gsi = gsi_start_bb (elem_body_bb);
          expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
                                     diff_type);
        }
    }

  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it were, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
  if (cont_bb)
    {
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      loc = gimple_location (cont_stmt);

      if (fd->tiling)
        {
          /* Insert element loop increment and test.  */
          expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
          expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                           true, GSI_SAME_STMT);
          ass = gimple_build_assign (e_offset, expr);
          gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
          expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);

          stmt = gimple_build_cond_empty (expr);
          gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          split = split_block (cont_bb, stmt);
          elem_cont_bb = split->src;
          cont_bb = split->dest;

          split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
          split->probability = profile_probability::unlikely ().guessed ();
          edge latch_edge
            = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
          latch_edge->probability = profile_probability::likely ().guessed ();

          edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
          skip_edge->probability = profile_probability::unlikely ().guessed ();
          edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
          loop_entry_edge->probability
            = profile_probability::likely ().guessed ();

          gsi = gsi_for_stmt (cont_stmt);
        }

      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
        expr = build2 (plus_code, iter_type, offset,
                       fold_convert (plus_type, step));
      else
        expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;

      if (chunking)
        {
          /* Split the beginning of exit_bb to make bottom_bb.  We
             need to insert a nop at the start, because splitting is
             after a stmt, not before.  */
          gsi = gsi_start_bb (exit_bb);
          stmt = gimple_build_nop ();
          gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          split = split_block (exit_bb, stmt);
          bottom_bb = split->src;
          exit_bb = split->dest;
          gsi = gsi_last_bb (bottom_bb);

          /* Chunk increment and test goes into bottom_bb.  */
          expr = build2 (PLUS_EXPR, diff_type, chunk_no,
                         build_int_cst (diff_type, 1));
          ass = gimple_build_assign (chunk_no, expr);
          gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);

          /* Chunk test at end of bottom_bb.  */
          expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
          gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
                            GSI_CONTINUE_LINKING);

          /* Fixup edges from bottom_bb.  */
          split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
          split->probability = profile_probability::unlikely ().guessed ();
          edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
          latch_edge->probability = profile_probability::likely ().guessed ();
        }
    }

  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  loc = gimple_location (gsi_stmt (gsi));

  if (!gimple_in_ssa_p (cfun))
    {
      /* Insert the final value of V, in case it is live.  This is the
         value for the only thread that survives past the join.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
    }

  /* Remove the OMP_RETURN.  */
  gsi_remove (&gsi, true);

  if (cont_bb)
    {
      /* We now have one, two or three nested loops.  Update the loop
         structures.  */
      class loop *parent = entry_bb->loop_father;
      class loop *body = body_bb->loop_father;

      if (chunking)
        {
          class loop *chunk_loop = alloc_loop ();
          chunk_loop->header = head_bb;
          chunk_loop->latch = bottom_bb;
          add_loop (chunk_loop, parent);
          parent = chunk_loop;
        }
      else if (parent != body)
        {
          gcc_assert (body->header == body_bb);
          gcc_assert (body->latch == cont_bb
                      || single_pred (body->latch) == cont_bb);
          parent = NULL;
        }

      if (parent)
        {
          class loop *body_loop = alloc_loop ();
          body_loop->header = body_bb;
          body_loop->latch = cont_bb;
          add_loop (body_loop, parent);

          if (fd->tiling)
            {
              /* Insert tiling's element loop.  */
              class loop *inner_loop = alloc_loop ();
              inner_loop->header = elem_body_bb;
              inner_loop->latch = elem_cont_bb;
              add_loop (inner_loop, body_loop);
            }
        }
    }
}

/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops = XALLOCAVEC (struct omp_for_data_loop,
                      gimple_omp_for_collapse
                        (last_nondebug_stmt (region->entry)));
  omp_extract_for_data (as_a <gomp_for *> (last_nondebug_stmt (region->entry)),
                        &fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;
  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
  if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
    {
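      /* Diagnose non-rectangular loop nests where the outer-var factor
         is not a multiple of the inner step.  E.g. (an illustrative
         sketch, not from the testsuite):
           #pragma omp for collapse(2)
           for (int i = 0; i < n; i++)
             for (int j = i; j < 2 * i + 4; j += 2)
         has m1 == 1 and m2 == 2, so (m2 - m1) * outer-step == 1, which
         is not a multiple of the inner step 2 and is rejected below.  */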
      for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
        if ((loops[i].m1 || loops[i].m2)
            && (loops[i].m1 == NULL_TREE
                || TREE_CODE (loops[i].m1) == INTEGER_CST)
            && (loops[i].m2 == NULL_TREE
                || TREE_CODE (loops[i].m2) == INTEGER_CST)
            && TREE_CODE (loops[i].step) == INTEGER_CST
            && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
          {
            tree t;
            tree itype = TREE_TYPE (loops[i].v);
            if (loops[i].m1 && loops[i].m2)
              t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
            else if (loops[i].m1)
              t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
            else
              t = loops[i].m2;
            t = fold_build2 (MULT_EXPR, itype, t,
                             fold_convert (itype,
                                           loops[i - loops[i].outer].step));
            if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
              t = fold_build2 (TRUNC_MOD_EXPR, itype,
                               fold_build1 (NEGATE_EXPR, itype, t),
                               fold_build1 (NEGATE_EXPR, itype,
                                            fold_convert (itype,
                                                          loops[i].step)));
            else
              t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
                               fold_convert (itype, loops[i].step));
            if (integer_nonzerop (t))
              error_at (gimple_location (fd.for_stmt),
                        "invalid OpenMP non-rectangular loop step; "
                        "%<(%E - %E) * %E%> is not a multiple of loop %d "
                        "step %qE",
                        loops[i].m2 ? loops[i].m2 : integer_zero_node,
                        loops[i].m1 ? loops[i].m1 : integer_zero_node,
                        loops[i - loops[i].outer].step, i + 1,
                        loops[i].step);
          }
    }

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt && !fd.non_rect);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
        expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
        expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
           && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
        expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
        expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      int fn_index, start_ix, next_ix;
      unsigned HOST_WIDE_INT sched = 0;
      tree sched_arg = NULL_TREE;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
                  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
      if (fd.chunk_size == NULL
          && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
        fd.chunk_size = integer_zero_node;
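      /* Map the schedule kind and modifiers to a libgomp entry point,
         expressed as an offset from GOMP_loop_static_start/next.  For
         instance, schedule(nonmonotonic: dynamic) ends up selecting the
         GOMP_loop_nonmonotonic_dynamic_* entry points (an illustrative
         note; see omp-builtins.def for the authoritative ordering).  */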
      switch (fd.sched_kind)
        {
        case OMP_CLAUSE_SCHEDULE_RUNTIME:
          if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
              && fd.lastprivate_conditional == 0)
            {
              gcc_assert (!fd.have_ordered);
              fn_index = 6;
              sched = 4;
            }
          else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
                   && !fd.have_ordered
                   && fd.lastprivate_conditional == 0)
            fn_index = 7;
          else
            {
              fn_index = 3;
              sched = (HOST_WIDE_INT_1U << 31);
            }
          break;
        case OMP_CLAUSE_SCHEDULE_DYNAMIC:
        case OMP_CLAUSE_SCHEDULE_GUIDED:
          if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
              && !fd.have_ordered
              && fd.lastprivate_conditional == 0)
            {
              fn_index = 3 + fd.sched_kind;
              sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
              break;
            }
          fn_index = fd.sched_kind;
          sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
          sched += (HOST_WIDE_INT_1U << 31);
          break;
        case OMP_CLAUSE_SCHEDULE_STATIC:
          gcc_assert (fd.have_ordered);
          fn_index = 0;
          sched = (HOST_WIDE_INT_1U << 31) + 1;
          break;
        default:
          gcc_unreachable ();
        }
      if (!fd.ordered)
        fn_index += fd.have_ordered * 8;
      if (fd.ordered)
        start_ix = ((int) BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
        start_ix = ((int) BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int) BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.have_reductemp || fd.have_pointer_condtemp)
        {
          if (fd.ordered)
            start_ix = (int) BUILT_IN_GOMP_LOOP_DOACROSS_START;
          else if (fd.have_ordered)
            start_ix = (int) BUILT_IN_GOMP_LOOP_ORDERED_START;
          else
            start_ix = (int) BUILT_IN_GOMP_LOOP_START;
          sched_arg = build_int_cstu (long_integer_type_node, sched);
          if (!fd.chunk_size)
            fd.chunk_size = integer_zero_node;
        }
      if (fd.iter_type == long_long_unsigned_type_node)
        {
          start_ix += ((int) BUILT_IN_GOMP_LOOP_ULL_STATIC_START
                       - (int) BUILT_IN_GOMP_LOOP_STATIC_START);
          next_ix += ((int) BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
                      - (int) BUILT_IN_GOMP_LOOP_STATIC_NEXT);
        }
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
                              (enum built_in_function) next_ix, sched_arg,
                              inner_stmt);
    }
}

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

        v = GOMP_sections_start (n);
    L0:
        switch (v)
          {
          case 0:
            goto L2;
          case 1:
            section 1;
            goto L1;
          case 2:
            ...
          case n:
            ...
          default:
            abort ();
          }
    L1:
        v = GOMP_sections_next ();
        goto L0;
    L2:
        reduction;

   If this is a combined parallel sections construct, replace the call to
   GOMP_sections_start with a call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
          || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
        l2 = gimple_block_label (e->dest);
      else
        FOR_EACH_EDGE (e, ei, l0_bb->succs)
          {
            si = gsi_last_nondebug_bb (e->dest);
            if (gsi_end_p (si)
                || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
              {
                l2 = gimple_block_label (e->dest);
                break;
              }
          }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  tree clauses = gimple_omp_sections_clauses (sections_stmt);
  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
  tree cond_var = NULL_TREE;
  if (reductmp || condtmp)
    {
      tree reductions = null_pointer_node, mem = null_pointer_node;
      tree memv = NULL_TREE, condtemp = NULL_TREE;
      gimple_stmt_iterator gsi = gsi_none ();
      gimple *g = NULL;
      if (reductmp)
        {
          reductions = OMP_CLAUSE_DECL (reductmp);
          gcc_assert (TREE_CODE (reductions) == SSA_NAME);
          g = SSA_NAME_DEF_STMT (reductions);
          reductions = gimple_assign_rhs1 (g);
          OMP_CLAUSE_DECL (reductmp) = reductions;
          gsi = gsi_for_stmt (g);
        }
      else
        gsi = si;
      if (condtmp)
        {
          condtemp = OMP_CLAUSE_DECL (condtmp);
          tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
                                    OMP_CLAUSE__CONDTEMP_);
          cond_var = OMP_CLAUSE_DECL (c);
          tree type = TREE_TYPE (condtemp);
          memv = create_tmp_var (type);
          TREE_ADDRESSABLE (memv) = 1;
          unsigned cnt = 0;
          for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
            if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
                && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
              ++cnt;
          unsigned HOST_WIDE_INT sz
            = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
          expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
                                   false);
          mem = build_fold_addr_expr (memv);
        }
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
      stmt = gimple_build_call (u, 3, t, reductions, mem);
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      if (condtmp)
        {
          expand_omp_build_assign (&gsi, condtemp, memv, false);
          tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
                           vin, build_one_cst (TREE_TYPE (cond_var)));
          expand_omp_build_assign (&gsi, cond_var, t, false);
        }
      if (reductmp)
        {
          gsi_remove (&gsi, true);
          release_ssa_name (gimple_assign_lhs (g));
        }
    }
  else if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
         call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  if (!reductmp && !condtmp)
    {
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
    }
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_nondebug_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
        {
          --i;
          --casei;
          continue;
        }

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
        continue;

      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      if (cond_var)
        {
          tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
                           vnext, build_one_cst (TREE_TYPE (cond_var)));
          expand_omp_build_assign (&si, cond_var, t, false);
        }
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single or scope directive.  We've already
   expanded much of the code; here we simply place the GOMP_barrier call.  */
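/* In pseudo code (a sketch; with nowait no barrier is emitted, and when
   cancellation may happen the call is GOMP_barrier_cancel instead):

        body of the single or scope region;
        GOMP_barrier ();    <-- replaces the GIMPLE_OMP_RETURN  */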

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  enum gimple_code code = gimple_code (gsi_stmt (si));
  gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb == NULL)
    {
      gcc_assert (code == GIMPLE_OMP_SCOPE);
      return;
    }

  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   masked, taskgroup, ordered and critical.  All we need to do here is
   remove the entry and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
    {
      expand_omp_taskreg (region);
      return;
    }
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* Translate the fail clause embedded in an enum omp_memory_order value
   to an enum memmodel.  */
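/* For example (illustrative): for `#pragma omp atomic compare ... acq_rel`
   with no explicit fail clause, the failure-path memory model degrades to
   MEMMODEL_ACQUIRE below, since a failed compare-and-swap performs no
   store and therefore needs no release semantics.  */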

static enum memmodel
omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
{
  switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
    {
    case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
      switch (mo & OMP_MEMORY_ORDER_MASK)
        {
        case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
        case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
        case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
        case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
        case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
        default: break;
        }
      gcc_unreachable ();
    case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
    case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
    case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
    default: gcc_unreachable ();
    }
}

/* Translate enum omp_memory_order to enum memmodel.  The two enums use
   different numberings so that OMP_MEMORY_ORDER_UNSPECIFIED can be 0,
   and omp_memory_order additionally encodes the fail mode.  */
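/* For instance (illustrative), `relaxed fail(seq_cst)` yields
   MEMMODEL_SEQ_CST here, so the success ordering passed to the atomic
   builtins is never weaker than the failure ordering.  */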

static enum memmodel
omp_memory_order_to_memmodel (enum omp_memory_order mo)
{
  enum memmodel ret, fail_ret;
  switch (mo & OMP_MEMORY_ORDER_MASK)
    {
    case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
    case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
    case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
    case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
    case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
    default: gcc_unreachable ();
    }
  /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
     we can just return ret here unconditionally.  Otherwise, work around
     it here and make sure the fail memmodel is not stronger.  */
  if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
    return ret;
  fail_ret = omp_memory_order_to_fail_memmodel (mo);
  if (fail_ret > ret)
    return fail_ret;
  return ret;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */
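/* E.g. (a sketch, for a 4-byte type; the call is built in GIMPLE):

     GIMPLE_OMP_ATOMIC_LOAD (v, addr)
   becomes
     v = __atomic_load_4 (addr, memmodel);  */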

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
                        tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (integer_type_node,
                           omp_memory_order_to_memmodel (omo));
  gcall *call = gimple_build_call (decl, 2, addr, mo);
  gimple_set_location (call, loc);
  gimple_set_vuse (call, gimple_vuse (stmt));
  gimple *repl;
  if (!useless_type_conversion_p (type, itype))
    {
      tree lhs = make_ssa_name (itype);
      gimple_call_set_lhs (call, lhs);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
      repl = gimple_build_assign (loaded_val,
                                  build1 (VIEW_CONVERT_EXPR, type, lhs));
      gimple_set_location (repl, loc);
    }
  else
    {
      gimple_call_set_lhs (call, loaded_val);
      repl = call;
    }
  gsi_replace (&gsi, repl, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */
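/* E.g. (a sketch, for a 4-byte type):

     GIMPLE_OMP_ATOMIC_STORE (val)
   becomes
     __atomic_store_4 (addr, val, memmodel);
   or, when the old value is needed (capture),
     v = __atomic_exchange_4 (addr, val, memmodel);  */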

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
                         tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the
     store is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (integer_type_node,
                           omp_memory_order_to_memmodel (omo));
  stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
                                         true, GSI_SAME_STMT);
  gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
  gimple_set_location (call, loc);
  gimple_set_vuse (call, gimple_vuse (stmt));
  gimple_set_vdef (call, gimple_vdef (stmt));

  gimple *repl = call;
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
        {
          tree lhs = make_ssa_name (itype);
          gimple_call_set_lhs (call, lhs);
          gsi_insert_before (&gsi, call, GSI_SAME_STMT);
          repl = gimple_build_assign (loaded_val,
                                      build1 (VIEW_CONVERT_EXPR, type, lhs));
          gimple_set_location (repl, loc);
        }
      else
        gimple_call_set_lhs (call, loaded_val);
    }
  gsi_replace (&gsi, repl, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */
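/* E.g. (a sketch, for a 4-byte integer and `#pragma omp atomic` x += n):

     GIMPLE_OMP_ATOMIC_LOAD (tmp, &x);  val = tmp + n;
   collapses into a single
     __atomic_fetch_add_4 (&x, n, memmodel);  */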

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
                            tree addr, tree loaded_val,
                            tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

     load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

     store_bb:
       val = tmp OP something;  (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

     ???FIXME: Allow a more flexible sequence.
     Perhaps use data flow to pick the statements.  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
        return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_nondebug_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_nondebug_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
           && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
             ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement
     compare-and-swap.  Let optabs.cc take care of expanding any
     compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically, so in the
     absence of an explicit memory-order clause MO is simply the RELAXED
     memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
                              fold_convert_loc (loc, itype, rhs),
                              build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    release_defs (stmt);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
   Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_cas (basic_block load_bb, tree addr,
                       tree loaded_val, tree stored_val, int index)
{
  /* We expect to find the following sequences:

     load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

     store_bb:
       val = tmp == e ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     or in store_bb instead:
       tmp2 = tmp == e;
       val = tmp2 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     or:
       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
       val = e == tmp3 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     etc.  */
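  /* All of these forms collapse into a single internal call, roughly
     (a sketch; FLAG encodes the operand size plus a weak bit):

       cres = .ATOMIC_COMPARE_EXCHANGE (addr, e, d, flag, mo, fmo);

     with REALPART_EXPR <cres> the old value and IMAGPART_EXPR <cres>
     the success flag.  */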

  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
  gimple *store_stmt = gsi_stmt (gsi);
  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return false;
  gimple *condexpr_stmt = gsi_stmt (gsi);
  if (!is_gimple_assign (condexpr_stmt)
      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
    return false;
  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
    return false;
  gimple *cond_stmt = NULL;
  gimple *vce_stmt = NULL;
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      cond_stmt = gsi_stmt (gsi);
      if (!is_gimple_assign (cond_stmt))
        return false;
      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
        {
          gsi_prev_nondebug (&gsi);
          if (!gsi_end_p (gsi))
            {
              vce_stmt = gsi_stmt (gsi);
              if (!is_gimple_assign (vce_stmt)
                  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
                return false;
            }
        }
      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
        std::swap (vce_stmt, cond_stmt);
      else
        return false;
      if (vce_stmt)
        {
          tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
          if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
              || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
            return false;
          if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
              || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
              || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
                                      TYPE_SIZE (TREE_TYPE (loaded_val))))
            return false;
          gsi_prev_nondebug (&gsi);
          if (!gsi_end_p (gsi))
            return false;
        }
    }
  tree cond = gimple_assign_rhs1 (condexpr_stmt);
  tree cond_op1, cond_op2;
  if (cond_stmt)
    {
      /* We should now always get a separate cond_stmt.  */
      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
        return false;
      cond_op1 = gimple_assign_rhs1 (cond_stmt);
      cond_op2 = gimple_assign_rhs2 (cond_stmt);
    }
  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
    return false;
  else
    {
      cond_op1 = TREE_OPERAND (cond, 0);
      cond_op2 = TREE_OPERAND (cond, 1);
    }
  tree d;
  if (TREE_CODE (cond) == NE_EXPR)
    {
      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
        return false;
      d = gimple_assign_rhs3 (condexpr_stmt);
    }
  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
    return false;
  else
    d = gimple_assign_rhs2 (condexpr_stmt);
  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
  if (operand_equal_p (e, cond_op1))
    e = cond_op2;
  else if (operand_equal_p (e, cond_op2))
    e = cond_op1;
  else
    return false;

  location_t loc = gimple_location (store_stmt);
  gimple *load_stmt = last_nondebug_stmt (load_bb);
  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
  bool weak = gimple_omp_atomic_weak_p (load_stmt);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
  gcc_checking_assert (!need_old || !need_new);

  enum built_in_function fncode
    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
                                + index + 1);
  tree cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
    return false;

  gsi = gsi_for_stmt (store_stmt);
  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
    {
      tree ne = create_tmp_reg (itype);
      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      e = ne;
    }
  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
    {
      tree nd = create_tmp_reg (itype);
      enum tree_code code;
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
        {
          code = VIEW_CONVERT_EXPR;
          d = build1 (VIEW_CONVERT_EXPR, itype, d);
        }
      else
        code = NOP_EXPR;
      gimple *g = gimple_build_assign (nd, code, d);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      d = nd;
    }

  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
  gimple *g
    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
                                  build_int_cst (integer_type_node, flag),
                                  mo, fmo);
  tree cres = create_tmp_reg (ctype);
  gimple_call_set_lhs (g, cres);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  if (cond_stmt || need_old || need_new)
    {
      tree im = create_tmp_reg (itype);
      g = gimple_build_assign (im, IMAGPART_EXPR,
                               build1 (IMAGPART_EXPR, itype, cres));
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

      tree re = NULL_TREE;
      if (need_old || need_new)
        {
          re = create_tmp_reg (itype);
          g = gimple_build_assign (re, REALPART_EXPR,
                                   build1 (REALPART_EXPR, itype, cres));
          gimple_set_location (g, loc);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
        }

      if (cond_stmt)
        {
          g = gimple_build_assign (cond, NOP_EXPR, im);
          gimple_set_location (g, loc);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
        }

      if (need_new)
        {
          g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
                                   cond_stmt
                                   ? cond : build2 (NE_EXPR, boolean_type_node,
                                                    im, build_zero_cst (itype)),
                                   d, re);
          gimple_set_location (g, loc);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
          re = gimple_assign_lhs (g);
        }

      if (need_old || need_new)
        {
          tree v = need_old ? loaded_val : stored_val;
          enum tree_code code;
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
            {
              code = VIEW_CONVERT_EXPR;
              re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
            }
          else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
            code = NOP_EXPR;
          else
            code = TREE_CODE (re);
          g = gimple_build_assign (v, code, re);
          gimple_set_location (g, loc);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
        }
    }

  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (load_stmt);
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (condexpr_stmt);
  gsi_remove (&gsi, true);
  if (cond_stmt)
    {
      gsi = gsi_for_stmt (cond_stmt);
      gsi_remove (&gsi, true);
    }
  if (vce_stmt)
    {
      gsi = gsi_for_stmt (vce_stmt);
      gsi_remove (&gsi, true);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
        newval = rhs;    // with oldval replacing *addr in rhs
        oldval = __sync_val_compare_and_swap (addr, oldval, newval);
        if (oldval != newval)
          goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
                            tree addr, tree loaded_val, tree stored_val,
                            int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  fncode = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
                                     + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
  location_t loc = gimple_location (gsi_stmt (si));
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
                                                           true));
      atype = itype;
      iaddr_val
        = force_gimple_operand_gsi (&si,
                                    fold_convert (TREE_TYPE (iaddr), addr),
                                    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
        loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
                      build_call_expr (loaddecl, 2, iaddr,
                                       build_int_cst (NULL_TREE,
                                                      MEMMODEL_RELAXED)));
  else
    {
      tree off
        = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
                                                      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
                                GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
               initial);
    }
  else
    gsi_insert_before (&si,
                       gimple_build_assign (loadedi, initial),
                       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
        {
          gassign *stmt;
          x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
          stmt = gimple_build_assign (loaded_val, x);
          gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
        }
      else
        {
          x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
          force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
        }
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
                                  build1 (VIEW_CONVERT_EXPR, itype,
                                          stored_val), true, NULL_TREE, true,
                                  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype);
  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
                                              ctype, 6, iaddr, loadedi,
                                              storedi,
                                              build_int_cst (integer_type_node,
                                                             flag),
                                              mo, fmo);
  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
  new_storedi = force_gimple_operand_gsi (&si,
                                          fold_convert (TREE_TYPE (loadedi),
                                                        new_storedi),
                                          true, NULL_TREE,
                                          true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  stmt = gsi_stmt (si);
  gsi_remove (&si, true);
  if (gimple_in_ssa_p (cfun))
    release_defs (stmt);

  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
     GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr)
   with
     loaded_val = *addr;

   and replace
     GIMPLE_OMP_ATOMIC_STORE (stored_val)
   with
     *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
                         tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
                                                 true),
                    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (si)));
  gimple_set_vdef (stmt, gimple_vdef (gsi_stmt (si)));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);
  return true;
}

/* Expand a GIMPLE_OMP_ATOMIC statement.  We first try the specialized
   forms: a plain atomic load or store, a __atomic_fetch_op builtin, or a
   compare-and-exchange.  If those fail, we fall back to a compare-and-swap
   loop (expand_omp_atomic_pipeline); the ultimate fallback is wrapping the
   operation in a mutex (expand_omp_atomic_mutex).  REGION is the atomic
   region built by build_omp_regions_1 ().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load
    = as_a <gomp_atomic_load *> (last_nondebug_stmt (load_bb));
  gomp_atomic_store *store
    = as_a <gomp_atomic_store *> (last_nondebug_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
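  /* E.g. a 4-byte type gives index == 2; together with the "+ index + 1"
     offsets used above, this selects the __atomic_*_4 builtin variants.  */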
9579 if (index >= 0 && index <= 4)
9580 {
9581 unsigned int align = TYPE_ALIGN_UNIT (type);
9582
9583 /* __sync builtins require strict data alignment. */
9584 if (exact_log2 (x: align) >= index)
9585 {
9586 /* Atomic load. */
9587 scalar_mode smode;
9588 if (loaded_val == stored_val
9589 && (is_int_mode (TYPE_MODE (type), int_mode: &smode)
9590 || is_float_mode (TYPE_MODE (type), float_mode: &smode))
9591 && GET_MODE_BITSIZE (mode: smode) <= BITS_PER_WORD
9592 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9593 return;
9594
9595 /* Atomic store. */
9596 if ((is_int_mode (TYPE_MODE (type), int_mode: &smode)
9597 || is_float_mode (TYPE_MODE (type), float_mode: &smode))
9598 && GET_MODE_BITSIZE (mode: smode) <= BITS_PER_WORD
9599 && store_bb == single_succ (bb: load_bb)
9600 && first_stmt (store_bb) == store
9601 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9602 stored_val, index))
9603 return;
9604
9605 /* When possible, use specialized atomic update functions. */
9606 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9607 && store_bb == single_succ (bb: load_bb)
9608 && expand_omp_atomic_fetch_op (load_bb, addr,
9609 loaded_val, stored_val, index))
9610 return;
9611
9612 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9613 if (store_bb == single_succ (bb: load_bb)
9614 && !gimple_in_ssa_p (cfun)
9615 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9616 index))
9617 return;
9618
9619 /* If we don't have specialized __sync builtins, try and implement
9620 as a compare and swap loop. */
9621 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9622 loaded_val, stored_val, index))
9623 return;
9624 }
9625 }
9626
9627 /* The ultimate fallback is wrapping the operation in a mutex. */
9628 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9629}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and
   ending at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
                                   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
        continue;

      if (region_exit != NULL
          && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
        continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Bail out unless the outer loop encloses a single chain of nested
     loops, i.e. if any loop in the nest has a sibling.  */
  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

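/* The helpers below build the "target arguments" array that is passed to
   GOMP_target (see get_target_arguments).  Each element packs the device
   identifier, a value identifier such as GOMP_TARGET_ARG_NUM_TEAMS, and,
   for values fitting into a signed 16-bit range, the value itself shifted
   by GOMP_TARGET_ARG_VALUE_SHIFT; e.g., roughly,
     GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS
     | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
   for a constant num_teams(4).  Larger or non-constant values are instead
   passed as a separate following element, with the identifier element
   carrying GOMP_TARGET_ARG_SUBSEQUENT_PARAM.  */
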
/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                     build_int_cst (integer_type_node,
                                    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in a type that can be directly stored as an
   element of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
                           tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
                        fold_convert (integer_type_node, value),
                        build_int_cst (unsigned_type_node,
                                       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with the DEVICE, ID and VALUE embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   separate arguments.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
                                         int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
                                        GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_NUM_TEAMS, t,
                                           &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_THREAD_LIMIT, t,
                                           &args);

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
                                                          args.length () + 1),
                                  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                         build_int_cst (integer_type_node, i),
                         NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
                         GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                     build_int_cst (integer_type_node, args.length ()),
                     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
                     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}

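/* For instance (a sketch), 'num_teams(4) thread_limit(n)' with a
   non-constant n yields an array along the lines of:
     .omp_target_args = { DEVICE_ALL|NUM_TEAMS|(4 << VALUE_SHIFT),
                          DEVICE_ALL|THREAD_LIMIT|SUBSEQUENT_PARAM,
                          (void *) n, NULL };
   where the trailing NULL terminates the array for the runtime.  */
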
/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, child_fn2, block, t, c;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_nondebug_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      break;
    default:
      gcc_unreachable ();
    }

  tree clauses = gimple_omp_target_clauses (entry_stmt);

  bool is_ancestor = false;
  child_fn = child_fn2 = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (ENABLE_OFFLOADING && c)
        is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* Going on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc parallel"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc kernels"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc serial"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
      DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
                             && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the offloading body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
        {
          basic_block entry_succ_bb = single_succ (entry_bb);
          gimple_stmt_iterator gsi;
          tree arg;
          gimple *tgtcopy_stmt = NULL;
          tree sender = TREE_VEC_ELT (data_arg, 0);

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We're ignoring the subcode because we're
                     effectively doing a STRIP_NOPS.  */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && TREE_OPERAND (arg, 0) == sender)
                    {
                      tgtcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (tgtcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
          gsi_remove (&gsi, true);
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
         rather than in the containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
         so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
                  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_nondebug_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
        {
          if (in_lto_p)
            DECL_PRESERVE_P (child_fn) = 1;
          if (!is_ancestor)
            vec_safe_push (offload_funcs, child_fn);
        }

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

      /* Handle the case where an inner ancestor:1 target region is called
         by an outer target region.  */
      if (is_ancestor)
        {
          cgraph_node *fn2_node;
          child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
                                  FUNCTION_DECL,
                                  clone_function_name (child_fn, "nohost"),
                                  TREE_TYPE (child_fn));
          if (in_lto_p)
            DECL_PRESERVE_P (child_fn2) = 1;
          TREE_STATIC (child_fn2) = 1;
          DECL_ARTIFICIAL (child_fn2) = 1;
          DECL_IGNORED_P (child_fn2) = 0;
          TREE_PUBLIC (child_fn2) = 0;
          DECL_UNINLINABLE (child_fn2) = 1;
          DECL_EXTERNAL (child_fn2) = 0;
          DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
          DECL_INITIAL (child_fn2) = make_node (BLOCK);
          BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
          DECL_ATTRIBUTES (child_fn)
            = remove_attribute ("omp target entrypoint",
                                DECL_ATTRIBUTES (child_fn));
          DECL_ATTRIBUTES (child_fn2)
            = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
                         NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
          DECL_ATTRIBUTES (child_fn)
            = tree_cons (get_identifier ("omp target device_ancestor_host"),
                         NULL_TREE, DECL_ATTRIBUTES (child_fn));
          DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
            = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
          DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
            = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
          DECL_FUNCTION_VERSIONED (child_fn2)
            = DECL_FUNCTION_VERSIONED (current_function_decl);

          fn2_node = cgraph_node::get_create (child_fn2);
          fn2_node->offloadable = 1;
          fn2_node->force_output = 1;
          node->offloadable = 0;

          /* Enable the pass_omp_device_lower pass.  */
          fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
          fn2_node->calls_declare_variant_alt = 1;

          t = build_decl (DECL_SOURCE_LOCATION (child_fn),
                          RESULT_DECL, NULL_TREE, void_type_node);
          DECL_ARTIFICIAL (t) = 1;
          DECL_IGNORED_P (t) = 1;
          DECL_CONTEXT (t) = child_fn2;
          DECL_RESULT (child_fn2) = t;
          DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
                                                void_type_node, NULL);
          tree tmp = DECL_ARGUMENTS (child_fn);
          t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
                          DECL_NAME (tmp), TREE_TYPE (tmp));
          DECL_ARTIFICIAL (t) = 1;
          DECL_NAMELESS (t) = 1;
          DECL_ARG_TYPE (t) = ptr_type_node;
          DECL_CONTEXT (t) = current_function_decl;
          TREE_USED (t) = 1;
          TREE_READONLY (t) = 1;
          DECL_ARGUMENTS (child_fn2) = t;
          gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);

          gimplify_function_tree (child_fn2);
          cgraph_node::add_new_function (child_fn2, true);

          vec_safe_push (offload_funcs, child_fn2);
          if (dump_file && !gimple_in_ssa_p (cfun))
            {
              dump_function_header (dump_file, child_fn2, dump_flags);
              dump_function_to_file (child_fn2, dump_file, dump_flags);
            }
        }
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
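  /* For a plain '#pragma omp target', the call assembled below is roughly
     (a sketch; BUILT_IN_GOMP_TARGET resolves to GOMP_target_ext):
       GOMP_target_ext (device, child_fn, map_num, .omp_data_arr,
                        .omp_data_sizes, .omp_data_kinds, flags, depend,
                        .omp_target_args);
     OpenACC compute constructs instead funnel into BUILT_IN_GOACC_PARALLEL,
     with launch tags appended to the argument vector further below.  */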
  tree t1, t2, t3, t4, depend;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  bool need_device_adjustment = false;
  gimple_stmt_iterator adj_gsi;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
        {
          device = OMP_CLAUSE_DEVICE_ID (c);
          /* Ensure 'device' is of the correct type.  */
          device = fold_convert_loc (device_loc, integer_type_node, device);
          if (TREE_CODE (device) == INTEGER_CST)
            {
              if (wi::to_wide (device) == GOMP_DEVICE_ICV)
                device = build_int_cst (integer_type_node,
                                        GOMP_DEVICE_HOST_FALLBACK);
              else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
                device = build_int_cst (integer_type_node,
                                        GOMP_DEVICE_HOST_FALLBACK - 1);
            }
          else
            need_device_adjustment = true;
          device_loc = OMP_CLAUSE_LOCATION (c);
          if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
            device = build_int_cst (integer_type_node,
                                    GOMP_DEVICE_HOST_FALLBACK);
        }
      else
        {
          /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
             runtime library choose).  */
          device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
          device_loc = gimple_location (entry_stmt);
        }

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
         nowait doesn't appear.  */
      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
        c = NULL;
      if (c)
        flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
                             : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK)  */
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
        tp = &goacc_flags;
      else
        tp = &device;

      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
        e = split_block_after_labels (new_bb);
      else
        {
          gsi = gsi_last_nondebug_bb (new_bb);
          gsi_prev (&gsi);
          e = split_block (new_bb, gsi_stmt (gsi));
        }
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
      adj_gsi = gsi;

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
        stmt = gimple_build_assign (tmp_var,
                                    BIT_IOR_EXPR,
                                    *tp,
                                    build_int_cst (integer_type_node,
                                                   GOACC_FLAG_HOST_FALLBACK));
      else
        stmt = gimple_build_assign (tmp_var,
                                    build_int_cst (integer_type_node,
                                                   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
        device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
                                           true, GSI_SAME_STMT);
      if (need_device_adjustment)
        {
          tree tmp_var = create_tmp_var (TREE_TYPE (device));
          stmt = gimple_build_assign (tmp_var, device);
          gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          adj_gsi = gsi_for_stmt (stmt);
          device = tmp_var;
        }
    }

  if ((c = omp_find_clause (clauses, OMP_CLAUSE_SELF)) != NULL_TREE)
    {
      gcc_assert ((is_gimple_omp_oacc (entry_stmt) && offloaded)
                  || (gimple_omp_target_kind (entry_stmt)
                      == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS));

      edge e;
      if (offloaded)
        e = split_block_after_labels (new_bb);
      else
        {
          gsi = gsi_last_nondebug_bb (new_bb);
          gsi_prev (&gsi);
          e = split_block (new_bb, gsi_stmt (gsi));
        }
      basic_block cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      basic_block then_bb = create_empty_bb (cond_bb);
      basic_block else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      tree self_cond = gimple_boolify (OMP_CLAUSE_SELF_EXPR (c));
      stmt = gimple_build_cond_empty (self_cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      tree tmp_var = create_tmp_var (TREE_TYPE (goacc_flags));
      stmt = gimple_build_assign (tmp_var, BIT_IOR_EXPR, goacc_flags,
                                  build_int_cst (integer_type_node,
                                                 GOACC_FLAG_LOCAL_DEVICE));
      gsi = gsi_start_bb (then_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var, goacc_flags);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      goacc_flags = tmp_var;
      gsi = gsi_last_nondebug_bb (new_bb);
    }

  if (need_device_adjustment)
    {
      tree uns = fold_convert (unsigned_type_node, device);
      uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
      edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
      basic_block cond_bb = e->src;
      basic_block else_bb = e->dest;
      if (gsi_bb (adj_gsi) == new_bb)
        {
          new_bb = else_bb;
          gsi = gsi_last_nondebug_bb (new_bb);
        }

      basic_block then_bb = create_empty_bb (cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

      cond = build2 (GT_EXPR, boolean_type_node, uns,
                     build_int_cst (unsigned_type_node,
                                    GOMP_DEVICE_HOST_FALLBACK - 1));
      stmt = gimple_build_cond_empty (cond);
      adj_gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);

      adj_gsi = gsi_start_bb (then_bb);
      tree add = build2 (PLUS_EXPR, integer_type_node, device,
                         build_int_cst (integer_type_node, -1));
      stmt = gimple_build_assign (device, add);
      gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      e->flags = EDGE_FALSE_VALUE;
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      make_edge (then_bb, else_bb, EDGE_FALLTHRU);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
                                        TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
                                                NULL_TREE, true,
                                                GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
        depend = OMP_CLAUSE_DECL (c);
      else
        depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
        args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
        {
          tree dims = NULL_TREE;
          unsigned int ix;

          /* For serial constructs we set all dimensions to 1.  */
          for (ix = GOMP_DIM_MAX; ix--;)
            dims = tree_cons (NULL_TREE, integer_one_node, dims);
          oacc_replace_fn_attrib (child_fn, dims);
        }
      else
        oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_DATA:
    case BUILT_IN_GOACC_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
        tree t_async = NULL_TREE;

        /* If present, use the value specified by the respective
           clause, making sure that it is of the correct type.  */
        c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
        if (c)
          t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                      integer_type_node,
                                      OMP_CLAUSE_ASYNC_EXPR (c));
        else if (!tagging)
          /* Default values for t_async.  */
          t_async = fold_convert_loc (gimple_location (entry_stmt),
                                      integer_type_node,
                                      build_int_cst (integer_type_node,
                                                     GOMP_ASYNC_SYNC));
        if (tagging && t_async)
          {
            unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

            if (TREE_CODE (t_async) == INTEGER_CST)
              {
                /* See if we can pack the async arg in to the tag's
                   operand.  */
                i_async = TREE_INT_CST_LOW (t_async);
                if (i_async < GOMP_LAUNCH_OP_MAX)
                  t_async = NULL_TREE;
                else
                  i_async = GOMP_LAUNCH_OP_MAX;
              }
            args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
                                              i_async));
          }
        if (t_async)
          args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
                                                    NULL_TREE, true,
                                                    GSI_SAME_STMT));

        /* Save the argument index, and ... */
        unsigned t_wait_idx = args.length ();
        unsigned num_waits = 0;
        c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
        if (!tagging || c)
          /* ... push a placeholder.  */
          args.safe_push (integer_zero_node);

        for (; c; c = OMP_CLAUSE_CHAIN (c))
          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
            {
              tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                           integer_type_node,
                                           OMP_CLAUSE_WAIT_EXPR (c));
              arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
                                              GSI_SAME_STMT);
              args.safe_push (arg);
              num_waits++;
            }

        if (!tagging || num_waits)
          {
            tree len;

            /* Now that we know the number, update the placeholder.  */
            if (tagging)
              len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
            else
              len = build_int_cst (integer_type_node, num_waits);
            len = fold_convert_loc (gimple_location (entry_stmt),
                                    unsigned_type_node, len);
            args[t_wait_idx] = len;
          }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  if (child_fn2)
    {
      g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
                                      build_fold_addr_expr (child_fn));
      gimple_set_location (g, gimple_location (entry_stmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having
   any internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
         region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);

      if (region->type == GIMPLE_OMP_FOR
          && gimple_omp_for_combined_p (last_nondebug_stmt (region->entry)))
        inner_stmt = last_nondebug_stmt (region->inner->entry);

      if (region->inner)
        expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_nondebug_stmt (region->entry)))
        input_location = gimple_location (last_nondebug_stmt (region->entry));

      switch (region->type)
        {
        case GIMPLE_OMP_PARALLEL:
        case GIMPLE_OMP_TASK:
          expand_omp_taskreg (region);
          break;

        case GIMPLE_OMP_FOR:
          expand_omp_for (region, inner_stmt);
          break;

        case GIMPLE_OMP_SECTIONS:
          expand_omp_sections (region);
          break;

        case GIMPLE_OMP_SECTION:
          /* Individual omp sections are handled together with their
             parent GIMPLE_OMP_SECTIONS region.  */
          break;

        case GIMPLE_OMP_STRUCTURED_BLOCK:
          /* We should have gotten rid of these in gimple lowering.  */
          gcc_unreachable ();

        case GIMPLE_OMP_SINGLE:
        case GIMPLE_OMP_SCOPE:
          expand_omp_single (region);
          break;

        case GIMPLE_OMP_ORDERED:
          {
            gomp_ordered *ord_stmt
              = as_a <gomp_ordered *> (last_nondebug_stmt (region->entry));
            if (gimple_omp_ordered_standalone_p (ord_stmt))
              {
                /* We'll expand these when expanding the corresponding
                   worksharing region with an ordered(n) clause.  */
                gcc_assert (region->outer
                            && region->outer->type == GIMPLE_OMP_FOR);
                region->ord_stmt = ord_stmt;
                break;
              }
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_MASKED:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        case GIMPLE_OMP_TARGET:
          expand_omp_target (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
        dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

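/* For instance, '#pragma omp parallel' enclosing '#pragma omp for' yields
   a two-node region tree here: the parallel region as the root and the
   for region as its single inner child (a sketch; the "OMP region tree"
   dump below shows the actual shape when dumping is enabled).  */
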
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
                     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
        {
          /* STMT is the return point out of region PARENT.  Mark it
             as the exit point and make PARENT the immediately
             enclosing region.  */
          gcc_assert (parent);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
        {
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to
             GIMPLE_OMP_RETURN, but matches with
             GIMPLE_OMP_ATOMIC_LOAD.  */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_CONTINUE)
        {
          gcc_assert (parent);
          parent->cont = bb;
        }
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
        {
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
        }
      else
        {
          region = new_omp_region (bb, code, parent);
          /* Otherwise...  */
          if (code == GIMPLE_OMP_TARGET)
            {
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_SERIAL:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
                case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_DATA:
                case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
                case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
                case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
                case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives.
                     To be precise, target data isn't stand-alone, but
                     the gimplifier put the end API call into a try-finally
                     block for it, so omp expansion can treat it as such.  */
                  region = NULL;
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          else if (code == GIMPLE_OMP_ORDERED
                   && gimple_omp_ordered_standalone_p (stmt))
            /* #pragma omp ordered depend is also just a stand-alone
               directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASK
                   && gimple_omp_task_taskwait_p (stmt))
            /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASKGROUP)
            /* #pragma omp taskgroup isn't a stand-alone directive, but
               the gimplifier put the end API call into a try-finally
               block for it, so omp expansion can treat it as such.  */
            region = NULL;
          /* ..., this directive becomes the parent for a new region.  */
          if (region)
            parent = region;
        }
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing the root of
   the OMP region tree in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  omp_free_regions ();

  return (TODO_cleanup_cfg
          | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override
  {
    bool gate = ((flag_openacc != 0 || flag_openmp != 0
                  || flag_openmp_simd != 0)
                 && !seen_error ());

    /* This pass always runs, to provide PROP_gimple_eomp.
       But often, there is nothing to do.  */
    if (!gate)
      return 0;

    return execute_expand_omp ();
  }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    return !(fun->curr_properties & PROP_gimple_eomp);
  }
  unsigned int execute (function *) final override
  {
    return execute_expand_omp ();
  }
  opt_pass * clone () final override
  {
    return new pass_expand_omp_ssa (m_ctxt);
  }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_nondebug_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_MASKED:
    case GIMPLE_OMP_SCOPE:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_ordered_standalone_p (last))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_SERIAL:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
        case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
        case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
             succs edges as abnormal to prevent splitting
             them.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.  */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i ; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;

        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}