1/* Lowering and expansion of OpenMP directives for HSA GPU agents.
2
3 Copyright (C) 2013-2017 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "tree.h"
26#include "gimple.h"
27#include "tree-pass.h"
28#include "ssa.h"
29#include "cgraph.h"
30#include "pretty-print.h"
31#include "fold-const.h"
32#include "gimplify.h"
33#include "gimple-iterator.h"
34#include "gimple-walk.h"
35#include "tree-inline.h"
36#include "langhooks.h"
37#include "omp-general.h"
38#include "omp-low.h"
39#include "omp-grid.h"
40#include "gimple-pretty-print.h"
41
42/* Return the lastprivate predicate for a given gridified loop described by
43 FD). */
44
45tree
46omp_grid_lastprivate_predicate (struct omp_for_data *fd)
47{
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49 iteration variables but they are not actually captured in this fd.
50 Fortunately, we can easily rely on HSA builtins to get this
51 information. */
52
53 tree id, size;
54 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd->for_stmt))
56 {
57 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
58 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
59 }
60 else
61 {
62 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
63 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
64 }
65 tree cond = NULL;
66 for (int dim = 0; dim < fd->collapse; dim++)
67 {
68 tree dim_tree = build_int_cstu (unsigned_type_node, dim);
69 tree u1 = build_int_cstu (unsigned_type_node, 1);
70 tree c2
71 = build2 (EQ_EXPR, boolean_type_node,
72 build2 (PLUS_EXPR, unsigned_type_node,
73 build_call_expr (id, 1, dim_tree), u1),
74 build_call_expr (size, 1, dim_tree));
75 if (cond)
76 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
77 else
78 cond = c2;
79 }
80 return cond;
81}
82
/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, one per collapsed dimension (at most three), if requested by
     the user, or NULL if not requested.  */
  tree group_sizes[3];
};
101
102#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103 "gridified HSA kernel because "
104
105/* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
108
109static bool
110grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
111{
112 gassign *assign = dyn_cast <gassign *> (stmt);
113 if (!assign)
114 return false;
115 if (gimple_clobber_p (assign))
116 return true;
117 tree lhs = gimple_assign_lhs (assign);
118 if (!VAR_P (lhs)
119 || !is_gimple_reg_type (TREE_TYPE (lhs))
120 || is_global_var (lhs))
121 return false;
122 if (grid)
123 for (unsigned i = 0; i < grid->collapse; i++)
124 if (lhs == grid->group_sizes[i])
125 return false;
126 return true;
127}
128
129/* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
131
132static bool
133grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
134{
135 if (!seq)
136 return true;
137
138 gimple_stmt_iterator gsi;
139 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
141 return false;
142 return true;
143}
144
/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement is encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  NAME is the
   name of the enclosing OMP construct, used only for diagnostic dumps.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
					  const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Harmless local assignments are always acceptable.  */
      if (grid_safe_assignment_p (stmt, grid))
	continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into binds; a failure anywhere below propagates up.  */
	  gimple_seq bind_body = gimple_bind_body (bind);
	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
							 ret))
	    return false;
	}
      else if (is_gimple_omp (stmt))
	{
	  /* Encountering a second OMP statement means the pattern does not
	     match.  */
	  if (*ret)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "%s construct "
				   "contains multiple OpenMP constructs\n",
				   name);
		  dump_printf_loc (MSG_NOTE, gimple_location (*ret),
				   "The first OpenMP construct within "
				   "a parallel\n");
		  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
				   "The second OpenMP construct within "
				   "a parallel\n");
		}
	      return false;
	    }
	  *ret = stmt;
	}
      else
	{
	  /* Anything else is too complex to analyze.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "%s construct contains "
			       "a complex statement\n", name);
	      dump_printf_loc (MSG_NOTE, gimple_location (stmt),
			       "This statement cannot be analyzed for "
			       "gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
207
208/* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213 dumping is enabled and function fails, use NAME to dump a note with the
214 reason for failure. */
215
216static gimple *
217grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
218 const char *name)
219{
220 if (!seq)
221 {
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
224 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
225 name);
226 return NULL;
227 }
228
229 gimple *ret = NULL;
230 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
231 {
232 if (!ret && dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
234 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
235 " any other OpenMP construct\n", name);
236 return ret;
237 }
238 else
239 return NULL;
240}
241
242/* Walker function looking for statements there is no point gridifying (and for
243 noreturn function calls which we cannot do). Return non-NULL if such a
244 function is found. */
245
246static tree
247grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
248 bool *handled_ops_p,
249 struct walk_stmt_info *wi)
250{
251 *handled_ops_p = false;
252 gimple *stmt = gsi_stmt (*gsi);
253 switch (gimple_code (stmt))
254 {
255 case GIMPLE_CALL:
256 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
257 {
258 *handled_ops_p = true;
259 wi->info = stmt;
260 return error_mark_node;
261 }
262 break;
263
264 /* We may reduce the following list if we find a way to implement the
265 clauses, but now there is no point trying further. */
266 case GIMPLE_OMP_CRITICAL:
267 case GIMPLE_OMP_TASKGROUP:
268 case GIMPLE_OMP_TASK:
269 case GIMPLE_OMP_SECTION:
270 case GIMPLE_OMP_SECTIONS:
271 case GIMPLE_OMP_SECTIONS_SWITCH:
272 case GIMPLE_OMP_TARGET:
273 case GIMPLE_OMP_ORDERED:
274 *handled_ops_p = true;
275 wi->info = stmt;
276 return error_mark_node;
277 default:
278 break;
279 }
280 return NULL;
281}
282
283/* Examine clauses of omp parallel statement PAR and if any prevents
284 gridification, issue a missed-optimization diagnostics and return false,
285 otherwise return true. GRID describes hitherto discovered properties of the
286 loop that is evaluated for possible gridification. */
287
288static bool
289grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc)
290{
291 tree clauses = gimple_omp_parallel_clauses (par);
292 while (clauses)
293 {
294 switch (OMP_CLAUSE_CODE (clauses))
295 {
296 case OMP_CLAUSE_NUM_THREADS:
297 if (dump_enabled_p ())
298 {
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
300 GRID_MISSED_MSG_PREFIX "because there is "
301 "a num_threads clause of the parallel "
302 "construct\n");
303 dump_printf_loc (MSG_NOTE, gimple_location (par),
304 "Parallel construct has a num_threads clause\n");
305 }
306 return false;
307
308 case OMP_CLAUSE_REDUCTION:
309 if (dump_enabled_p ())
310 {
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
312 GRID_MISSED_MSG_PREFIX "a reduction clause "
313 "is present\n ");
314 dump_printf_loc (MSG_NOTE, gimple_location (par),
315 "Parallel construct has a reduction clause\n");
316 }
317 return false;
318
319 default:
320 break;
321 }
322 clauses = OMP_CLAUSE_CHAIN (clauses);
323 }
324 return true;
325}
326
327/* Examine clauses and the body of omp loop statement GFOR and if something
328 prevents gridification, issue a missed-optimization diagnostics and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
331
332static bool
333grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
334{
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
336 grid))
337 {
338 if (dump_enabled_p ())
339 {
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
341 GRID_MISSED_MSG_PREFIX "the inner loop "
342 "loop bounds computation contains a complex "
343 "statement\n");
344 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
345 "Loop construct cannot be analyzed for "
346 "gridification\n");
347 }
348 return false;
349 }
350
351 tree clauses = gimple_omp_for_clauses (gfor);
352 while (clauses)
353 {
354 switch (OMP_CLAUSE_CODE (clauses))
355 {
356 case OMP_CLAUSE_SCHEDULE:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
358 {
359 if (dump_enabled_p ())
360 {
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
362 GRID_MISSED_MSG_PREFIX "the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
365 "Loop construct has a non automatic "
366 "schedule clause\n");
367 }
368 return false;
369 }
370 break;
371
372 case OMP_CLAUSE_REDUCTION:
373 if (dump_enabled_p ())
374 {
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
376 GRID_MISSED_MSG_PREFIX "a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
379 "Loop construct has a reduction schedule "
380 "clause\n");
381 }
382 return false;
383
384 default:
385 break;
386 }
387 clauses = OMP_CLAUSE_CHAIN (clauses);
388 }
389 struct walk_stmt_info wi;
390 memset (&wi, 0, sizeof (wi));
391 if (walk_gimple_seq (gimple_omp_body (gfor),
392 grid_find_ungridifiable_statement,
393 NULL, &wi))
394 {
395 gimple *bad = (gimple *) wi.info;
396 if (dump_enabled_p ())
397 {
398 if (is_gimple_call (bad))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
400 GRID_MISSED_MSG_PREFIX "the inner loop contains "
401 "call to a noreturn function\n");
402 else
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
404 GRID_MISSED_MSG_PREFIX "the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name[(int) gimple_code (bad)]);
407 dump_printf_loc (MSG_NOTE, gimple_location (bad),
408 "This statement cannot be analyzed for "
409 "gridification\n");
410 }
411 return false;
412 }
413 return true;
414}
415
/* Given distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if it
   can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  location_t tloc = grid->target_loc;
  /* The distribute body must consist of a sole parallel construct (plus
     harmless local assignments) with gridifiable clauses...  */
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
							 grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  /* ... which in turn must contain a sole simple loop construct.  */
  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
						 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
			 "a simple for loop\n");
      return false;
    }
  /* In a combined construct all involved loops share one collapse count.  */
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
455
456/* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
460
461static bool
462grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
463{
464 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
465 {
466 if (dump_enabled_p ())
467 {
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
469 GRID_MISSED_MSG_PREFIX "an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
472 "This statement is not a simple for loop\n");
473 }
474 return false;
475 }
476
477 if (!grid_inner_loop_gridifiable_p (gfor, grid))
478 return false;
479
480 if (gimple_omp_for_collapse (gfor) != grid->collapse)
481 {
482 if (dump_enabled_p ())
483 {
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
485 GRID_MISSED_MSG_PREFIX "an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
488 "Loop construct uses a different collapse clause\n");
489 }
490 return false;
491 }
492
493 struct omp_for_data fd;
494 struct omp_for_data_loop *loops
495 = (struct omp_for_data_loop *)alloca (grid->collapse
496 * sizeof (struct omp_for_data_loop));
497 omp_extract_for_data (gfor, &fd, loops);
498 for (unsigned i = 0; i < grid->collapse; i++)
499 {
500 tree itype, type = TREE_TYPE (fd.loops[i].v);
501 if (POINTER_TYPE_P (type))
502 itype = signed_type_for (type);
503 else
504 itype = type;
505
506 tree n1 = fold_convert (itype, fd.loops[i].n1);
507 tree n2 = fold_convert (itype, fd.loops[i].n2);
508 tree t = build_int_cst (itype,
509 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
510 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
511 t = fold_build2 (PLUS_EXPR, itype, t, n2);
512 t = fold_build2 (MINUS_EXPR, itype, t, n1);
513 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
514 t = fold_build2 (TRUNC_DIV_EXPR, itype,
515 fold_build1 (NEGATE_EXPR, itype, t),
516 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
517 else
518 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
519
520 if (!operand_equal_p (grid->group_sizes[i], t, 0))
521 {
522 if (dump_enabled_p ())
523 {
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
525 GRID_MISSED_MSG_PREFIX "the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
528 "Loop construct does not seem to loop over "
529 "a tile size\n");
530 }
531 return false;
532 }
533 }
534 return true;
535}
536
537/* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
539
540static bool
541grid_call_permissible_in_distribute_p (tree fndecl)
542{
543 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
544 return true;
545
546 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
547 if (strstr (name, "omp_") != name)
548 return false;
549
550 if ((strcmp (name, "omp_get_thread_num") == 0)
551 || (strcmp (name, "omp_get_num_threads") == 0)
552 || (strcmp (name, "omp_get_num_teams") == 0)
553 || (strcmp (name, "omp_get_team_num") == 0)
554 || (strcmp (name, "omp_get_level") == 0)
555 || (strcmp (name, "omp_get_active_level") == 0)
556 || (strcmp (name, "omp_in_parallel") == 0))
557 return true;
558
559 return false;
560}
561
562/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
565
566static bool
567grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
568{
569 gimple *stmt = gsi_stmt (*gsi);
570 tree fndecl = gimple_call_fndecl (stmt);
571 gcc_checking_assert (stmt);
572 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
573 return false;
574
575 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
576 if ((strcmp (name, "omp_get_thread_num") == 0)
577 || (strcmp (name, "omp_get_level") == 0)
578 || (strcmp (name, "omp_get_active_level") == 0)
579 || (strcmp (name, "omp_in_parallel") == 0))
580 {
581 tree lhs = gimple_call_lhs (stmt);
582 if (lhs)
583 {
584 gassign *assign
585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
586 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
587 }
588 gsi_remove (gsi, true);
589 return true;
590 }
591
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
595 || (strcmp (name, "omp_get_num_teams") == 0)
596 || (strcmp (name, "omp_get_team_num") == 0));
597 return false;
598}
599
/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if seq is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
				  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Harmless local assignments and plain control flow are fine.  */
      if (grid_safe_assignment_p (stmt, grid)
	  || gimple_code (stmt) == GIMPLE_GOTO
	  || gimple_code (stmt) == GIMPLE_LABEL
	  || gimple_code (stmt) == GIMPLE_COND)
	continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into binds.  */
	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
	{
	  /* try..finally regions are analyzed recursively; try..catch
	     cannot be handled.  */
	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "the distribute "
				   "construct contains a try..catch region\n");
		  dump_printf_loc (MSG_NOTE, gimple_location (try_stmt),
				   "This statement cannot be analyzed for "
				   "tiled gridification\n");
		}
	      return false;
	    }
	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
						 grid, in_parallel))
	    return false;
	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
						 grid, in_parallel))
	    return false;
	  continue;
	}
      else if (is_gimple_call (stmt))
	{
	  /* Only a few known OpenMP runtime functions may be called here.  */
	  tree fndecl = gimple_call_fndecl (stmt);
	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
	    continue;

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a call\n");
	      dump_printf_loc (MSG_NOTE, gimple_location (stmt),
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
	{
	  /* A parallel construct may appear only at the distribute level and
	     must itself be gridifiable.  */
	  if (in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a parallel "
				   "construct contains another parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
				   "This parallel construct is nested in "
				   "another one\n");
		}
	      return false;
	    }
	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
						    grid, true))
	    return false;
	}
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
	{
	  /* Conversely, a loop construct must be nested within a parallel
	     construct and must itself follow the tiling pattern.  */
	  if (!in_parallel)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
				   GRID_MISSED_MSG_PREFIX "a loop "
				   "construct is not nested within a parallel "
				   "construct\n");
		  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
				   "This loop construct is not nested in "
				   "a parallel construct\n");
		}
	      return false;
	    }
	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
	    return false;
	}
      else
	{
	  /* Anything else is too complex to analyze.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			       GRID_MISSED_MSG_PREFIX "the distribute "
			       "construct contains a complex statement\n");
	      dump_printf_loc (MSG_NOTE, gimple_location (stmt),
			       "This statement cannot be analyzed for "
			       "tiled gridification\n");
	    }
	  return false;
	}
    }
  return true;
}
726
727/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
730
731static bool
732grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
733{
734 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
735 return false;
736
737 location_t tloc = gimple_location (target);
738 grid->target_loc = tloc;
739 gimple *stmt
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
741 grid, "target");
742 if (!stmt)
743 return false;
744 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
745 tree group_size = NULL;
746 if (!teams)
747 {
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
749 GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
750 "construct in it.\n");
751 return false;
752 }
753
754 tree clauses = gimple_omp_teams_clauses (teams);
755 while (clauses)
756 {
757 switch (OMP_CLAUSE_CODE (clauses))
758 {
759 case OMP_CLAUSE_NUM_TEAMS:
760 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
762 GRID_MISSED_MSG_PREFIX "the teams construct "
763 "contains a num_teams clause\n ");
764 return false;
765
766 case OMP_CLAUSE_REDUCTION:
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
769 GRID_MISSED_MSG_PREFIX "a reduction "
770 "clause is present\n ");
771 return false;
772
773 case OMP_CLAUSE_THREAD_LIMIT:
774 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
775 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
776 break;
777
778 default:
779 break;
780 }
781 clauses = OMP_CLAUSE_CHAIN (clauses);
782 }
783
784 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
785 "teams");
786 if (!stmt)
787 return false;
788 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
789 if (!dist)
790 {
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
792 GRID_MISSED_MSG_PREFIX "the teams construct does not "
793 "have a single distribute construct in it.\n");
794 return false;
795 }
796
797 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
798
799 grid->collapse = gimple_omp_for_collapse (dist);
800 if (grid->collapse > 3)
801 {
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
804 GRID_MISSED_MSG_PREFIX "the distribute construct "
805 "contains collapse clause with parameter greater "
806 "than 3\n");
807 return false;
808 }
809
810 struct omp_for_data fd;
811 struct omp_for_data_loop *dist_loops
812 = (struct omp_for_data_loop *)alloca (grid->collapse
813 * sizeof (struct omp_for_data_loop));
814 omp_extract_for_data (dist, &fd, dist_loops);
815 if (fd.chunk_size)
816 {
817 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
818 {
819 if (dump_enabled_p ())
820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
821 GRID_MISSED_MSG_PREFIX "the teams "
822 "thread limit is different from distribute "
823 "schedule chunk\n");
824 return false;
825 }
826 group_size = fd.chunk_size;
827 }
828 if (group_size && grid->collapse > 1)
829 {
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
832 GRID_MISSED_MSG_PREFIX "group size cannot be "
833 "set using thread_limit or schedule clauses "
834 "when also using a collapse clause greater than 1\n");
835 return false;
836 }
837
838 if (gimple_omp_for_combined_p (dist))
839 {
840 grid->tiling = false;
841 grid->group_sizes[0] = group_size;
842 for (unsigned i = 1; i < grid->collapse; i++)
843 grid->group_sizes[i] = NULL;
844 return grid_dist_follows_simple_pattern (dist, grid);
845 }
846 else
847 {
848 grid->tiling = true;
849 if (group_size)
850 {
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
853 GRID_MISSED_MSG_PREFIX "group size cannot be set "
854 "using thread_limit or schedule clauses when "
855 "distribute and loop constructs do not form "
856 "one combined construct\n");
857 return false;
858 }
859 for (unsigned i = 0; i < grid->collapse; i++)
860 {
861 if (fd.loops[i].cond_code == GT_EXPR)
862 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
863 TREE_TYPE (fd.loops[i].step),
864 fd.loops[i].step);
865 else
866 grid->group_sizes[i] = fd.loops[i].step;
867 }
868 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
869 false);
870 }
871}
872
873/* Operand walker, used to remap pre-body declarations according to a hash map
874 provided in DATA. */
875
876static tree
877grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
878{
879 tree t = *tp;
880
881 if (DECL_P (t) || TYPE_P (t))
882 *walk_subtrees = 0;
883 else
884 *walk_subtrees = 1;
885
886 if (VAR_P (t))
887 {
888 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
889 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
890 tree *repl = declmap->get (t);
891 if (repl)
892 *tp = *repl;
893 }
894 return NULL_TREE;
895}
896
/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};
902
/* Mark VAR so that it is eventually placed into SEGMENT by attaching the
   corresponding HSA segment attribute to it and making it static.
   NOTE(review): an earlier version of the leading comment also mentioned
   emitting an artificial builtin call into a SEQ, but this function has no
   such parameter.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making variables non-addressable would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      /* Callers are not expected to request GRID_SEGMENT_PRIVATE here.  */
      gcc_unreachable ();
    }

  /* Give the variable static storage so that the segment attribute survives
     into the back end.  */
  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      varpool_node::finalize_decl (var);
    }

}
939
/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
				     gbind *tgt_bind,
				     enum grid_var_segment var_segment,
				     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into binds, marking segments of their variables when
	     requested.  */
	  gimple *r = grid_copy_leading_local_assignments
	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

	  if (var_segment != GRID_SEGMENT_PRIVATE)
	    for (tree var = gimple_bind_vars (bind);
		 var;
		 var = DECL_CHAIN (var))
	      grid_mark_variable_segment (var, var_segment);
	  if (r)
	    return r;
	  else
	    continue;
	}
      /* Stop at the first statement that is not a safe local assignment and
	 hand it back to the caller.  */
      if (!grid_safe_assignment_p (stmt, NULL))
	return stmt;
      /* Create a temporary for the assigned variable, record the mapping so
	 that subsequent operand walks substitute it, and emit a remapped copy
	 of the assignment before DST.  */
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
				 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}
989
990/* Statement walker function to make adjustments to statements within the
991 gridifed kernel copy. */
992
993static tree
994grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
995 struct walk_stmt_info *)
996{
997 *handled_ops_p = false;
998 gimple *stmt = gsi_stmt (*gsi);
999 if (gimple_code (stmt) == GIMPLE_OMP_FOR
1000 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1001 {
1002 gomp_for *loop = as_a <gomp_for *> (stmt);
1003 tree clauses = gimple_omp_for_clauses (loop);
1004 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1005 if (cl)
1006 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1007 else
1008 {
1009 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1010 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1011 OMP_CLAUSE_CHAIN (c) = clauses;
1012 gimple_omp_for_set_clauses (loop, c);
1013 }
1014 }
1015 return NULL_TREE;
1016}
1017
1018/* Given a PARLOOP that is a normal for looping construct but also a part of a
1019 combined construct with a simd loop, eliminate the simd loop. */
1020
1021static void
1022grid_eliminate_combined_simd_part (gomp_for *parloop)
1023{
1024 struct walk_stmt_info wi;
1025
1026 memset (&wi, 0, sizeof (wi));
1027 wi.val_only = true;
1028 enum gf_mask msk = GF_OMP_FOR_SIMD;
1029 wi.info = (void *) &msk;
1030 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1031 gimple *stmt = (gimple *) wi.info;
1032 /* We expect that the SIMD id the only statement in the parallel loop. */
1033 gcc_assert (stmt
1034 && gimple_code (stmt) == GIMPLE_OMP_FOR
1035 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1036 && gimple_omp_for_combined_into_p (stmt)
1037 && !gimple_omp_for_combined_p (stmt));
1038 gomp_for *simd = as_a <gomp_for *> (stmt);
1039
1040 /* Copy over the iteration properties because the body refers to the index in
1041 the bottmom-most loop. */
1042 unsigned i, collapse = gimple_omp_for_collapse (parloop);
1043 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1044 for (i = 0; i < collapse; i++)
1045 {
1046 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1047 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1048 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1049 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1050 }
1051
1052 tree *tgt= gimple_omp_for_clauses_ptr (parloop);
1053 while (*tgt)
1054 tgt = &OMP_CLAUSE_CHAIN (*tgt);
1055
1056 /* Copy over all clauses, except for linaer clauses, which are turned into
1057 private clauses, and all other simd-specificl clauses, which are
1058 ignored. */
1059 tree *pc = gimple_omp_for_clauses_ptr (simd);
1060 while (*pc)
1061 {
1062 tree c = *pc;
1063 switch (TREE_CODE (c))
1064 {
1065 case OMP_CLAUSE_LINEAR:
1066 {
1067 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1068 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1069 OMP_CLAUSE_CHAIN (priv) = NULL;
1070 *tgt = priv;
1071 tgt = &OMP_CLAUSE_CHAIN (priv);
1072 pc = &OMP_CLAUSE_CHAIN (c);
1073 break;
1074 }
1075
1076 case OMP_CLAUSE_SAFELEN:
1077 case OMP_CLAUSE_SIMDLEN:
1078 case OMP_CLAUSE_ALIGNED:
1079 pc = &OMP_CLAUSE_CHAIN (c);
1080 break;
1081
1082 default:
1083 *pc = OMP_CLAUSE_CHAIN (c);
1084 OMP_CLAUSE_CHAIN (c) = NULL;
1085 *tgt = c;
1086 tgt = &OMP_CLAUSE_CHAIN(c);
1087 break;
1088 }
1089 }
1090
1091 /* Finally, throw away the simd and mark the parallel loop as not
1092 combined. */
1093 gimple_omp_set_body (parloop, gimple_omp_body (simd));
1094 gimple_omp_for_set_combined_p (parloop, false);
1095}
1096
/* Statement walker function turning loops within a tiled construct into grid
   loops representing threads of a particular thread group.  */
1099
static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      /* Turn the loop into a grid loop iterated by threads within a thread
	 group.  */
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
	grid_eliminate_combined_simd_part (loop);

      /* Adjust any simd constructs remaining within the loop body.  */
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
			   grid_process_grid_body, NULL, &body_wi);

      /* WI_IN->INFO is the bind of the enclosing parallel (set up by
	 grid_mark_tiling_parallels_and_loops), which receives group-segment
	 copies of lastprivate variables.  */
      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
	  {
	    push_gimplify_context ();
	    /* GV is a group-segment copy of the original variable OV.  */
	    tree ov = OMP_CLAUSE_DECL (c);
	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
				     TREE_TYPE (ov));

	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
	    DECL_CONTEXT (gv) = current_function_decl;
	    gimple_bind_append_vars (bind, gv);
	    /* In the lastprivate finalization sequence, store into the group
	       copy GV...  */
	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
	    /* ...and after the loop statement, copy GV back into OV.  */
	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
	    gimple_seq l = NULL;
	    gimplify_and_add (x, &l);
	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
	    pop_gimplify_context (bind);
	  }
    }
  return NULL_TREE;
}
1142
1143/* Statement walker function marking all parallels as grid_phony and loops as
1144 grid ones representing threads of a particular thread group. */
1145
1146static tree
1147grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1148 bool *handled_ops_p,
1149 struct walk_stmt_info *wi_in)
1150{
1151 *handled_ops_p = false;
1152 wi_in->removed_stmt = false;
1153 gimple *stmt = gsi_stmt (*gsi);
1154 if (gbind *bind = dyn_cast <gbind *> (stmt))
1155 {
1156 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1157 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1158 }
1159 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1160 {
1161 *handled_ops_p = true;
1162 gimple_omp_parallel_set_grid_phony (parallel, true);
1163
1164 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1165 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1166 gimple_seq s = NULL;
1167 gimple_seq_add_stmt (&s, new_bind);
1168 gimple_omp_set_body (parallel, s);
1169
1170 struct walk_stmt_info wi_par;
1171 memset (&wi_par, 0, sizeof (wi_par));
1172 wi_par.info = new_bind;
1173 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1174 grid_mark_tiling_loops, NULL, &wi_par);
1175 }
1176 else if (is_a <gcall *> (stmt))
1177 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1178 return NULL_TREE;
1179}
1180
/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignment leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */
1185
static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
			       gimple_stmt_iterator *dst,
			       gbind *tgt_bind, struct walk_stmt_info *wi)
{
  /* The first OMP statement in SEQ must be a teams construct; everything
     leading up to it is copied in front of DST.  */
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
						      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  /* Next comes the distribute loop.  */
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
					      tgt_bind, GRID_SEGMENT_GLOBAL,
					      wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					 GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      /* When tiling, the distribute loop itself becomes a grid loop
	 iterating over thread groups; parallels and loops within it are
	 marked by the dedicated walker.  */
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
			   grid_mark_tiling_parallels_and_loops, NULL,
			   &wi_tiled);
      return dist;
    }
  else
    {
      /* Without tiling, the distribute and parallel constructs are phony
	 and the innermost loop becomes the grid loop used for kernel
	 dispatch.  */
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
						  tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
						  dst, tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					     GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
	grid_eliminate_combined_simd_part (inner_loop);
      /* Adjust any remaining simd constructs within the loop body.  */
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
			   grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}
1248
1249/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1250 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
1251 is the bind into which temporaries inserted before TARGET should be
1252 added. */
1253
static void
grid_attempt_target_gridification (gomp_target *target,
				   gimple_stmt_iterator *gsi,
				   gbind *tgt_bind)
{
  /* Collect the gridification properties of TARGET; give up unless its body
     follows the expected gridifiable pattern.  */
  grid_prop grid;
  memset (&grid, 0, sizeof (grid));
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "Target construct will be turned into a gridified HSA "
		     "kernel\n");

  /* Copy target body to a GPUKERNEL construct: */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  /* DECLMAP maps copied prebody decls to their new temporaries; it is
     consulted by grid_remap_prebody_decls through WI.  */
  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
							tgt_bind, &wi);

  /* Splice the block of the kernel copy into the block tree that encloses
     the original target body.  */
  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  /* Append the kernel copy, wrapped in a grid-body statement, to the
     original target body.  */
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  /* For each collapsed dimension, compute the grid size (the dispatch
     loop's iteration count) and the group size, and record both on TARGET
     as a _griddim_ clause.  */
  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      /* Do the iteration-count arithmetic in a signed type when the index
	 is a pointer.  */
      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      omp_adjust_for_condition (loc, &cond_code, &n2);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
      tree step
	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));

      /* Iteration count: (n2 - n1 + step -/+ 1) / step, or zero when the
	 loop condition is false on entry.  */
      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
	/* Negate both operands so the unsigned division is performed on
	   positive values.  */
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
	{
	  /* When tiling, scale the count by the (made positive) step.  */
	  if (cond_code == GT_EXPR)
	    step = fold_build1 (NEGATE_EXPR, itype, step);
	  t = fold_build2 (MULT_EXPR, itype, t, step);
	}

      /* Gimplify the grid size into a value before the target statement.  */
      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      /* Group size; zero when none was specified for this dimension
	 (presumably leaving the choice to the run-time — confirm against
	 the HSA run-time interface).  */
      tree ws;
      if (grid.group_sizes[i])
	{
	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
	  tmpseq = NULL;
	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
	  if (!gimple_seq_empty_p (tmpseq))
	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
	}
      else
	ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}
1369
1370/* Walker function doing all the work for create_target_kernels. */
1371
1372static tree
1373grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1374 bool *handled_ops_p,
1375 struct walk_stmt_info *incoming)
1376{
1377 *handled_ops_p = false;
1378
1379 gimple *stmt = gsi_stmt (*gsi);
1380 gomp_target *target = dyn_cast <gomp_target *> (stmt);
1381 if (target)
1382 {
1383 gbind *tgt_bind = (gbind *) incoming->info;
1384 gcc_checking_assert (tgt_bind);
1385 grid_attempt_target_gridification (target, gsi, tgt_bind);
1386 return NULL_TREE;
1387 }
1388 gbind *bind = dyn_cast <gbind *> (stmt);
1389 if (bind)
1390 {
1391 *handled_ops_p = true;
1392 struct walk_stmt_info wi;
1393 memset (&wi, 0, sizeof (wi));
1394 wi.info = bind;
1395 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1396 grid_gridify_all_targets_stmt, NULL, &wi);
1397 }
1398 return NULL_TREE;
1399}
1400
1401/* Attempt to gridify all target constructs in BODY_P. All such targets will
1402 have their bodies duplicated, with the new copy being put into a
1403 gimple_omp_grid_body statement. All kernel-related construct within the
1404 grid_body will be marked with phony flags or kernel kinds. Moreover, some
1405 re-structuring is often needed, such as copying pre-bodies before the target
1406 construct so that kernel grid sizes can be computed. */
1407
1408void
1409omp_grid_gridify_all_targets (gimple_seq *body_p)
1410{
1411 struct walk_stmt_info wi;
1412 memset (&wi, 0, sizeof (wi));
1413 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
1414}
1415