1 | /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute |
2 | constructs |
3 | |
4 | Copyright (C) 2020-2024 Free Software Foundation, Inc. |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "tree.h" |
28 | #include "langhooks.h" |
29 | #include "gimple.h" |
30 | #include "tree-pass.h" |
31 | #include "cgraph.h" |
32 | #include "fold-const.h" |
33 | #include "gimplify.h" |
34 | #include "gimple-iterator.h" |
35 | #include "gimple-walk.h" |
36 | #include "gomp-constants.h" |
37 | #include "omp-general.h" |
38 | #include "diagnostic-core.h" |
39 | |
40 | |
41 | /* This preprocessing pass is run immediately before lower_omp. It decomposes |
42 | OpenACC 'kernels' constructs into parts, a sequence of compute constructs. |
43 | |
44 | The translation is as follows: |
45 | - The entire 'kernels' region is turned into a 'data' region with clauses |
46 | taken from the 'kernels' region. New 'create' clauses are added for all |
47 | variables declared at the top level in the kernels region. |
48 | - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in |
49 | a new compute construct. |
50 | - 'loop' directives without an explicit 'independent' or 'seq' clause |
51 | get an 'auto' clause added; other clauses are preserved on the loop |
52 | or moved to the new surrounding compute construct, as applicable. |
53 | - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped |
54 | in new "gang-single" compute construct: 'worker'/'vector' parallelism is |
55 | preserved, but 'num_gangs (1)' is enforced. |
56 | - Both points above only apply at the topmost level in the region, that |
57 | is, the transformation does not introduce new compute constructs inside |
58 | nested statement bodies. In particular, this means that a |
59 | gang-parallelizable loop inside an 'if' statement is made "gang-single". |
60 | - In order to make the host wait only once for the whole region instead |
61 | of once per device kernel launch, the new compute constructs are |
62 | annotated 'async'. Unless the original 'kernels' construct already was |
63 | marked 'async', the entire region ends with a 'wait' directive. If the |
64 | original 'kernels' construct was marked 'async', the synthesized 'async' |
65 | clauses use the original 'kernels' construct's 'async' argument |
66 | (possibly implicit). |
67 | */ |
68 | |
69 | |
70 | /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind |
71 | 'device_type', so we have to defer a lot of processing until we're in the |
72 | offloading compilation. "Fortunately", GCC doesn't support the OpenACC |
73 | 'device_type' clause yet, so we get away that. */ |
74 | |
75 | |
76 | /* Helper function for decompose_kernels_region_body. If STMT contains a |
77 | "top-level" OMP_FOR statement, returns a pointer to that statement; |
78 | returns NULL otherwise. |
79 | |
80 | A "top-level" OMP_FOR statement is one that is possibly accompanied by |
81 | small snippets of setup code. Specifically, this function accepts an |
82 | OMP_FOR possibly wrapped in a singleton bind and a singleton try |
83 | statement to allow for a local loop variable, but not an OMP_FOR |
84 | statement nested in any other constructs. Alternatively, it accepts a |
85 | non-singleton bind containing only assignments and then an OMP_FOR |
86 | statement at the very end. The former style can be generated by the C |
87 | frontend, the latter by the Fortran frontend. */ |
88 | |
89 | static gimple * |
90 | top_level_omp_for_in_stmt (gimple *stmt) |
91 | { |
92 | if (gimple_code (g: stmt) == GIMPLE_OMP_FOR) |
93 | return stmt; |
94 | |
95 | if (gimple_code (g: stmt) == GIMPLE_BIND) |
96 | { |
97 | gimple_seq body = gimple_bind_body (gs: as_a <gbind *> (p: stmt)); |
98 | if (gimple_seq_singleton_p (seq: body)) |
99 | { |
100 | /* Accept an OMP_FOR statement, or a try statement containing only |
101 | a single OMP_FOR. */ |
102 | gimple *maybe_for_or_try = gimple_seq_first_stmt (s: body); |
103 | if (gimple_code (g: maybe_for_or_try) == GIMPLE_OMP_FOR) |
104 | return maybe_for_or_try; |
105 | else if (gimple_code (g: maybe_for_or_try) == GIMPLE_TRY) |
106 | { |
107 | gimple_seq try_body = gimple_try_eval (gs: maybe_for_or_try); |
108 | if (!gimple_seq_singleton_p (seq: try_body)) |
109 | return NULL; |
110 | gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (s: try_body); |
111 | if (gimple_code (g: maybe_omp_for_stmt) == GIMPLE_OMP_FOR) |
112 | return maybe_omp_for_stmt; |
113 | } |
114 | } |
115 | else |
116 | { |
117 | gimple_stmt_iterator gsi; |
118 | /* Accept only a block of optional assignments followed by an |
119 | OMP_FOR at the end. No other kinds of statements allowed. */ |
120 | for (gsi = gsi_start (seq&: body); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
121 | { |
122 | gimple *body_stmt = gsi_stmt (i: gsi); |
123 | if (gimple_code (g: body_stmt) == GIMPLE_ASSIGN) |
124 | continue; |
125 | else if (gimple_code (g: body_stmt) == GIMPLE_OMP_FOR |
126 | && gsi_one_before_end_p (i: gsi)) |
127 | return body_stmt; |
128 | else |
129 | return NULL; |
130 | } |
131 | } |
132 | } |
133 | |
134 | return NULL; |
135 | } |
136 | |
137 | /* Helper for adjust_region_code: evaluate the statement at GSI_P. */ |
138 | |
139 | static tree |
140 | adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p, |
141 | bool *handled_ops_p, |
142 | struct walk_stmt_info *wi) |
143 | { |
144 | int *region_code = (int *) wi->info; |
145 | |
146 | gimple *stmt = gsi_stmt (i: *gsi_p); |
147 | switch (gimple_code (g: stmt)) |
148 | { |
149 | case GIMPLE_OMP_FOR: |
150 | { |
151 | tree clauses = gimple_omp_for_clauses (gs: stmt); |
152 | if (omp_find_clause (clauses, kind: OMP_CLAUSE_INDEPENDENT)) |
153 | { |
154 | /* Explicit 'independent' clause. */ |
155 | /* Keep going; recurse into loop body. */ |
156 | break; |
157 | } |
158 | else if (omp_find_clause (clauses, kind: OMP_CLAUSE_SEQ)) |
159 | { |
160 | /* Explicit 'seq' clause. */ |
161 | /* We'll "parallelize" if at some level a loop construct has been |
162 | marked up by the user as unparallelizable ('seq' clause; we'll |
163 | respect that in the later processing). Given that the user has |
164 | explicitly marked it up, this loop construct cannot be |
165 | performance-critical, and in this case it's also fine to |
166 | "parallelize" instead of "gang-single", because any outer or |
167 | inner loops may still exploit the available parallelism. */ |
168 | /* Keep going; recurse into loop body. */ |
169 | break; |
170 | } |
171 | else |
172 | { |
173 | /* Explicit or implicit 'auto' clause. */ |
174 | /* The user would like this loop analyzed ('auto' clause) and |
175 | typically parallelized, but we don't have available yet the |
176 | compiler logic to analyze this, so can't parallelize it here, so |
177 | we'd very likely be running into a performance problem if we |
178 | were to execute this unparallelized, thus forward the whole loop |
179 | nest to 'parloops'. */ |
180 | *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; |
181 | /* Terminate: final decision for this region. */ |
182 | *handled_ops_p = true; |
183 | return integer_zero_node; |
184 | } |
185 | gcc_unreachable (); |
186 | } |
187 | |
188 | case GIMPLE_COND: |
189 | case GIMPLE_GOTO: |
190 | case GIMPLE_SWITCH: |
191 | case GIMPLE_ASM: |
192 | case GIMPLE_ASSUME: |
193 | case GIMPLE_TRANSACTION: |
194 | case GIMPLE_RETURN: |
195 | /* Statement that might constitute some looping/control flow pattern. */ |
196 | /* The user would like this code analyzed (implicit inside a 'kernels' |
197 | region) and typically parallelized, but we don't have available yet |
198 | the compiler logic to analyze this, so can't parallelize it here, so |
199 | we'd very likely be running into a performance problem if we were to |
200 | execute this unparallelized, thus forward the whole thing to |
201 | 'parloops'. */ |
202 | *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; |
203 | /* Terminate: final decision for this region. */ |
204 | *handled_ops_p = true; |
205 | return integer_zero_node; |
206 | |
207 | default: |
208 | /* Keep going. */ |
209 | break; |
210 | } |
211 | |
212 | return NULL; |
213 | } |
214 | |
215 | /* Adjust the REGION_CODE for the region in GS. */ |
216 | |
217 | static void |
218 | adjust_region_code (gimple_seq gs, int *region_code) |
219 | { |
220 | struct walk_stmt_info wi; |
221 | memset (s: &wi, c: 0, n: sizeof (wi)); |
222 | wi.info = region_code; |
223 | walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi); |
224 | } |
225 | |
226 | /* Helper function for make_loops_gang_single for walking the tree. If the |
227 | statement indicated by GSI_P is an OpenACC for loop with a gang clause, |
228 | issue a warning and remove the clause. */ |
229 | |
230 | static tree |
231 | visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p, |
232 | bool *handled_ops_p, |
233 | struct walk_stmt_info *) |
234 | { |
235 | *handled_ops_p = false; |
236 | |
237 | gimple *stmt = gsi_stmt (i: *gsi_p); |
238 | switch (gimple_code (g: stmt)) |
239 | { |
240 | case GIMPLE_OMP_FOR: |
241 | /*TODO Given the current 'adjust_region_code' algorithm, this is |
242 | actually... */ |
243 | #if 0 |
244 | gcc_unreachable (); |
245 | #else |
246 | /* ..., but due to bugs (PR100400), we may actually come here. |
247 | Reliably catch this, regardless of checking level. */ |
248 | internal_error ("PR100400" ); |
249 | #endif |
250 | |
251 | { |
252 | tree clauses = gimple_omp_for_clauses (gs: stmt); |
253 | tree prev_clause = NULL; |
254 | for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause)) |
255 | { |
256 | if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG) |
257 | { |
258 | /* It makes no sense to have a 'gang' clause in a "gang-single" |
259 | region, so warn and remove it. */ |
260 | warning_at (gimple_location (g: stmt), 0, |
261 | "conditionally executed loop in %<kernels%> region" |
262 | " will be executed by a single gang;" |
263 | " ignoring %<gang%> clause" ); |
264 | if (prev_clause != NULL) |
265 | OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause); |
266 | else |
267 | clauses = OMP_CLAUSE_CHAIN (clause); |
268 | |
269 | break; |
270 | } |
271 | prev_clause = clause; |
272 | } |
273 | gimple_omp_for_set_clauses (gs: stmt, clauses); |
274 | } |
275 | /* No need to recurse into nested statements; no loop nested inside |
276 | this loop can be gang-partitioned. */ |
277 | sorry ("%<gang%> loop in %<gang-single%> region" ); |
278 | *handled_ops_p = true; |
279 | break; |
280 | |
281 | default: |
282 | break; |
283 | } |
284 | |
285 | return NULL; |
286 | } |
287 | |
288 | /* Visit all nested OpenACC loops in the sequence indicated by GS. This |
289 | statement is expected to be inside a gang-single region. Issue a warning |
290 | for any loops inside it that have gang clauses and remove the clauses. */ |
291 | |
292 | static void |
293 | make_loops_gang_single (gimple_seq gs) |
294 | { |
295 | struct walk_stmt_info wi; |
296 | memset (s: &wi, c: 0, n: sizeof (wi)); |
297 | walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi); |
298 | } |
299 | |
300 | /* Construct a "gang-single" compute construct at LOC containing the STMTS. |
301 | Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an |
302 | additional 'num_gangs (1)' clause to force "gang-single" execution. */ |
303 | |
304 | static gimple * |
305 | make_region_seq (location_t loc, gimple_seq stmts, |
306 | tree num_gangs_clause, |
307 | tree num_workers_clause, |
308 | tree vector_length_clause, |
309 | tree clauses) |
310 | { |
311 | /* This correctly unshares the entire clause chain rooted here. */ |
312 | clauses = unshare_expr (clauses); |
313 | |
314 | dump_user_location_t loc_stmts_first = gimple_seq_first (s: stmts); |
315 | |
316 | /* Figure out the region code for this region. */ |
317 | /* Optimistic default: assume "setup code", no looping; thus not |
318 | performance-critical. */ |
319 | int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE; |
320 | adjust_region_code (gs: stmts, region_code: ®ion_code); |
321 | |
322 | if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE) |
323 | { |
324 | if (dump_enabled_p ()) |
325 | /*TODO MSG_MISSED_OPTIMIZATION? */ |
326 | dump_printf_loc (MSG_NOTE, loc_stmts_first, |
327 | "beginning %<gang-single%> part" |
328 | " in OpenACC %<kernels%> region\n" ); |
329 | |
330 | /* Synthesize a 'num_gangs (1)' clause. */ |
331 | tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS); |
332 | OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node; |
333 | OMP_CLAUSE_CHAIN (gang_single_clause) = clauses; |
334 | clauses = gang_single_clause; |
335 | |
336 | /* Remove and issue warnings about gang clauses on any OpenACC |
337 | loops nested inside this sequentially executed statement. */ |
338 | make_loops_gang_single (gs: stmts); |
339 | } |
340 | else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) |
341 | { |
342 | if (dump_enabled_p ()) |
343 | dump_printf_loc (MSG_NOTE, loc_stmts_first, |
344 | "beginning %<parloops%> part" |
345 | " in OpenACC %<kernels%> region\n" ); |
346 | |
347 | /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another |
348 | 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses |
349 | mangling that 'make_region_loop_nest' is doing. */ |
350 | /* Re-assemble the clauses stripped off earlier. */ |
351 | if (num_gangs_clause != NULL) |
352 | { |
353 | tree c = unshare_expr (num_gangs_clause); |
354 | OMP_CLAUSE_CHAIN (c) = clauses; |
355 | clauses = c; |
356 | } |
357 | if (num_workers_clause != NULL) |
358 | { |
359 | tree c = unshare_expr (num_workers_clause); |
360 | OMP_CLAUSE_CHAIN (c) = clauses; |
361 | clauses = c; |
362 | } |
363 | if (vector_length_clause != NULL) |
364 | { |
365 | tree c = unshare_expr (vector_length_clause); |
366 | OMP_CLAUSE_CHAIN (c) = clauses; |
367 | clauses = c; |
368 | } |
369 | } |
370 | else |
371 | gcc_unreachable (); |
372 | |
373 | /* Build the gang-single region. */ |
374 | gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses); |
375 | gimple_set_location (g: single_region, location: loc); |
376 | gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK)); |
377 | gimple_omp_set_body (gs: single_region, body: single_body); |
378 | |
379 | return single_region; |
380 | } |
381 | |
382 | /* Helper function for make_region_loop_nest. Adds a 'num_gangs' |
383 | ('num_workers', 'vector_length') clause to the given CLAUSES, either the one |
384 | from the parent compute construct (PARENT_CLAUSE) or a new one based on the |
385 | loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector' |
386 | clauses) with the given CLAUSE_CODE. Does nothing if neither PARENT_CLAUSE |
387 | nor LOOP_CLAUSE exist. Returns the new clauses. */ |
388 | |
389 | static tree |
390 | add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause, |
391 | omp_clause_code clause_code, tree clauses) |
392 | { |
393 | if (parent_clause != NULL) |
394 | { |
395 | tree num_clause = unshare_expr (parent_clause); |
396 | OMP_CLAUSE_CHAIN (num_clause) = clauses; |
397 | clauses = num_clause; |
398 | } |
399 | else if (loop_clause != NULL) |
400 | { |
401 | /* The kernels region does not have a 'num_gangs' clause, but the loop |
402 | itself had a 'gang (num: N)' clause. Honor it by adding a |
403 | 'num_gangs (N)' clause on the compute construct. */ |
404 | tree num = OMP_CLAUSE_OPERAND (loop_clause, 0); |
405 | tree new_num_clause |
406 | = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code); |
407 | OMP_CLAUSE_OPERAND (new_num_clause, 0) = num; |
408 | OMP_CLAUSE_CHAIN (new_num_clause) = clauses; |
409 | clauses = new_num_clause; |
410 | } |
411 | return clauses; |
412 | } |
413 | |
414 | /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector |
415 | (length: N)' clauses in nested loops. Removes the argument, transferring it |
416 | to the enclosing compute construct (via WI->INFO). If arguments within the |
417 | same loop nest conflict, emits a warning. |
418 | |
419 | This function also decides whether to add an 'auto' clause on each of these |
420 | nested loops. */ |
421 | |
422 | struct adjust_nested_loop_clauses_wi_info |
423 | { |
424 | tree *loop_gang_clause_ptr; |
425 | tree *loop_worker_clause_ptr; |
426 | tree *loop_vector_clause_ptr; |
427 | }; |
428 | |
429 | static tree |
430 | adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *, |
431 | struct walk_stmt_info *wi) |
432 | { |
433 | struct adjust_nested_loop_clauses_wi_info *wi_info |
434 | = (struct adjust_nested_loop_clauses_wi_info *) wi->info; |
435 | gimple *stmt = gsi_stmt (i: *gsi_p); |
436 | |
437 | if (gimple_code (g: stmt) == GIMPLE_OMP_FOR) |
438 | { |
439 | bool add_auto_clause = true; |
440 | tree loop_clauses = gimple_omp_for_clauses (gs: stmt); |
441 | tree loop_clause = loop_clauses; |
442 | for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) |
443 | { |
444 | tree *outer_clause_ptr = NULL; |
445 | switch (OMP_CLAUSE_CODE (loop_clause)) |
446 | { |
447 | case OMP_CLAUSE_GANG: |
448 | outer_clause_ptr = wi_info->loop_gang_clause_ptr; |
449 | break; |
450 | case OMP_CLAUSE_WORKER: |
451 | outer_clause_ptr = wi_info->loop_worker_clause_ptr; |
452 | break; |
453 | case OMP_CLAUSE_VECTOR: |
454 | outer_clause_ptr = wi_info->loop_vector_clause_ptr; |
455 | break; |
456 | case OMP_CLAUSE_SEQ: |
457 | case OMP_CLAUSE_INDEPENDENT: |
458 | case OMP_CLAUSE_AUTO: |
459 | add_auto_clause = false; |
460 | default: |
461 | break; |
462 | } |
463 | if (outer_clause_ptr != NULL) |
464 | { |
465 | if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL |
466 | && *outer_clause_ptr == NULL) |
467 | { |
468 | /* Transfer the clause to the enclosing compute construct and |
469 | remove the numerical argument from the 'loop'. */ |
470 | *outer_clause_ptr = unshare_expr (loop_clause); |
471 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; |
472 | } |
473 | else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL && |
474 | OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL) |
475 | { |
476 | /* See if both of these are the same constant. If they |
477 | aren't, emit a warning. */ |
478 | tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0); |
479 | tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0); |
480 | if (!(cst_and_fits_in_hwi (old_op) && |
481 | cst_and_fits_in_hwi (new_op) && |
482 | int_cst_value (old_op) == int_cst_value (new_op))) |
483 | { |
484 | const char *clause_name |
485 | = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; |
486 | error_at (gimple_location (g: stmt), |
487 | "cannot honor conflicting %qs clause" , |
488 | clause_name); |
489 | inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr), |
490 | "location of the previous clause" |
491 | " in the same loop nest" ); |
492 | } |
493 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; |
494 | } |
495 | } |
496 | } |
497 | if (add_auto_clause) |
498 | { |
499 | tree auto_clause |
500 | = build_omp_clause (gimple_location (g: stmt), OMP_CLAUSE_AUTO); |
501 | OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; |
502 | gimple_omp_for_set_clauses (gs: stmt, clauses: auto_clause); |
503 | } |
504 | } |
505 | |
506 | return NULL; |
507 | } |
508 | |
509 | /* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop' |
510 | construct clauses into OpenACC 'parallel'/'loop' construct ones. */ |
511 | |
512 | static tree |
513 | transform_kernels_loop_clauses (gimple *omp_for, |
514 | tree num_gangs_clause, |
515 | tree num_workers_clause, |
516 | tree vector_length_clause, |
517 | tree clauses) |
518 | { |
519 | /* If this loop in a kernels region does not have an explicit 'seq', |
520 | 'independent', or 'auto' clause, we must give it an explicit 'auto' |
521 | clause. |
522 | We also check for 'gang (num: N)' clauses. These must not appear in |
523 | kernels regions that have their own 'num_gangs' clause. Otherwise, they |
524 | must be converted and put on the region; similarly for 'worker' and |
525 | 'vector' clauses. */ |
526 | bool add_auto_clause = true; |
527 | tree loop_gang_clause = NULL, loop_worker_clause = NULL, |
528 | loop_vector_clause = NULL; |
529 | tree loop_clauses = gimple_omp_for_clauses (gs: omp_for); |
530 | for (tree loop_clause = loop_clauses; |
531 | loop_clause; |
532 | loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) |
533 | { |
534 | bool found_num_clause = false; |
535 | tree *clause_ptr, clause_to_check; |
536 | switch (OMP_CLAUSE_CODE (loop_clause)) |
537 | { |
538 | case OMP_CLAUSE_GANG: |
539 | found_num_clause = true; |
540 | clause_ptr = &loop_gang_clause; |
541 | clause_to_check = num_gangs_clause; |
542 | break; |
543 | case OMP_CLAUSE_WORKER: |
544 | found_num_clause = true; |
545 | clause_ptr = &loop_worker_clause; |
546 | clause_to_check = num_workers_clause; |
547 | break; |
548 | case OMP_CLAUSE_VECTOR: |
549 | found_num_clause = true; |
550 | clause_ptr = &loop_vector_clause; |
551 | clause_to_check = vector_length_clause; |
552 | break; |
553 | case OMP_CLAUSE_INDEPENDENT: |
554 | case OMP_CLAUSE_SEQ: |
555 | case OMP_CLAUSE_AUTO: |
556 | add_auto_clause = false; |
557 | default: |
558 | break; |
559 | } |
560 | if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL) |
561 | { |
562 | if (clause_to_check) |
563 | { |
564 | const char *clause_name |
565 | = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; |
566 | const char *parent_clause_name |
567 | = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)]; |
568 | error_at (OMP_CLAUSE_LOCATION (loop_clause), |
569 | "argument not permitted on %qs clause" |
570 | " in OpenACC %<kernels%> region with a %qs clause" , |
571 | clause_name, parent_clause_name); |
572 | inform (OMP_CLAUSE_LOCATION (clause_to_check), |
573 | "location of OpenACC %<kernels%>" ); |
574 | } |
575 | /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the |
576 | enclosing compute construct. */ |
577 | *clause_ptr = unshare_expr (loop_clause); |
578 | OMP_CLAUSE_CHAIN (*clause_ptr) = NULL; |
579 | /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without |
580 | argument. */ |
581 | OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; |
582 | } |
583 | } |
584 | if (add_auto_clause) |
585 | { |
586 | tree auto_clause = build_omp_clause (gimple_location (g: omp_for), |
587 | OMP_CLAUSE_AUTO); |
588 | OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; |
589 | loop_clauses = auto_clause; |
590 | } |
591 | gimple_omp_for_set_clauses (gs: omp_for, clauses: loop_clauses); |
592 | /* We must also recurse into the loop; it might contain nested loops having |
593 | their own 'worker (num: W)' or 'vector (length: V)' clauses. Turn these |
594 | into 'worker'/'vector' clauses on the compute construct. */ |
595 | struct walk_stmt_info wi; |
596 | memset (s: &wi, c: 0, n: sizeof (wi)); |
597 | struct adjust_nested_loop_clauses_wi_info wi_info; |
598 | wi_info.loop_gang_clause_ptr = &loop_gang_clause; |
599 | wi_info.loop_worker_clause_ptr = &loop_worker_clause; |
600 | wi_info.loop_vector_clause_ptr = &loop_vector_clause; |
601 | wi.info = &wi_info; |
602 | gimple *body = gimple_omp_body (gs: omp_for); |
603 | walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi); |
604 | /* Check if there were conflicting numbers of workers or vector length. */ |
605 | if (loop_gang_clause != NULL && |
606 | OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL) |
607 | loop_gang_clause = NULL; |
608 | if (loop_worker_clause != NULL && |
609 | OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL) |
610 | loop_worker_clause = NULL; |
611 | if (loop_vector_clause != NULL && |
612 | OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL) |
613 | vector_length_clause = NULL; |
614 | |
615 | /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length' |
616 | clauses, add these to this new compute construct. */ |
617 | clauses |
618 | = add_parent_or_loop_num_clause (parent_clause: num_gangs_clause, loop_clause: loop_gang_clause, |
619 | clause_code: OMP_CLAUSE_NUM_GANGS, clauses); |
620 | clauses |
621 | = add_parent_or_loop_num_clause (parent_clause: num_workers_clause, loop_clause: loop_worker_clause, |
622 | clause_code: OMP_CLAUSE_NUM_WORKERS, clauses); |
623 | clauses |
624 | = add_parent_or_loop_num_clause (parent_clause: vector_length_clause, loop_clause: loop_vector_clause, |
625 | clause_code: OMP_CLAUSE_VECTOR_LENGTH, clauses); |
626 | |
627 | return clauses; |
628 | } |
629 | |
630 | /* Construct a possibly gang-parallel compute construct containing the STMT, |
631 | which must be identical to, or a bind containing, the loop OMP_FOR. |
632 | |
633 | The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are |
634 | optional clauses from the original kernels region and must not be contained |
635 | in the other CLAUSES. The newly created compute construct is annotated with |
636 | the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there is no |
637 | NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is |
638 | converted to a 'num_gangs (N)' clause on the new compute construct, and |
639 | similarly for 'worker' and 'vector' clauses. |
640 | |
641 | The outermost loop gets an 'auto' clause unless there already is an |
642 | 'seq'/'independent'/'auto' clause. Nested loops inside OMP_FOR are treated |
643 | similarly by the adjust_nested_loop_clauses function. */ |
644 | |
645 | static gimple * |
646 | make_region_loop_nest (gimple *omp_for, gimple_seq stmts, |
647 | tree num_gangs_clause, |
648 | tree num_workers_clause, |
649 | tree vector_length_clause, |
650 | tree clauses) |
651 | { |
652 | /* This correctly unshares the entire clause chain rooted here. */ |
653 | clauses = unshare_expr (clauses); |
654 | |
655 | /* Figure out the region code for this region. */ |
656 | /* Optimistic default: assume that the loop nest is parallelizable |
657 | (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause, |
658 | and no un-annotated loops). */ |
659 | int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED; |
660 | adjust_region_code (gs: stmts, region_code: ®ion_code); |
661 | |
662 | if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED) |
663 | { |
664 | if (dump_enabled_p ()) |
665 | /* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the |
666 | user asked us to. */ |
667 | dump_printf_loc (MSG_NOTE, omp_for, |
668 | "parallelized loop nest" |
669 | " in OpenACC %<kernels%> region\n" ); |
670 | |
671 | clauses = transform_kernels_loop_clauses (omp_for, |
672 | num_gangs_clause, |
673 | num_workers_clause, |
674 | vector_length_clause, |
675 | clauses); |
676 | } |
677 | else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) |
678 | { |
679 | if (dump_enabled_p ()) |
680 | dump_printf_loc (MSG_NOTE, omp_for, |
681 | "forwarded loop nest" |
682 | " in OpenACC %<kernels%> region" |
683 | " to %<parloops%> for analysis\n" ); |
684 | |
685 | /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another |
686 | 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to |
687 | 'transform_kernels_loop_clauses'. */ |
688 | /* Re-assemble the clauses stripped off earlier. */ |
689 | clauses |
690 | = add_parent_or_loop_num_clause (parent_clause: num_gangs_clause, NULL, |
691 | clause_code: OMP_CLAUSE_NUM_GANGS, clauses); |
692 | clauses |
693 | = add_parent_or_loop_num_clause (parent_clause: num_workers_clause, NULL, |
694 | clause_code: OMP_CLAUSE_NUM_WORKERS, clauses); |
695 | clauses |
696 | = add_parent_or_loop_num_clause (parent_clause: vector_length_clause, NULL, |
697 | clause_code: OMP_CLAUSE_VECTOR_LENGTH, clauses); |
698 | } |
699 | else |
700 | gcc_unreachable (); |
701 | |
702 | gimple *parallel_body_bind |
703 | = gimple_build_bind (NULL, stmts, make_node (BLOCK)); |
704 | gimple *parallel_region |
705 | = gimple_build_omp_target (parallel_body_bind, region_code, clauses); |
706 | gimple_set_location (g: parallel_region, location: gimple_location (g: omp_for)); |
707 | |
708 | return parallel_region; |
709 | } |
710 | |
711 | /* Eliminate any binds directly inside BIND by adding their statements to |
712 | BIND (i.e., modifying it in place), excluding binds that hold only an |
713 | OMP_FOR loop and associated setup/cleanup code. Recurse into binds but |
714 | not other statements. Return a chain of the local variables of eliminated |
715 | binds, i.e., the local variables found in nested binds. If |
716 | INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging |
717 | to BIND itself. */ |
718 | |
719 | static tree |
720 | flatten_binds (gbind *bind, bool include_toplevel_vars = false) |
721 | { |
722 | tree vars = NULL, last_var = NULL; |
723 | |
724 | if (include_toplevel_vars) |
725 | { |
726 | vars = gimple_bind_vars (bind_stmt: bind); |
727 | last_var = vars; |
728 | } |
729 | |
730 | gimple_seq new_body = NULL; |
731 | gimple_seq body_sequence = gimple_bind_body (gs: bind); |
732 | gimple_stmt_iterator gsi, gsi_n; |
733 | for (gsi = gsi_start (seq&: body_sequence); !gsi_end_p (i: gsi); gsi = gsi_n) |
734 | { |
735 | /* Advance the iterator here because otherwise it would be invalidated |
736 | by moving statements below. */ |
737 | gsi_n = gsi; |
738 | gsi_next (i: &gsi_n); |
739 | |
740 | gimple *stmt = gsi_stmt (i: gsi); |
741 | /* Flatten bind statements, except the ones that contain only an |
742 | OpenACC for loop. */ |
743 | if (gimple_code (g: stmt) == GIMPLE_BIND |
744 | && !top_level_omp_for_in_stmt (stmt)) |
745 | { |
746 | gbind *inner_bind = as_a <gbind *> (p: stmt); |
747 | /* Flatten recursively, and collect all variables. */ |
748 | tree inner_vars = flatten_binds (bind: inner_bind, include_toplevel_vars: true); |
749 | gimple_seq inner_sequence = gimple_bind_body (gs: inner_bind); |
750 | if (flag_checking) |
751 | { |
752 | for (gimple_stmt_iterator inner_gsi = gsi_start (seq&: inner_sequence); |
753 | !gsi_end_p (i: inner_gsi); |
754 | gsi_next (i: &inner_gsi)) |
755 | { |
756 | gimple *inner_stmt = gsi_stmt (i: inner_gsi); |
757 | gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND |
758 | || top_level_omp_for_in_stmt (inner_stmt)); |
759 | } |
760 | } |
761 | gimple_seq_add_seq (&new_body, inner_sequence); |
762 | /* Find the last variable; we will append others to it. */ |
763 | while (last_var != NULL && TREE_CHAIN (last_var) != NULL) |
764 | last_var = TREE_CHAIN (last_var); |
765 | if (last_var != NULL) |
766 | { |
767 | TREE_CHAIN (last_var) = inner_vars; |
768 | last_var = inner_vars; |
769 | } |
770 | else |
771 | { |
772 | vars = inner_vars; |
773 | last_var = vars; |
774 | } |
775 | } |
776 | else |
777 | gimple_seq_add_stmt (&new_body, stmt); |
778 | } |
779 | |
780 | /* Put the possibly transformed body back into the bind. */ |
781 | gimple_bind_set_body (bind_stmt: bind, seq: new_body); |
782 | return vars; |
783 | } |
784 | |
785 | /* Helper function for places where we construct data regions. Wraps the BODY |
786 | inside a try-finally construct at LOC that calls __builtin_GOACC_data_end |
787 | in its cleanup block. Returns this try statement. */ |
788 | |
789 | static gimple * |
790 | make_data_region_try_statement (location_t loc, gimple *body) |
791 | { |
792 | tree data_end_fn = builtin_decl_explicit (fncode: BUILT_IN_GOACC_DATA_END); |
793 | gimple *call = gimple_build_call (data_end_fn, 0); |
794 | gimple_seq cleanup = NULL; |
795 | gimple_seq_add_stmt (&cleanup, call); |
796 | gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY); |
797 | gimple_set_location (g: body, location: loc); |
798 | return try_stmt; |
799 | } |
800 | |
801 | /* If INNER_BIND_VARS holds variables, build an OpenACC data region with |
802 | location LOC containing BODY and having 'create (var)' clauses for each |
803 | variable (as a side effect, such variables also get TREE_ADDRESSABLE set). |
804 | If INNER_CLEANUP is present, add a try-finally statement with |
805 | this cleanup code in the finally block. Return the new data region, or |
806 | the original BODY if no data region was needed. */ |
807 | |
808 | static gimple * |
809 | maybe_build_inner_data_region (location_t loc, gimple *body, |
810 | tree inner_bind_vars, gimple *inner_cleanup) |
811 | { |
812 | /* Is this an instantiation of a template? (In this case, we don't care what |
813 | the generic decl is - just whether the function decl has one.) */ |
814 | bool generic_inst_p |
815 | = (lang_hooks.decls.get_generic_function_decl (current_function_decl) |
816 | != NULL); |
817 | |
818 | /* Build data 'create (var)' clauses for these local variables. |
819 | Below we will add these to a data region enclosing the entire body |
820 | of the decomposed kernels region. */ |
821 | tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL, |
822 | inner_data_clauses = NULL; |
823 | for (tree v = inner_bind_vars; v; v = next) |
824 | { |
825 | next = TREE_CHAIN (v); |
826 | if (DECL_ARTIFICIAL (v) |
827 | || TREE_CODE (v) == CONST_DECL |
828 | || generic_inst_p) |
829 | { |
830 | /* If this is an artificial temporary, it need not be mapped. We |
831 | move its declaration into the bind inside the data region. |
832 | Also avoid mapping variables if we are inside a template |
833 | instantiation; the code does not contain all the copies to |
834 | temporaries that would make this legal. */ |
835 | TREE_CHAIN (v) = artificial_vars; |
836 | artificial_vars = v; |
837 | if (prev_mapped_var != NULL) |
838 | TREE_CHAIN (prev_mapped_var) = next; |
839 | else |
840 | inner_bind_vars = next; |
841 | } |
842 | else |
843 | { |
844 | /* Otherwise, build the map clause. */ |
845 | tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); |
846 | OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC); |
847 | OMP_CLAUSE_DECL (new_clause) = v; |
848 | OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v); |
849 | OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses; |
850 | inner_data_clauses = new_clause; |
851 | |
852 | prev_mapped_var = v; |
853 | |
854 | /* See <https://gcc.gnu.org/PR100280>. */ |
855 | if (!TREE_ADDRESSABLE (v)) |
856 | { |
857 | /* Request that OMP lowering make 'v' addressable. */ |
858 | OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; |
859 | |
860 | if (dump_enabled_p ()) |
861 | { |
862 | const dump_user_location_t d_u_loc |
863 | = dump_user_location_t::from_location_t (loc); |
864 | /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */ |
865 | #if __GNUC__ >= 10 |
866 | # pragma GCC diagnostic push |
867 | # pragma GCC diagnostic ignored "-Wformat" |
868 | #endif |
869 | dump_printf_loc (MSG_NOTE, d_u_loc, |
870 | "OpenACC %<kernels%> decomposition:" |
871 | " variable %<%T%> declared in block" |
872 | " requested to be made addressable\n" , |
873 | v); |
874 | #if __GNUC__ >= 10 |
875 | # pragma GCC diagnostic pop |
876 | #endif |
877 | } |
878 | } |
879 | } |
880 | } |
881 | |
882 | if (artificial_vars) |
883 | body = gimple_build_bind (artificial_vars, body, make_node (BLOCK)); |
884 | |
885 | /* If we determined above that there are variables that need to be created |
886 | on the device, construct a data region for them and wrap the body |
887 | inside that. */ |
888 | if (inner_data_clauses != NULL) |
889 | { |
890 | gcc_assert (inner_bind_vars != NULL); |
891 | gimple *inner_data_region |
892 | = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, |
893 | inner_data_clauses); |
894 | gimple_set_location (g: inner_data_region, location: loc); |
895 | /* Make sure __builtin_GOACC_data_end is called at the end. */ |
896 | gimple *try_stmt = make_data_region_try_statement (loc, body); |
897 | gimple_omp_set_body (gs: inner_data_region, body: try_stmt); |
898 | gimple *bind_body; |
899 | if (inner_cleanup != NULL) |
900 | /* Clobber all the inner variables that need to be clobbered. */ |
901 | bind_body = gimple_build_try (inner_data_region, inner_cleanup, |
902 | GIMPLE_TRY_FINALLY); |
903 | else |
904 | bind_body = inner_data_region; |
905 | body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK)); |
906 | } |
907 | |
908 | return body; |
909 | } |
910 | |
911 | static void |
912 | add_wait (location_t loc, gimple_seq *region_body) |
913 | { |
914 | /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */ |
915 | tree wait_fn = builtin_decl_explicit (fncode: BUILT_IN_GOACC_WAIT); |
916 | tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); |
917 | gimple *wait_call = gimple_build_call (wait_fn, 2, |
918 | sync_arg, integer_zero_node); |
919 | gimple_set_location (g: wait_call, location: loc); |
920 | gimple_seq_add_stmt (region_body, wait_call); |
921 | } |
922 | |
923 | /* Helper function of decompose_kernels_region_body. The statements in |
924 | REGION_BODY are expected to be decomposed parts; add an 'async' clause to |
925 | each. Also add a 'wait' directive at the end of the sequence. */ |
926 | |
927 | static void |
928 | add_async_clauses_and_wait (location_t loc, gimple_seq *region_body) |
929 | { |
930 | tree default_async_queue |
931 | = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL); |
932 | for (gimple_stmt_iterator gsi = gsi_start (seq&: *region_body); |
933 | !gsi_end_p (i: gsi); |
934 | gsi_next (i: &gsi)) |
935 | { |
936 | gimple *stmt = gsi_stmt (i: gsi); |
937 | tree target_clauses = gimple_omp_target_clauses (gs: stmt); |
938 | tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC); |
939 | OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue; |
940 | OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses; |
941 | target_clauses = new_async_clause; |
942 | gimple_omp_target_set_clauses (omp_target_stmt: as_a <gomp_target *> (p: stmt), |
943 | clauses: target_clauses); |
944 | } |
945 | add_wait (loc, region_body); |
946 | } |
947 | |
948 | /* Auxiliary analysis of the body of a kernels region, to determine for each |
949 | OpenACC loop whether it is control-dependent (i.e., not necessarily |
950 | executed every time the kernels region is entered) or not. |
951 | We say that a loop is control-dependent if there is some cond, switch, or |
952 | goto statement that jumps over it, forwards or backwards. For example, |
953 | if the loop is controlled by an if statement, then a jump to the true |
954 | block, the false block, or from one of those blocks to the control flow |
955 | join point will necessarily jump over the loop. |
956 | This analysis implements an ad-hoc union-find data structure classifying |
957 | statements into "control-flow regions" as follows: Most statements are in |
958 | the same region as their predecessor, except that each OpenACC loop is in |
959 | a region of its own, and each OpenACC loop's successor starts a new |
960 | region. We then unite the regions of any statements linked by jumps, |
961 | placing any cond, switch, or goto statement in the same region as its |
962 | target label(s). |
963 | In the end, control dependence of OpenACC loops can be determined by |
964 | comparing their immediate predecessor and successor statements' regions. |
965 | A jump crosses the loop if and only if the predecessor and successor are |
966 | in the same region. (If there is no predecessor or successor, the loop |
967 | is executed unconditionally.) |
968 | The methods in this class identify statements by their index in the |
969 | kernels region's body. */ |
970 | |
971 | class control_flow_regions |
972 | { |
973 | public: |
974 | /* Initialize an instance and pre-compute the control-flow region |
975 | information for the statement sequence SEQ. */ |
976 | control_flow_regions (gimple_seq seq); |
977 | |
978 | /* Return true if the statement with the given index IDX in the analyzed |
979 | statement sequence is an unconditionally executed OpenACC loop. */ |
980 | bool is_unconditional_oacc_for_loop (size_t idx); |
981 | |
982 | private: |
983 | /* Find the region representative for the statement identified by index |
984 | STMT_IDX. */ |
985 | size_t find_rep (size_t stmt_idx); |
986 | |
987 | /* Union the regions containing the statements represented by |
988 | representatives A and B. */ |
989 | void union_reps (size_t a, size_t b); |
990 | |
991 | /* Helper for the constructor. Performs the actual computation of the |
992 | control-flow regions in the statement sequence SEQ. */ |
993 | void compute_regions (gimple_seq seq); |
994 | |
995 | /* The mapping from statement indices to region representatives. */ |
996 | vec <size_t> representatives; |
997 | |
998 | /* A cache mapping statement indices to a flag indicating whether the |
999 | statement is a top level OpenACC for loop. */ |
1000 | vec <bool> omp_for_loops; |
1001 | }; |
1002 | |
1003 | control_flow_regions::control_flow_regions (gimple_seq seq) |
1004 | { |
1005 | representatives.create (nelems: 1); |
1006 | omp_for_loops.create (nelems: 1); |
1007 | compute_regions (seq); |
1008 | } |
1009 | |
1010 | bool |
1011 | control_flow_regions::is_unconditional_oacc_for_loop (size_t idx) |
1012 | { |
1013 | if (idx == 0 || idx == representatives.length () - 1) |
1014 | /* The first or last statement in the kernels region. This means that |
1015 | there is no room before or after it for a jump or a label. Thus |
1016 | there cannot be a jump across it, so it is unconditional. */ |
1017 | return true; |
1018 | /* Otherwise, the loop is unconditional if the statements before and after |
1019 | it are in different control flow regions. Scan forward and backward, |
1020 | skipping over neighboring OpenACC for loops, to find these preceding |
1021 | statements. */ |
1022 | size_t prev_index = idx - 1; |
1023 | while (prev_index > 0 && omp_for_loops [prev_index] == true) |
1024 | prev_index--; |
1025 | /* If all preceding statements are also OpenACC loops, all of these are |
1026 | unconditional. */ |
1027 | if (prev_index == 0) |
1028 | return true; |
1029 | size_t succ_index = idx + 1; |
1030 | while (succ_index < omp_for_loops.length () |
1031 | && omp_for_loops [succ_index] == true) |
1032 | succ_index++; |
1033 | /* If all following statements are also OpenACC loops, all of these are |
1034 | unconditional. */ |
1035 | if (succ_index == omp_for_loops.length ()) |
1036 | return true; |
1037 | return (find_rep (stmt_idx: prev_index) != find_rep (stmt_idx: succ_index)); |
1038 | } |
1039 | |
1040 | size_t |
1041 | control_flow_regions::find_rep (size_t stmt_idx) |
1042 | { |
1043 | size_t rep = stmt_idx, aux = stmt_idx; |
1044 | /* Find the root representative of this statement. */ |
1045 | while (representatives[rep] != rep) |
1046 | rep = representatives[rep]; |
1047 | /* Compress the path from the original statement to the representative. */ |
1048 | while (representatives[aux] != rep) |
1049 | { |
1050 | size_t tmp = representatives[aux]; |
1051 | representatives[aux] = rep; |
1052 | aux = tmp; |
1053 | } |
1054 | return rep; |
1055 | } |
1056 | |
1057 | void |
1058 | control_flow_regions::union_reps (size_t a, size_t b) |
1059 | { |
1060 | a = find_rep (stmt_idx: a); |
1061 | b = find_rep (stmt_idx: b); |
1062 | representatives[b] = a; |
1063 | } |
1064 | |
1065 | void |
1066 | control_flow_regions::compute_regions (gimple_seq seq) |
1067 | { |
1068 | hash_map <gimple *, size_t> control_flow_reps; |
1069 | hash_map <tree, size_t> label_reps; |
1070 | size_t current_region = 0, idx = 0; |
1071 | |
1072 | /* In a first pass, assign an initial region to each statement. Except in |
1073 | the case of OpenACC loops, each statement simply gets the same region |
1074 | representative as its predecessor. */ |
1075 | for (gimple_stmt_iterator gsi = gsi_start (seq); |
1076 | !gsi_end_p (i: gsi); |
1077 | gsi_next (i: &gsi)) |
1078 | { |
1079 | gimple *stmt = gsi_stmt (i: gsi); |
1080 | gimple *omp_for = top_level_omp_for_in_stmt (stmt); |
1081 | omp_for_loops.safe_push (obj: omp_for != NULL); |
1082 | if (omp_for != NULL) |
1083 | { |
1084 | /* Assign a new region to this loop and to its successor. */ |
1085 | current_region = idx; |
1086 | representatives.safe_push (obj: current_region); |
1087 | current_region++; |
1088 | } |
1089 | else |
1090 | { |
1091 | representatives.safe_push (obj: current_region); |
1092 | /* Remember any jumps and labels for the second pass below. */ |
1093 | if (gimple_code (g: stmt) == GIMPLE_COND |
1094 | || gimple_code (g: stmt) == GIMPLE_SWITCH |
1095 | || gimple_code (g: stmt) == GIMPLE_GOTO) |
1096 | control_flow_reps.put (k: stmt, v: current_region); |
1097 | else if (gimple_code (g: stmt) == GIMPLE_LABEL) |
1098 | label_reps.put (k: gimple_label_label (gs: as_a <glabel *> (p: stmt)), |
1099 | v: current_region); |
1100 | } |
1101 | idx++; |
1102 | } |
1103 | gcc_assert (representatives.length () == omp_for_loops.length ()); |
1104 | |
1105 | /* Revisit all the control flow statements and union the region of each |
1106 | cond, switch, or goto statement with the target labels' regions. */ |
1107 | for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin (); |
1108 | it != control_flow_reps.end (); |
1109 | ++it) |
1110 | { |
1111 | gimple *stmt = (*it).first; |
1112 | size_t stmt_rep = (*it).second; |
1113 | switch (gimple_code (g: stmt)) |
1114 | { |
1115 | tree label; |
1116 | unsigned int n; |
1117 | |
1118 | case GIMPLE_COND: |
1119 | label = gimple_cond_true_label (gs: as_a <gcond *> (p: stmt)); |
1120 | union_reps (a: stmt_rep, b: *label_reps.get (k: label)); |
1121 | label = gimple_cond_false_label (gs: as_a <gcond *> (p: stmt)); |
1122 | union_reps (a: stmt_rep, b: *label_reps.get (k: label)); |
1123 | break; |
1124 | |
1125 | case GIMPLE_SWITCH: |
1126 | n = gimple_switch_num_labels (gs: as_a <gswitch *> (p: stmt)); |
1127 | for (unsigned int i = 0; i < n; i++) |
1128 | { |
1129 | tree switch_case |
1130 | = gimple_switch_label (gs: as_a <gswitch *> (p: stmt), index: i); |
1131 | label = CASE_LABEL (switch_case); |
1132 | union_reps (a: stmt_rep, b: *label_reps.get (k: label)); |
1133 | } |
1134 | break; |
1135 | |
1136 | case GIMPLE_GOTO: |
1137 | label = gimple_goto_dest (gs: stmt); |
1138 | union_reps (a: stmt_rep, b: *label_reps.get (k: label)); |
1139 | break; |
1140 | |
1141 | default: |
1142 | gcc_unreachable (); |
1143 | } |
1144 | } |
1145 | } |
1146 | |
1147 | /* Decompose the body of the KERNELS_REGION, which was originally annotated |
1148 | with the KERNELS_CLAUSES, into a series of compute constructs. */ |
1149 | |
1150 | static gimple * |
1151 | decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses) |
1152 | { |
1153 | location_t loc = gimple_location (g: kernels_region); |
1154 | |
1155 | /* The kernels clauses will be propagated to the child clauses unmodified, |
1156 | except that the 'num_gangs', 'num_workers', and 'vector_length' clauses |
1157 | will only be added to loop regions. The other regions are "gang-single" |
1158 | and get an explicit 'num_gangs (1)' clause. So separate out the |
1159 | 'num_gangs', 'num_workers', and 'vector_length' clauses here. |
1160 | Also check for the presence of an 'async' clause but do not remove it from |
1161 | the 'kernels' clauses. */ |
1162 | tree num_gangs_clause = NULL, num_workers_clause = NULL, |
1163 | vector_length_clause = NULL; |
1164 | tree async_clause = NULL; |
1165 | tree prev_clause = NULL, next_clause = NULL; |
1166 | tree parallel_clauses = kernels_clauses; |
1167 | for (tree c = parallel_clauses; c; c = next_clause) |
1168 | { |
1169 | /* Preserve this here, as we might NULL it later. */ |
1170 | next_clause = OMP_CLAUSE_CHAIN (c); |
1171 | |
1172 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS |
1173 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS |
1174 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH) |
1175 | { |
1176 | /* Cut this clause out of the chain. */ |
1177 | if (prev_clause != NULL) |
1178 | OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c); |
1179 | else |
1180 | kernels_clauses = OMP_CLAUSE_CHAIN (c); |
1181 | OMP_CLAUSE_CHAIN (c) = NULL; |
1182 | switch (OMP_CLAUSE_CODE (c)) |
1183 | { |
1184 | case OMP_CLAUSE_NUM_GANGS: |
1185 | num_gangs_clause = c; |
1186 | break; |
1187 | case OMP_CLAUSE_NUM_WORKERS: |
1188 | num_workers_clause = c; |
1189 | break; |
1190 | case OMP_CLAUSE_VECTOR_LENGTH: |
1191 | vector_length_clause = c; |
1192 | break; |
1193 | default: |
1194 | gcc_unreachable (); |
1195 | } |
1196 | } |
1197 | else |
1198 | prev_clause = c; |
1199 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC) |
1200 | async_clause = c; |
1201 | } |
1202 | |
1203 | gimple *kernels_body = gimple_omp_body (gs: kernels_region); |
1204 | gbind *kernels_bind = as_a <gbind *> (p: kernels_body); |
1205 | |
1206 | /* The body of the region may contain other nested binds declaring inner |
1207 | local variables. Collapse all these binds into one to ensure that we |
1208 | have a single sequence of statements to iterate over; also, collect all |
1209 | inner variables. */ |
1210 | tree inner_bind_vars = flatten_binds (bind: kernels_bind); |
1211 | gimple_seq body_sequence = gimple_bind_body (gs: kernels_bind); |
1212 | |
1213 | /* All these inner variables will get allocated on the device (below, by |
1214 | calling maybe_build_inner_data_region). Here we create 'present' |
1215 | clauses for them and add these clauses to the list of clauses to be |
1216 | attached to each inner compute construct. */ |
1217 | tree present_clauses = kernels_clauses; |
1218 | for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var)) |
1219 | { |
1220 | if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL) |
1221 | { |
1222 | tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); |
1223 | OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT); |
1224 | OMP_CLAUSE_DECL (present_clause) = var; |
1225 | OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var); |
1226 | OMP_CLAUSE_CHAIN (present_clause) = present_clauses; |
1227 | present_clauses = present_clause; |
1228 | } |
1229 | } |
1230 | kernels_clauses = present_clauses; |
1231 | |
1232 | /* In addition to nested binds, the "real" body of the region may be |
1233 | nested inside a try-finally block. Find its cleanup block, which |
1234 | contains code to clobber the local variables that must be clobbered. */ |
1235 | gimple *inner_cleanup = NULL; |
1236 | if (body_sequence != NULL && gimple_code (g: body_sequence) == GIMPLE_TRY) |
1237 | { |
1238 | if (gimple_seq_singleton_p (seq: body_sequence)) |
1239 | { |
1240 | /* The try statement is the only thing inside the bind. */ |
1241 | inner_cleanup = gimple_try_cleanup (gs: body_sequence); |
1242 | body_sequence = gimple_try_eval (gs: body_sequence); |
1243 | } |
1244 | else |
1245 | { |
1246 | /* The bind's body starts with a try statement, but it is followed |
1247 | by other things. */ |
1248 | gimple_stmt_iterator gsi = gsi_start (seq&: body_sequence); |
1249 | gimple *try_stmt = gsi_stmt (i: gsi); |
1250 | inner_cleanup = gimple_try_cleanup (gs: try_stmt); |
1251 | gimple *try_body = gimple_try_eval (gs: try_stmt); |
1252 | |
1253 | gsi_remove (&gsi, false); |
1254 | /* Now gsi indicates the sequence of statements after the try |
1255 | statement in the bind. Append the statement in the try body and |
1256 | the trailing statements from gsi. */ |
1257 | gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING); |
1258 | body_sequence = gsi_stmt (i: gsi); |
1259 | } |
1260 | } |
1261 | |
1262 | /* This sequence will collect all the top-level statements in the body of |
1263 | the data region we are about to construct. */ |
1264 | gimple_seq region_body = NULL; |
1265 | /* This sequence will collect consecutive statements to be put into a |
1266 | gang-single region. */ |
1267 | gimple_seq gang_single_seq = NULL; |
1268 | /* Flag recording whether the gang_single_seq only contains copies to |
1269 | local variables. These may be loop setup code that should not be |
1270 | separated from the loop. */ |
1271 | bool only_simple_assignments = true; |
1272 | |
1273 | /* Precompute the control flow region information to determine whether an |
1274 | OpenACC loop is executed conditionally or unconditionally. */ |
1275 | control_flow_regions cf_regions (body_sequence); |
1276 | |
1277 | /* Iterate over the statements in the kernels region's body. */ |
1278 | size_t idx = 0; |
1279 | gimple_stmt_iterator gsi, gsi_n; |
1280 | for (gsi = gsi_start (seq&: body_sequence); !gsi_end_p (i: gsi); gsi = gsi_n, idx++) |
1281 | { |
1282 | /* Advance the iterator here because otherwise it would be invalidated |
1283 | by moving statements below. */ |
1284 | gsi_n = gsi; |
1285 | gsi_next (i: &gsi_n); |
1286 | |
1287 | gimple *stmt = gsi_stmt (i: gsi); |
1288 | if (gimple_code (g: stmt) == GIMPLE_DEBUG) |
1289 | { |
1290 | if (flag_compare_debug_opt || flag_compare_debug) |
1291 | /* Let the usual '-fcompare-debug' analysis bail out, as |
1292 | necessary. */ |
1293 | ; |
1294 | else |
1295 | sorry_at (loc, "%qs not yet supported" , |
1296 | gimple_code_name[gimple_code (g: stmt)]); |
1297 | } |
1298 | gimple *omp_for = top_level_omp_for_in_stmt (stmt); |
1299 | bool is_unconditional_oacc_for_loop = false; |
1300 | if (omp_for != NULL) |
1301 | is_unconditional_oacc_for_loop |
1302 | = cf_regions.is_unconditional_oacc_for_loop (idx); |
1303 | if (omp_for != NULL |
1304 | && is_unconditional_oacc_for_loop) |
1305 | { |
1306 | /* This is an OMP for statement, put it into a separate region. |
1307 | But first, construct a gang-single region containing any |
1308 | complex sequential statements we may have seen. */ |
1309 | if (gang_single_seq != NULL && !only_simple_assignments) |
1310 | { |
1311 | gimple *single_region |
1312 | = make_region_seq (loc, stmts: gang_single_seq, |
1313 | num_gangs_clause, |
1314 | num_workers_clause, |
1315 | vector_length_clause, |
1316 | clauses: kernels_clauses); |
1317 | gimple_seq_add_stmt (®ion_body, single_region); |
1318 | } |
1319 | else if (gang_single_seq != NULL && only_simple_assignments) |
1320 | { |
1321 | /* There is a sequence of sequential statements preceding this |
1322 | loop, but they are all simple assignments. This is |
1323 | probably setup code for the loop; in particular, Fortran DO |
1324 | loops are preceded by code to copy the loop limit variable |
1325 | to a temporary. Group this code together with the loop |
1326 | itself. */ |
1327 | gimple_seq_add_stmt (&gang_single_seq, stmt); |
1328 | stmt = gimple_build_bind (NULL, gang_single_seq, |
1329 | make_node (BLOCK)); |
1330 | } |
1331 | gang_single_seq = NULL; |
1332 | only_simple_assignments = true; |
1333 | |
1334 | gimple_seq parallel_seq = NULL; |
1335 | gimple_seq_add_stmt (¶llel_seq, stmt); |
1336 | gimple *parallel_region |
1337 | = make_region_loop_nest (omp_for, stmts: parallel_seq, |
1338 | num_gangs_clause, |
1339 | num_workers_clause, |
1340 | vector_length_clause, |
1341 | clauses: kernels_clauses); |
1342 | gimple_seq_add_stmt (®ion_body, parallel_region); |
1343 | } |
1344 | else |
1345 | { |
1346 | if (omp_for != NULL) |
1347 | { |
1348 | gcc_checking_assert (!is_unconditional_oacc_for_loop); |
1349 | if (dump_enabled_p ()) |
1350 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for, |
1351 | "unparallelized loop nest" |
1352 | " in OpenACC %<kernels%> region:" |
1353 | " it's executed conditionally\n" ); |
1354 | } |
1355 | |
1356 | /* This is not an unconditional OMP for statement, so it will be |
1357 | put into a gang-single region. */ |
1358 | gimple_seq_add_stmt (&gang_single_seq, stmt); |
1359 | /* Is this a simple assignment? We call it simple if it is an |
1360 | assignment to an artificial local variable. This captures |
1361 | Fortran loop setup code computing loop bounds and offsets. */ |
1362 | bool is_simple_assignment |
1363 | = (gimple_code (g: stmt) == GIMPLE_ASSIGN |
1364 | && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL |
1365 | && DECL_ARTIFICIAL (gimple_assign_lhs (stmt))); |
1366 | if (!is_simple_assignment) |
1367 | only_simple_assignments = false; |
1368 | } |
1369 | } |
1370 | |
1371 | /* If we did not emit a new region, and are not going to emit one now |
1372 | (that is, the original region was empty), prepare to emit a dummy so as |
1373 | to preserve the original construct, which other processing (at least |
1374 | test cases) depend on. */ |
1375 | if (region_body == NULL && gang_single_seq == NULL) |
1376 | { |
1377 | gimple *stmt = gimple_build_nop (); |
1378 | gimple_set_location (g: stmt, location: loc); |
1379 | gimple_seq_add_stmt (&gang_single_seq, stmt); |
1380 | } |
1381 | |
1382 | /* Gather up any remaining gang-single statements. */ |
1383 | if (gang_single_seq != NULL) |
1384 | { |
1385 | gimple *single_region |
1386 | = make_region_seq (loc, stmts: gang_single_seq, |
1387 | num_gangs_clause, |
1388 | num_workers_clause, |
1389 | vector_length_clause, |
1390 | clauses: kernels_clauses); |
1391 | gimple_seq_add_stmt (®ion_body, single_region); |
1392 | } |
1393 | |
1394 | /* We want to launch these kernels asynchronously. If the original |
1395 | kernels region had an async clause, this is done automatically because |
1396 | that async clause was copied to the individual regions we created. |
1397 | Otherwise, add an async clause to each newly created region, as well as |
1398 | a wait directive at the end. */ |
1399 | if (async_clause == NULL) |
1400 | add_async_clauses_and_wait (loc, region_body: ®ion_body); |
1401 | else |
1402 | /* !!! If we have asynchronous parallel blocks inside a (synchronous) data |
1403 | region, then target memory will get unmapped at the point the data |
1404 | region ends, even if the inner asynchronous parallels have not yet |
1405 | completed. For kernels marked "async", we might want to use "enter data |
1406 | async(...)" and "exit data async(...)" instead, or asynchronous data |
1407 | regions (see also <https://gcc.gnu.org/PR97390> |
1408 | "[OpenACC] 'async' clause on 'data' construct", |
1409 | which is to share the same implementation). |
1410 | For now, insert a (synchronous) wait at the end of the block. */ |
1411 | add_wait (loc, region_body: ®ion_body); |
1412 | |
1413 | tree kernels_locals = gimple_bind_vars (bind_stmt: as_a <gbind *> (p: kernels_body)); |
1414 | gimple *body = gimple_build_bind (kernels_locals, region_body, |
1415 | make_node (BLOCK)); |
1416 | |
1417 | /* If we found variables declared in nested scopes, build a data region to |
1418 | map them to the device. */ |
1419 | body = maybe_build_inner_data_region (loc, body, inner_bind_vars, |
1420 | inner_cleanup); |
1421 | |
1422 | return body; |
1423 | } |
1424 | |
1425 | /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct |
1426 | containing the original OpenACC 'kernels' construct's region cut up into a |
1427 | sequence of compute constructs. */ |
1428 | |
1429 | static gimple * |
1430 | omp_oacc_kernels_decompose_1 (gimple *kernels_stmt) |
1431 | { |
1432 | gcc_checking_assert (gimple_omp_target_kind (kernels_stmt) |
1433 | == GF_OMP_TARGET_KIND_OACC_KERNELS); |
1434 | location_t loc = gimple_location (g: kernels_stmt); |
1435 | |
1436 | /* Collect the data clauses of the OpenACC 'kernels' directive and create a |
1437 | new OpenACC 'data' construct with those clauses. */ |
1438 | tree kernels_clauses = gimple_omp_target_clauses (gs: kernels_stmt); |
1439 | tree data_clauses = NULL; |
1440 | for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c)) |
1441 | { |
1442 | /* Certain clauses are copied to the enclosing OpenACC 'data'. Other |
1443 | clauses remain on the OpenACC 'kernels'. */ |
1444 | if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) |
1445 | { |
1446 | tree decl = OMP_CLAUSE_DECL (c); |
1447 | HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c); |
1448 | switch (map_kind) |
1449 | { |
1450 | default: |
1451 | if (map_kind == GOMP_MAP_ALLOC |
1452 | && integer_zerop (OMP_CLAUSE_SIZE (c))) |
1453 | /* ??? This is an alloc clause for mapping a pointer whose |
1454 | target is already mapped. We leave these on the inner |
1455 | compute constructs because moving them to the outer data |
1456 | region causes runtime errors. */ |
1457 | break; |
1458 | |
1459 | /* For non-artificial variables, and for non-declaration |
1460 | expressions like A[0:n], copy the clause to the data |
1461 | region. */ |
1462 | if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl)) |
1463 | || !DECL_P (decl)) |
1464 | { |
1465 | tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c), |
1466 | OMP_CLAUSE_MAP); |
1467 | OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind); |
1468 | /* This must be unshared here to avoid "incorrect sharing |
1469 | of tree nodes" errors from verify_gimple. */ |
1470 | OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl); |
1471 | OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c); |
1472 | OMP_CLAUSE_CHAIN (new_clause) = data_clauses; |
1473 | data_clauses = new_clause; |
1474 | |
1475 | /* Now that this data is mapped, turn the data clause on the |
1476 | inner OpenACC 'kernels' into a 'present' clause. */ |
1477 | OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT); |
1478 | |
1479 | /* See <https://gcc.gnu.org/PR100280>, |
1480 | <https://gcc.gnu.org/PR104086>. */ |
1481 | if (DECL_P (decl) |
1482 | && !TREE_ADDRESSABLE (decl)) |
1483 | { |
1484 | /* Request that OMP lowering make 'decl' addressable. */ |
1485 | OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; |
1486 | |
1487 | if (dump_enabled_p ()) |
1488 | { |
1489 | location_t loc = OMP_CLAUSE_LOCATION (new_clause); |
1490 | const dump_user_location_t d_u_loc |
1491 | = dump_user_location_t::from_location_t (loc); |
1492 | /* PR100695 "Format decoder, quoting in 'dump_printf' |
1493 | etc." */ |
1494 | #if __GNUC__ >= 10 |
1495 | # pragma GCC diagnostic push |
1496 | # pragma GCC diagnostic ignored "-Wformat" |
1497 | #endif |
1498 | dump_printf_loc |
1499 | (MSG_NOTE, d_u_loc, |
1500 | "OpenACC %<kernels%> decomposition:" |
1501 | " variable %<%T%> in %qs clause" |
1502 | " requested to be made addressable\n" , |
1503 | decl, |
1504 | user_omp_clause_code_name (new_clause, true)); |
1505 | #if __GNUC__ >= 10 |
1506 | # pragma GCC diagnostic pop |
1507 | #endif |
1508 | } |
1509 | } |
1510 | } |
1511 | break; |
1512 | |
1513 | case GOMP_MAP_POINTER: |
1514 | case GOMP_MAP_TO_PSET: |
1515 | case GOMP_MAP_FIRSTPRIVATE_POINTER: |
1516 | case GOMP_MAP_FIRSTPRIVATE_REFERENCE: |
1517 | /* ??? Copying these map kinds leads to internal compiler |
1518 | errors in later passes. */ |
1519 | break; |
1520 | } |
1521 | } |
1522 | else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF |
1523 | || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SELF) |
1524 | { |
1525 | /* If there is an 'if' or 'self' clause, it must be duplicated to the |
1526 | enclosing data region. Temporarily remove its chain to avoid |
1527 | copying it. */ |
1528 | tree saved_chain = OMP_CLAUSE_CHAIN (c); |
1529 | OMP_CLAUSE_CHAIN (c) = NULL; |
1530 | tree new_clause = unshare_expr (c); |
1531 | OMP_CLAUSE_CHAIN (c) = saved_chain; |
1532 | OMP_CLAUSE_CHAIN (new_clause) = data_clauses; |
1533 | data_clauses = new_clause; |
1534 | } |
1535 | } |
1536 | /* Restore the original order of the clauses. */ |
1537 | data_clauses = nreverse (data_clauses); |
1538 | |
1539 | gimple *data_region |
1540 | = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, |
1541 | data_clauses); |
1542 | gimple_set_location (g: data_region, location: loc); |
1543 | |
1544 | /* Transform the body of the kernels region into a sequence of compute |
1545 | constructs. */ |
1546 | gimple *body = decompose_kernels_region_body (kernels_region: kernels_stmt, |
1547 | kernels_clauses); |
1548 | |
1549 | /* Put the transformed pieces together. The entire body of the region is |
1550 | wrapped in a try-finally statement that calls __builtin_GOACC_data_end |
1551 | for cleanup. */ |
1552 | gimple *try_stmt = make_data_region_try_statement (loc, body); |
1553 | gimple_omp_set_body (gs: data_region, body: try_stmt); |
1554 | |
1555 | return data_region; |
1556 | } |
1557 | |
1558 | |
1559 | /* Decompose OpenACC 'kernels' constructs in the current function. */ |
1560 | |
1561 | static tree |
1562 | omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p, |
1563 | bool *handled_ops_p, |
1564 | struct walk_stmt_info *) |
1565 | { |
1566 | gimple *stmt = gsi_stmt (i: *gsi_p); |
1567 | |
1568 | if ((gimple_code (g: stmt) == GIMPLE_OMP_TARGET) |
1569 | && gimple_omp_target_kind (g: stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) |
1570 | { |
1571 | gimple *stmt_new = omp_oacc_kernels_decompose_1 (kernels_stmt: stmt); |
1572 | gsi_replace (gsi_p, stmt_new, false); |
1573 | *handled_ops_p = true; |
1574 | } |
1575 | else |
1576 | *handled_ops_p = false; |
1577 | |
1578 | return NULL; |
1579 | } |
1580 | |
1581 | static unsigned int |
1582 | omp_oacc_kernels_decompose (void) |
1583 | { |
1584 | gimple_seq body = gimple_body (current_function_decl); |
1585 | |
1586 | struct walk_stmt_info wi; |
1587 | memset (s: &wi, c: 0, n: sizeof (wi)); |
1588 | walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL, |
1589 | &wi); |
1590 | |
1591 | gimple_set_body (current_function_decl, body); |
1592 | |
1593 | return 0; |
1594 | } |
1595 | |
1596 | |
1597 | namespace { |
1598 | |
1599 | const pass_data pass_data_omp_oacc_kernels_decompose = |
1600 | { |
1601 | .type: GIMPLE_PASS, /* type */ |
1602 | .name: "omp_oacc_kernels_decompose" , /* name */ |
1603 | .optinfo_flags: OPTGROUP_OMP, /* optinfo_flags */ |
1604 | .tv_id: TV_NONE, /* tv_id */ |
1605 | PROP_gimple_any, /* properties_required */ |
1606 | .properties_provided: 0, /* properties_provided */ |
1607 | .properties_destroyed: 0, /* properties_destroyed */ |
1608 | .todo_flags_start: 0, /* todo_flags_start */ |
1609 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1610 | }; |
1611 | |
1612 | class pass_omp_oacc_kernels_decompose : public gimple_opt_pass |
1613 | { |
1614 | public: |
1615 | pass_omp_oacc_kernels_decompose (gcc::context *ctxt) |
1616 | : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt) |
1617 | {} |
1618 | |
1619 | /* opt_pass methods: */ |
1620 | bool gate (function *) final override |
1621 | { |
1622 | return (flag_openacc |
1623 | && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE); |
1624 | } |
1625 | unsigned int execute (function *) final override |
1626 | { |
1627 | return omp_oacc_kernels_decompose (); |
1628 | } |
1629 | |
1630 | }; // class pass_omp_oacc_kernels_decompose |
1631 | |
1632 | } // anon namespace |
1633 | |
1634 | gimple_opt_pass * |
1635 | make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt) |
1636 | { |
1637 | return new pass_omp_oacc_kernels_decompose (ctxt); |
1638 | } |
1639 | |