1 | /* OMP constructs' SIMD clone supporting code. |
2 | |
3 | Copyright (C) 2005-2024 Free Software Foundation, Inc. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "target.h" |
26 | #include "tree.h" |
27 | #include "gimple.h" |
28 | #include "cfghooks.h" |
29 | #include "alloc-pool.h" |
30 | #include "tree-pass.h" |
31 | #include "ssa.h" |
32 | #include "cgraph.h" |
33 | #include "pretty-print.h" |
34 | #include "diagnostic-core.h" |
35 | #include "fold-const.h" |
36 | #include "stor-layout.h" |
37 | #include "cfganal.h" |
38 | #include "gimplify.h" |
39 | #include "gimple-iterator.h" |
40 | #include "gimplify-me.h" |
41 | #include "gimple-walk.h" |
42 | #include "langhooks.h" |
43 | #include "tree-cfg.h" |
44 | #include "tree-into-ssa.h" |
45 | #include "tree-dfa.h" |
46 | #include "cfgloop.h" |
47 | #include "symbol-summary.h" |
48 | #include "ipa-param-manipulation.h" |
49 | #include "tree-eh.h" |
50 | #include "varasm.h" |
51 | #include "stringpool.h" |
52 | #include "attribs.h" |
53 | #include "omp-simd-clone.h" |
54 | #include "omp-low.h" |
55 | #include "omp-general.h" |
56 | |
57 | /* Print debug info for ok_for_auto_simd_clone to the dump file, logging |
58 | failure reason EXCUSE for function DECL. Always returns false. */ |
59 | static bool |
60 | auto_simd_fail (tree decl, const char *excuse) |
61 | { |
62 | if (dump_file && (dump_flags & TDF_DETAILS)) |
63 | fprintf (stream: dump_file, format: "\nNot auto-cloning %s because %s\n" , |
64 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), |
65 | excuse); |
66 | return false; |
67 | } |
68 | |
69 | /* Helper function for ok_for_auto_simd_clone; return false if the statement |
70 | violates restrictions for an "omp declare simd" function. Specifically, |
71 | the function must not |
72 | - throw or call setjmp/longjmp |
73 | - write memory that could alias parallel calls |
74 | - read volatile memory |
75 | - include openmp directives or calls |
76 | - call functions that might do those things */ |
77 | |
78 | static bool |
79 | auto_simd_check_stmt (gimple *stmt, tree outer) |
80 | { |
81 | tree decl; |
82 | |
83 | switch (gimple_code (g: stmt)) |
84 | { |
85 | case GIMPLE_CALL: |
86 | |
87 | /* Calls to functions that are CONST or PURE are ok, even if they |
88 | are internal functions without a decl. Reject other internal |
89 | functions. */ |
90 | if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) |
91 | break; |
92 | if (gimple_call_internal_p (gs: stmt)) |
93 | return auto_simd_fail (decl: outer, |
94 | excuse: "body contains internal function call" ); |
95 | |
96 | decl = gimple_call_fndecl (gs: stmt); |
97 | |
98 | /* We can't know whether indirect calls are safe. */ |
99 | if (decl == NULL_TREE) |
100 | return auto_simd_fail (decl: outer, excuse: "body contains indirect call" ); |
101 | |
102 | /* Calls to functions that are already marked "omp declare simd" are |
103 | OK. */ |
104 | if (lookup_attribute (attr_name: "omp declare simd" , DECL_ATTRIBUTES (decl))) |
105 | break; |
106 | |
107 | /* Let recursive calls to the current function through. */ |
108 | if (decl == outer) |
109 | break; |
110 | |
111 | /* Other function calls are not permitted. This covers all calls to |
112 | the libgomp API and setjmp/longjmp, too, as well as things like |
113 | __cxa_throw_ related to exception handling. */ |
114 | return auto_simd_fail (decl: outer, excuse: "body contains unsafe function call" ); |
115 | |
116 | /* Reject EH-related constructs. Most of the EH gimple codes are |
117 | already lowered by the time this pass runs during IPA. |
118 | GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by |
119 | pass_lower_eh_dispatch and pass_lower_resx, respectively; those |
120 | passes run later. */ |
121 | case GIMPLE_EH_DISPATCH: |
122 | case GIMPLE_RESX: |
123 | return auto_simd_fail (decl: outer, excuse: "body contains EH constructs" ); |
124 | |
125 | /* Asms are not permitted since we don't know what they do. */ |
126 | case GIMPLE_ASM: |
127 | return auto_simd_fail (decl: outer, excuse: "body contains inline asm" ); |
128 | |
129 | default: |
130 | break; |
131 | } |
132 | |
133 | /* Memory writes are not permitted. |
134 | FIXME: this could be relaxed a little to permit writes to |
135 | function-local variables that could not alias other instances |
136 | of the function running in parallel. */ |
137 | if (gimple_store_p (gs: stmt)) |
138 | return auto_simd_fail (decl: outer, excuse: "body includes memory write" ); |
139 | |
140 | /* Volatile reads are not permitted. */ |
141 | if (gimple_has_volatile_ops (stmt)) |
142 | return auto_simd_fail (decl: outer, excuse: "body includes volatile op" ); |
143 | |
144 | /* Otherwise OK. */ |
145 | return true; |
146 | } |
147 | |
148 | /* Helper function for ok_for_auto_simd_clone: return true if type T is |
149 | plausible for a cloneable function argument or return type. */ |
150 | static bool |
151 | plausible_type_for_simd_clone (tree t) |
152 | { |
153 | if (VOID_TYPE_P (t)) |
154 | return true; |
155 | else if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t))) |
156 | /* Small record/union types may fit into a scalar mode, but are |
157 | still not suitable. */ |
158 | return false; |
159 | else if (TYPE_ATOMIC (t)) |
160 | /* Atomic types trigger warnings in simd_clone_clauses_extract. */ |
161 | return false; |
162 | else |
163 | return true; |
164 | } |
165 | |
166 | /* Check if the function NODE appears suitable for auto-annotation |
167 | with "declare simd". */ |
168 | |
169 | static bool |
170 | ok_for_auto_simd_clone (struct cgraph_node *node) |
171 | { |
172 | tree decl = node->decl; |
173 | tree t; |
174 | basic_block bb; |
175 | |
176 | /* Nothing to do if the function isn't a definition or doesn't |
177 | have a body. */ |
178 | if (!node->definition || !node->has_gimple_body_p ()) |
179 | return auto_simd_fail (decl, excuse: "no definition or body" ); |
180 | |
181 | /* No point in trying to generate implicit clones if the function |
182 | isn't used in the compilation unit. */ |
183 | if (!node->callers) |
184 | return auto_simd_fail (decl, excuse: "function is not used" ); |
185 | |
186 | /* Nothing to do if the function already has the "omp declare simd" |
187 | attribute, is marked noclone, or is not "omp declare target". */ |
188 | if (lookup_attribute (attr_name: "omp declare simd" , DECL_ATTRIBUTES (decl)) |
189 | || lookup_attribute (attr_name: "noclone" , DECL_ATTRIBUTES (decl)) |
190 | || !lookup_attribute (attr_name: "omp declare target" , DECL_ATTRIBUTES (decl))) |
191 | return auto_simd_fail (decl, excuse: "incompatible attributes" ); |
192 | |
193 | /* Check whether the function is restricted host/nohost via the |
194 | "omp declare target device_type" clause, and that doesn't match |
195 | what we're compiling for. Internally, these translate into |
196 | "omp declare target [no]host" attributes on the decl; "any" |
197 | translates into both attributes, but the default (which is supposed |
198 | to be equivalent to "any") is neither. */ |
199 | tree host = lookup_attribute (attr_name: "omp declare target host" , |
200 | DECL_ATTRIBUTES (decl)); |
201 | tree nohost = lookup_attribute (attr_name: "omp declare target nohost" , |
202 | DECL_ATTRIBUTES (decl)); |
203 | #ifdef ACCEL_COMPILER |
204 | if (host && !nohost) |
205 | return auto_simd_fail (decl, "device doesn't match for accel compiler" ); |
206 | #else |
207 | if (nohost && !host) |
208 | return auto_simd_fail (decl, excuse: "device doesn't match for host compiler" ); |
209 | #endif |
210 | |
211 | /* Backends will check for vectorizable arguments/return types in a |
212 | target-specific way, but we can immediately filter out functions |
213 | that have implausible argument/return types. */ |
214 | t = TREE_TYPE (TREE_TYPE (decl)); |
215 | if (!plausible_type_for_simd_clone (t)) |
216 | return auto_simd_fail (decl, excuse: "return type fails sniff test" ); |
217 | |
218 | if (TYPE_ARG_TYPES (TREE_TYPE (decl))) |
219 | { |
220 | for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl)); |
221 | temp; temp = TREE_CHAIN (temp)) |
222 | { |
223 | t = TREE_VALUE (temp); |
224 | if (!plausible_type_for_simd_clone (t)) |
225 | return auto_simd_fail (decl, excuse: "argument type fails sniff test" ); |
226 | } |
227 | } |
228 | else if (DECL_ARGUMENTS (decl)) |
229 | { |
230 | for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp)) |
231 | { |
232 | t = TREE_TYPE (temp); |
233 | if (!plausible_type_for_simd_clone (t)) |
234 | return auto_simd_fail (decl, excuse: "argument type fails sniff test" ); |
235 | } |
236 | } |
237 | else |
238 | return auto_simd_fail (decl, excuse: "function has no arguments" ); |
239 | |
240 | /* Scan the function body to see if it is suitable for SIMD-ization. */ |
241 | node->get_body (); |
242 | |
243 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl)) |
244 | { |
245 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
246 | gsi_next (i: &gsi)) |
247 | if (!auto_simd_check_stmt (stmt: gsi_stmt (i: gsi), outer: decl)) |
248 | return false; |
249 | } |
250 | |
251 | /* All is good. */ |
252 | if (dump_file) |
253 | fprintf (stream: dump_file, format: "\nMarking %s for auto-cloning\n" , |
254 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); |
255 | return true; |
256 | } |
257 | |
258 | /* Allocate a fresh `simd_clone' and return it. NARGS is the number |
259 | of arguments to reserve space for. */ |
260 | |
261 | static struct cgraph_simd_clone * |
262 | simd_clone_struct_alloc (int nargs) |
263 | { |
264 | struct cgraph_simd_clone *clone_info; |
265 | size_t len = (sizeof (struct cgraph_simd_clone) |
266 | + nargs * sizeof (struct cgraph_simd_clone_arg)); |
267 | clone_info = (struct cgraph_simd_clone *) |
268 | ggc_internal_cleared_alloc (s: len); |
269 | return clone_info; |
270 | } |
271 | |
272 | /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */ |
273 | |
274 | static inline void |
275 | simd_clone_struct_copy (struct cgraph_simd_clone *to, |
276 | struct cgraph_simd_clone *from) |
277 | { |
278 | memcpy (dest: to, src: from, n: (sizeof (struct cgraph_simd_clone) |
279 | + ((from->nargs - from->inbranch) |
280 | * sizeof (struct cgraph_simd_clone_arg)))); |
281 | } |
282 | |
283 | /* Fill an empty vector ARGS with parameter types of function FNDECL. This |
284 | uses TYPE_ARG_TYPES if available, otherwise falls back to types of |
285 | DECL_ARGUMENTS types. */ |
286 | |
287 | static void |
288 | simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl) |
289 | { |
290 | if (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) |
291 | { |
292 | push_function_arg_types (types: args, TREE_TYPE (fndecl)); |
293 | return; |
294 | } |
295 | push_function_arg_decls (args, fndecl); |
296 | unsigned int i; |
297 | tree arg; |
298 | FOR_EACH_VEC_ELT (*args, i, arg) |
299 | (*args)[i] = TREE_TYPE ((*args)[i]); |
300 | } |
301 | |
302 | /* Given a simd function in NODE, extract the simd specific |
303 | information from the OMP clauses passed in CLAUSES, and return |
304 | the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED |
305 | is set to TRUE if the `inbranch' or `notinbranch' clause specified, |
306 | otherwise set to FALSE. */ |
307 | |
308 | static struct cgraph_simd_clone * |
309 | (struct cgraph_node *node, tree clauses, |
310 | bool *inbranch_specified) |
311 | { |
312 | auto_vec<tree> args; |
313 | simd_clone_vector_of_formal_parm_types (args: &args, fndecl: node->decl); |
314 | tree t; |
315 | int n; |
316 | *inbranch_specified = false; |
317 | |
318 | n = args.length (); |
319 | if (n > 0 && args.last () == void_type_node) |
320 | n--; |
321 | |
322 | /* Allocate one more than needed just in case this is an in-branch |
323 | clone which will require a mask argument. */ |
324 | struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (nargs: n + 1); |
325 | clone_info->nargs = n; |
326 | |
327 | if (!clauses) |
328 | goto out; |
329 | |
330 | clauses = TREE_VALUE (clauses); |
331 | if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE) |
332 | goto out; |
333 | |
334 | for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t)) |
335 | { |
336 | switch (OMP_CLAUSE_CODE (t)) |
337 | { |
338 | case OMP_CLAUSE_INBRANCH: |
339 | clone_info->inbranch = 1; |
340 | *inbranch_specified = true; |
341 | break; |
342 | case OMP_CLAUSE_NOTINBRANCH: |
343 | clone_info->inbranch = 0; |
344 | *inbranch_specified = true; |
345 | break; |
346 | case OMP_CLAUSE_SIMDLEN: |
347 | clone_info->simdlen |
348 | = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t)); |
349 | break; |
350 | case OMP_CLAUSE_LINEAR: |
351 | { |
352 | tree decl = OMP_CLAUSE_DECL (t); |
353 | tree step = OMP_CLAUSE_LINEAR_STEP (t); |
354 | int argno = TREE_INT_CST_LOW (decl); |
355 | if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t)) |
356 | { |
357 | enum cgraph_simd_clone_arg_type arg_type; |
358 | if (TREE_CODE (args[argno]) == REFERENCE_TYPE) |
359 | switch (OMP_CLAUSE_LINEAR_KIND (t)) |
360 | { |
361 | case OMP_CLAUSE_LINEAR_REF: |
362 | arg_type |
363 | = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP; |
364 | break; |
365 | case OMP_CLAUSE_LINEAR_UVAL: |
366 | arg_type |
367 | = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP; |
368 | break; |
369 | case OMP_CLAUSE_LINEAR_VAL: |
370 | case OMP_CLAUSE_LINEAR_DEFAULT: |
371 | arg_type |
372 | = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP; |
373 | break; |
374 | default: |
375 | gcc_unreachable (); |
376 | } |
377 | else |
378 | arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP; |
379 | clone_info->args[argno].arg_type = arg_type; |
380 | clone_info->args[argno].linear_step = tree_to_shwi (step); |
381 | gcc_assert (clone_info->args[argno].linear_step >= 0 |
382 | && clone_info->args[argno].linear_step < n); |
383 | } |
384 | else |
385 | { |
386 | if (POINTER_TYPE_P (args[argno])) |
387 | step = fold_convert (ssizetype, step); |
388 | if (!tree_fits_shwi_p (step)) |
389 | { |
390 | warning_at (OMP_CLAUSE_LOCATION (t), OPT_Wopenmp, |
391 | "ignoring large linear step" ); |
392 | return NULL; |
393 | } |
394 | else if (integer_zerop (step)) |
395 | { |
396 | warning_at (OMP_CLAUSE_LOCATION (t), OPT_Wopenmp, |
397 | "ignoring zero linear step" ); |
398 | return NULL; |
399 | } |
400 | else |
401 | { |
402 | enum cgraph_simd_clone_arg_type arg_type; |
403 | if (TREE_CODE (args[argno]) == REFERENCE_TYPE) |
404 | switch (OMP_CLAUSE_LINEAR_KIND (t)) |
405 | { |
406 | case OMP_CLAUSE_LINEAR_REF: |
407 | arg_type |
408 | = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP; |
409 | break; |
410 | case OMP_CLAUSE_LINEAR_UVAL: |
411 | arg_type |
412 | = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP; |
413 | break; |
414 | case OMP_CLAUSE_LINEAR_VAL: |
415 | case OMP_CLAUSE_LINEAR_DEFAULT: |
416 | arg_type |
417 | = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP; |
418 | break; |
419 | default: |
420 | gcc_unreachable (); |
421 | } |
422 | else |
423 | arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP; |
424 | clone_info->args[argno].arg_type = arg_type; |
425 | clone_info->args[argno].linear_step = tree_to_shwi (step); |
426 | } |
427 | } |
428 | break; |
429 | } |
430 | case OMP_CLAUSE_UNIFORM: |
431 | { |
432 | tree decl = OMP_CLAUSE_DECL (t); |
433 | int argno = tree_to_uhwi (decl); |
434 | clone_info->args[argno].arg_type |
435 | = SIMD_CLONE_ARG_TYPE_UNIFORM; |
436 | break; |
437 | } |
438 | case OMP_CLAUSE_ALIGNED: |
439 | { |
440 | /* Ignore aligned (x) for declare simd, for the ABI we really |
441 | need an alignment specified. */ |
442 | if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t) == NULL_TREE) |
443 | break; |
444 | tree decl = OMP_CLAUSE_DECL (t); |
445 | int argno = tree_to_uhwi (decl); |
446 | clone_info->args[argno].alignment |
447 | = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t)); |
448 | break; |
449 | } |
450 | default: |
451 | break; |
452 | } |
453 | } |
454 | |
455 | out: |
456 | if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl)))) |
457 | { |
458 | warning_at (DECL_SOURCE_LOCATION (node->decl), OPT_Wopenmp, |
459 | "ignoring %<#pragma omp declare simd%> on function " |
460 | "with %<_Atomic%> qualified return type" ); |
461 | return NULL; |
462 | } |
463 | |
464 | for (unsigned int argno = 0; argno < clone_info->nargs; argno++) |
465 | if (TYPE_ATOMIC (args[argno]) |
466 | && clone_info->args[argno].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM) |
467 | { |
468 | warning_at (DECL_SOURCE_LOCATION (node->decl), OPT_Wopenmp, |
469 | "ignoring %<#pragma omp declare simd%> on function " |
470 | "with %<_Atomic%> qualified non-%<uniform%> argument" ); |
471 | args.release (); |
472 | return NULL; |
473 | } |
474 | |
475 | return clone_info; |
476 | } |
477 | |
478 | /* Given a SIMD clone in NODE, calculate the characteristic data |
479 | type and return the coresponding type. The characteristic data |
480 | type is computed as described in the Intel Vector ABI. */ |
481 | |
482 | static tree |
483 | simd_clone_compute_base_data_type (struct cgraph_node *node, |
484 | struct cgraph_simd_clone *clone_info) |
485 | { |
486 | tree type = integer_type_node; |
487 | tree fndecl = node->decl; |
488 | |
489 | /* a) For non-void function, the characteristic data type is the |
490 | return type. */ |
491 | if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE) |
492 | type = TREE_TYPE (TREE_TYPE (fndecl)); |
493 | |
494 | /* b) If the function has any non-uniform, non-linear parameters, |
495 | then the characteristic data type is the type of the first |
496 | such parameter. */ |
497 | else |
498 | { |
499 | auto_vec<tree> map; |
500 | simd_clone_vector_of_formal_parm_types (args: &map, fndecl); |
501 | for (unsigned int i = 0; i < clone_info->nargs; ++i) |
502 | if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) |
503 | { |
504 | type = map[i]; |
505 | break; |
506 | } |
507 | } |
508 | |
509 | /* c) If the characteristic data type determined by a) or b) above |
510 | is struct, union, or class type which is pass-by-value (except |
511 | for the type that maps to the built-in complex data type), the |
512 | characteristic data type is int. */ |
513 | if (RECORD_OR_UNION_TYPE_P (type) |
514 | && !aggregate_value_p (type, NULL) |
515 | && TREE_CODE (type) != COMPLEX_TYPE) |
516 | return integer_type_node; |
517 | |
518 | /* d) If none of the above three classes is applicable, the |
519 | characteristic data type is int. */ |
520 | |
521 | return type; |
522 | |
523 | /* e) For Intel Xeon Phi native and offload compilation, if the |
524 | resulting characteristic data type is 8-bit or 16-bit integer |
525 | data type, the characteristic data type is int. */ |
526 | /* Well, we don't handle Xeon Phi yet. */ |
527 | } |
528 | |
529 | static tree |
530 | simd_clone_mangle (struct cgraph_node *node, |
531 | struct cgraph_simd_clone *clone_info) |
532 | { |
533 | char vecsize_mangle = clone_info->vecsize_mangle; |
534 | char mask = clone_info->inbranch ? 'M' : 'N'; |
535 | poly_uint64 simdlen = clone_info->simdlen; |
536 | unsigned int n; |
537 | pretty_printer pp; |
538 | |
539 | gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U)); |
540 | |
541 | pp_string (&pp, "_ZGV" ); |
542 | pp_character (&pp, vecsize_mangle); |
543 | pp_character (&pp, mask); |
544 | /* For now, simdlen is always constant, while variable simdlen pp 'n'. */ |
545 | unsigned int len = simdlen.to_constant (); |
546 | pp_decimal_int (&pp, (len)); |
547 | |
548 | for (n = 0; n < clone_info->nargs; ++n) |
549 | { |
550 | struct cgraph_simd_clone_arg arg = clone_info->args[n]; |
551 | |
552 | switch (arg.arg_type) |
553 | { |
554 | case SIMD_CLONE_ARG_TYPE_UNIFORM: |
555 | pp_character (&pp, 'u'); |
556 | break; |
557 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
558 | pp_character (&pp, 'l'); |
559 | goto mangle_linear; |
560 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
561 | pp_character (&pp, 'R'); |
562 | goto mangle_linear; |
563 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
564 | pp_character (&pp, 'L'); |
565 | goto mangle_linear; |
566 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
567 | pp_character (&pp, 'U'); |
568 | goto mangle_linear; |
569 | mangle_linear: |
570 | gcc_assert (arg.linear_step != 0); |
571 | if (arg.linear_step > 1) |
572 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
573 | else if (arg.linear_step < 0) |
574 | { |
575 | pp_character (&pp, 'n'); |
576 | pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT) |
577 | arg.linear_step)); |
578 | } |
579 | break; |
580 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
581 | pp_string (&pp, "ls" ); |
582 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
583 | break; |
584 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
585 | pp_string (&pp, "Rs" ); |
586 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
587 | break; |
588 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
589 | pp_string (&pp, "Ls" ); |
590 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
591 | break; |
592 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
593 | pp_string (&pp, "Us" ); |
594 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
595 | break; |
596 | default: |
597 | pp_character (&pp, 'v'); |
598 | } |
599 | if (arg.alignment) |
600 | { |
601 | pp_character (&pp, 'a'); |
602 | pp_decimal_int (&pp, arg.alignment); |
603 | } |
604 | } |
605 | |
606 | pp_underscore (&pp); |
607 | const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)); |
608 | if (*str == '*') |
609 | ++str; |
610 | pp_string (&pp, str); |
611 | str = pp_formatted_text (&pp); |
612 | |
613 | /* If there already is a SIMD clone with the same mangled name, don't |
614 | add another one. This can happen e.g. for |
615 | #pragma omp declare simd |
616 | #pragma omp declare simd simdlen(8) |
617 | int foo (int, int); |
618 | if the simdlen is assumed to be 8 for the first one, etc. */ |
619 | for (struct cgraph_node *clone = node->simd_clones; clone; |
620 | clone = clone->simdclone->next_clone) |
621 | if (id_equal (DECL_ASSEMBLER_NAME (clone->decl), str)) |
622 | return NULL_TREE; |
623 | |
624 | return get_identifier (str); |
625 | } |
626 | |
627 | /* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true, |
628 | create it as a local symbol, otherwise copy the symbol linkage and |
629 | visibility attributes from OLD_NODE. */ |
630 | |
631 | static struct cgraph_node * |
632 | simd_clone_create (struct cgraph_node *old_node, bool force_local) |
633 | { |
634 | struct cgraph_node *new_node; |
635 | if (old_node->definition) |
636 | { |
637 | if (!old_node->has_gimple_body_p ()) |
638 | return NULL; |
639 | old_node->get_body (); |
640 | new_node = old_node->create_version_clone_with_body (redirect_callers: vNULL, NULL, NULL, |
641 | NULL, NULL, |
642 | clone_name: "simdclone" ); |
643 | } |
644 | else |
645 | { |
646 | tree old_decl = old_node->decl; |
647 | tree new_decl = copy_node (old_node->decl); |
648 | DECL_NAME (new_decl) = clone_function_name_numbered (decl: old_decl, |
649 | suffix: "simdclone" ); |
650 | SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl)); |
651 | SET_DECL_RTL (new_decl, NULL); |
652 | DECL_STATIC_CONSTRUCTOR (new_decl) = 0; |
653 | DECL_STATIC_DESTRUCTOR (new_decl) = 0; |
654 | new_node = old_node->create_version_clone (new_decl, redirect_callers: vNULL, NULL); |
655 | if (old_node->in_other_partition) |
656 | new_node->in_other_partition = 1; |
657 | } |
658 | if (new_node == NULL) |
659 | return new_node; |
660 | |
661 | set_decl_built_in_function (decl: new_node->decl, fclass: NOT_BUILT_IN, fcode: 0); |
662 | if (force_local) |
663 | { |
664 | TREE_PUBLIC (new_node->decl) = 0; |
665 | DECL_COMDAT (new_node->decl) = 0; |
666 | DECL_WEAK (new_node->decl) = 0; |
667 | DECL_EXTERNAL (new_node->decl) = 0; |
668 | DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0; |
669 | DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT; |
670 | DECL_DLLIMPORT_P (new_node->decl) = 0; |
671 | } |
672 | else |
673 | { |
674 | TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); |
675 | DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); |
676 | DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); |
677 | DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl); |
678 | DECL_VISIBILITY_SPECIFIED (new_node->decl) |
679 | = DECL_VISIBILITY_SPECIFIED (old_node->decl); |
680 | DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl); |
681 | DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl); |
682 | if (DECL_ONE_ONLY (old_node->decl)) |
683 | make_decl_one_only (new_node->decl, |
684 | DECL_ASSEMBLER_NAME (new_node->decl)); |
685 | |
686 | /* The method cgraph_version_clone_with_body () will force the new |
687 | symbol local. Undo this, and inherit external visibility from |
688 | the old node. */ |
689 | new_node->local = old_node->local; |
690 | new_node->externally_visible = old_node->externally_visible; |
691 | new_node->calls_declare_variant_alt |
692 | = old_node->calls_declare_variant_alt; |
693 | } |
694 | |
695 | /* Mark clones with internal linkage as gc'able, so they will not be |
696 | emitted unless the vectorizer can actually use them. */ |
697 | if (!TREE_PUBLIC (new_node->decl)) |
698 | new_node->gc_candidate = true; |
699 | |
700 | return new_node; |
701 | } |
702 | |
703 | /* Adjust the return type of the given function to its appropriate |
704 | vector counterpart. */ |
705 | |
706 | static void |
707 | simd_clone_adjust_return_type (struct cgraph_node *node) |
708 | { |
709 | tree fndecl = node->decl; |
710 | tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl)); |
711 | poly_uint64 veclen; |
712 | tree t; |
713 | |
714 | /* Adjust the function return type. */ |
715 | if (orig_rettype == void_type_node) |
716 | return; |
717 | t = TREE_TYPE (TREE_TYPE (fndecl)); |
718 | if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t)) |
719 | veclen = node->simdclone->vecsize_int; |
720 | else |
721 | veclen = node->simdclone->vecsize_float; |
722 | if (known_eq (veclen, 0U)) |
723 | veclen = node->simdclone->simdlen; |
724 | else |
725 | veclen = exact_div (a: veclen, b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t))); |
726 | if (multiple_p (a: veclen, b: node->simdclone->simdlen)) |
727 | veclen = node->simdclone->simdlen; |
728 | if (POINTER_TYPE_P (t)) |
729 | t = pointer_sized_int_node; |
730 | if (known_eq (veclen, node->simdclone->simdlen)) |
731 | t = build_vector_type (t, node->simdclone->simdlen); |
732 | else |
733 | { |
734 | t = build_vector_type (t, veclen); |
735 | t = build_array_type_nelts (t, exact_div (a: node->simdclone->simdlen, |
736 | b: veclen)); |
737 | } |
738 | TREE_TYPE (TREE_TYPE (fndecl)) = t; |
739 | } |
740 | |
741 | /* Each vector argument has a corresponding array to be used locally |
742 | as part of the eventual loop. Create such temporary array and |
743 | return it. |
744 | |
745 | PREFIX is the prefix to be used for the temporary. |
746 | |
747 | TYPE is the inner element type. |
748 | |
749 | SIMDLEN is the number of elements. */ |
750 | |
751 | static tree |
752 | create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen) |
753 | { |
754 | tree atype = build_array_type_nelts (type, simdlen); |
755 | tree avar = create_tmp_var_raw (atype, prefix); |
756 | gimple_add_tmp_var (avar); |
757 | return avar; |
758 | } |
759 | |
760 | /* Modify the function argument types to their corresponding vector |
761 | counterparts if appropriate. Also, create one array for each simd |
762 | argument to be used locally when using the function arguments as |
763 | part of the loop. |
764 | |
765 | NODE is the function whose arguments are to be adjusted. |
766 | |
767 | If NODE does not represent function definition, returns NULL. Otherwise |
768 | returns an adjustment class that will be filled describing how the argument |
769 | declarations will be remapped. New arguments which are not to be remapped |
770 | are marked with USER_FLAG. */ |
771 | |
772 | static void |
773 | simd_clone_adjust_argument_types (struct cgraph_node *node) |
774 | { |
775 | auto_vec<tree> args; |
776 | |
777 | if (node->definition) |
778 | push_function_arg_decls (args: &args, fndecl: node->decl); |
779 | else |
780 | simd_clone_vector_of_formal_parm_types (args: &args, fndecl: node->decl); |
781 | struct cgraph_simd_clone *sc = node->simdclone; |
782 | unsigned i, k; |
783 | poly_uint64 veclen; |
784 | auto_vec<tree> new_params; |
785 | |
786 | for (i = 0; i < sc->nargs; ++i) |
787 | { |
788 | tree parm = NULL_TREE; |
789 | tree parm_type = NULL_TREE; |
790 | if (i < args.length()) |
791 | { |
792 | parm = args[i]; |
793 | parm_type = node->definition ? TREE_TYPE (parm) : parm; |
794 | } |
795 | |
796 | sc->args[i].orig_arg = node->definition ? parm : NULL_TREE; |
797 | sc->args[i].orig_type = parm_type; |
798 | |
799 | switch (sc->args[i].arg_type) |
800 | { |
801 | default: |
802 | new_params.safe_push (obj: parm_type); |
803 | break; |
804 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
805 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
806 | new_params.safe_push (obj: parm_type); |
807 | if (node->definition) |
808 | sc->args[i].simd_array |
809 | = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)), |
810 | TREE_TYPE (parm_type), |
811 | simdlen: sc->simdlen); |
812 | break; |
813 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
814 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
815 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
816 | if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type)) |
817 | veclen = sc->vecsize_int; |
818 | else |
819 | veclen = sc->vecsize_float; |
820 | if (known_eq (veclen, 0U)) |
821 | veclen = sc->simdlen; |
822 | else |
823 | veclen |
824 | = exact_div (a: veclen, |
825 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type))); |
826 | if (multiple_p (a: veclen, b: sc->simdlen)) |
827 | veclen = sc->simdlen; |
828 | tree vtype; |
829 | if (POINTER_TYPE_P (parm_type)) |
830 | vtype = build_vector_type (pointer_sized_int_node, veclen); |
831 | else |
832 | vtype = build_vector_type (parm_type, veclen); |
833 | sc->args[i].vector_type = vtype; |
834 | k = vector_unroll_factor (sc->simdlen, veclen); |
835 | for (unsigned j = 0; j < k; j++) |
836 | new_params.safe_push (obj: vtype); |
837 | |
838 | if (node->definition) |
839 | sc->args[i].simd_array |
840 | = create_tmp_simd_array (DECL_NAME (parm) |
841 | ? IDENTIFIER_POINTER (DECL_NAME (parm)) |
842 | : NULL, type: parm_type, simdlen: sc->simdlen); |
843 | } |
844 | } |
845 | |
846 | if (sc->inbranch) |
847 | { |
848 | tree base_type = simd_clone_compute_base_data_type (node: sc->origin, clone_info: sc); |
849 | tree mask_type; |
850 | if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type)) |
851 | veclen = sc->vecsize_int; |
852 | else |
853 | veclen = sc->vecsize_float; |
854 | if (known_eq (veclen, 0U)) |
855 | veclen = sc->simdlen; |
856 | else |
857 | veclen = exact_div (a: veclen, |
858 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))); |
859 | if (multiple_p (a: veclen, b: sc->simdlen)) |
860 | veclen = sc->simdlen; |
861 | if (sc->mask_mode != VOIDmode) |
862 | mask_type |
863 | = lang_hooks.types.type_for_mode (sc->mask_mode, 1); |
864 | else if (POINTER_TYPE_P (base_type)) |
865 | mask_type = build_vector_type (pointer_sized_int_node, veclen); |
866 | else |
867 | mask_type = build_vector_type (base_type, veclen); |
868 | |
869 | k = vector_unroll_factor (sc->simdlen, veclen); |
870 | |
871 | /* We have previously allocated one extra entry for the mask. Use |
872 | it and fill it. */ |
873 | sc->nargs++; |
874 | if (sc->mask_mode != VOIDmode) |
875 | base_type = boolean_type_node; |
876 | if (node->definition) |
877 | { |
878 | sc->args[i].orig_arg |
879 | = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type); |
880 | if (sc->mask_mode == VOIDmode) |
881 | sc->args[i].simd_array |
882 | = create_tmp_simd_array (prefix: "mask" , type: base_type, simdlen: sc->simdlen); |
883 | else if (k > 1) |
884 | sc->args[i].simd_array |
885 | = create_tmp_simd_array (prefix: "mask" , type: mask_type, simdlen: k); |
886 | else |
887 | sc->args[i].simd_array = NULL_TREE; |
888 | } |
889 | sc->args[i].orig_type = base_type; |
890 | sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; |
891 | sc->args[i].vector_type = mask_type; |
892 | } |
893 | |
894 | if (!node->definition) |
895 | { |
896 | tree new_arg_types = NULL_TREE, new_reversed; |
897 | bool last_parm_void = false; |
898 | if (args.length () > 0 && args.last () == void_type_node) |
899 | last_parm_void = true; |
900 | |
901 | gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl))); |
902 | for (i = 0; i < new_params.length (); i++) |
903 | new_arg_types = tree_cons (NULL_TREE, new_params[i], new_arg_types); |
904 | new_reversed = nreverse (new_arg_types); |
905 | if (last_parm_void) |
906 | { |
907 | if (new_reversed) |
908 | TREE_CHAIN (new_arg_types) = void_list_node; |
909 | else |
910 | new_reversed = void_list_node; |
911 | } |
912 | TYPE_ARG_TYPES (TREE_TYPE (node->decl)) = new_reversed; |
913 | } |
914 | } |
915 | |
916 | /* Initialize and copy the function arguments in NODE to their |
917 | corresponding local simd arrays. Returns a fresh gimple_seq with |
918 | the instruction sequence generated. */ |
919 | |
920 | static gimple_seq |
921 | simd_clone_init_simd_arrays (struct cgraph_node *node, |
922 | ipa_param_body_adjustments *adjustments) |
923 | { |
924 | gimple_seq seq = NULL; |
925 | unsigned i = 0, j = 0, k; |
926 | |
927 | for (tree arg = DECL_ARGUMENTS (node->decl); |
928 | arg; |
929 | arg = DECL_CHAIN (arg), i++, j++) |
930 | { |
931 | ipa_adjusted_param adj = (*adjustments->m_adj_params)[j]; |
932 | if (adj.op == IPA_PARAM_OP_COPY |
933 | || POINTER_TYPE_P (TREE_TYPE (arg))) |
934 | continue; |
935 | |
936 | node->simdclone->args[i].vector_arg = arg; |
937 | |
938 | tree array = node->simdclone->args[i].simd_array; |
939 | if (node->simdclone->mask_mode != VOIDmode |
940 | && adj.param_prefix_index == IPA_PARAM_PREFIX_MASK) |
941 | { |
942 | if (array == NULL_TREE) |
943 | continue; |
944 | unsigned int l |
945 | = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array)))); |
946 | for (k = 0; k <= l; k++) |
947 | { |
948 | if (k) |
949 | { |
950 | arg = DECL_CHAIN (arg); |
951 | j++; |
952 | } |
953 | tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)), |
954 | array, size_int (k), NULL, NULL); |
955 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
956 | gimplify_and_add (t, &seq); |
957 | } |
958 | continue; |
959 | } |
960 | if (!VECTOR_TYPE_P (TREE_TYPE (arg)) |
961 | || known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)), |
962 | node->simdclone->simdlen)) |
963 | { |
964 | tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); |
965 | tree ptr = build_fold_addr_expr (array); |
966 | tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, |
967 | build_int_cst (ptype, 0)); |
968 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
969 | gimplify_and_add (t, &seq); |
970 | } |
971 | else |
972 | { |
973 | poly_uint64 simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)); |
974 | unsigned int times = vector_unroll_factor (node->simdclone->simdlen, |
975 | simdlen); |
976 | tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); |
977 | for (k = 0; k < times; k++) |
978 | { |
979 | tree ptr = build_fold_addr_expr (array); |
980 | int elemsize; |
981 | if (k) |
982 | { |
983 | arg = DECL_CHAIN (arg); |
984 | j++; |
985 | } |
986 | tree elemtype = TREE_TYPE (TREE_TYPE (arg)); |
987 | elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype)); |
988 | tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, |
989 | build_int_cst (ptype, k * elemsize * simdlen)); |
990 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
991 | gimplify_and_add (t, &seq); |
992 | } |
993 | } |
994 | } |
995 | return seq; |
996 | } |
997 | |
998 | /* Callback info for ipa_simd_modify_stmt_ops below. */ |
999 | |
1000 | struct modify_stmt_info { |
1001 | ipa_param_body_adjustments *adjustments; |
1002 | gimple *stmt; |
1003 | gimple *after_stmt; |
1004 | /* True if the parent statement was modified by |
1005 | ipa_simd_modify_stmt_ops. */ |
1006 | bool modified; |
1007 | }; |
1008 | |
1009 | /* Callback for walk_gimple_op. |
1010 | |
1011 | Adjust operands from a given statement as specified in the |
1012 | adjustments vector in the callback data. */ |
1013 | |
1014 | static tree |
1015 | ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data) |
1016 | { |
1017 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; |
1018 | struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info; |
1019 | tree *orig_tp = tp; |
1020 | if (TREE_CODE (*tp) == ADDR_EXPR) |
1021 | tp = &TREE_OPERAND (*tp, 0); |
1022 | |
1023 | if (TREE_CODE (*tp) == BIT_FIELD_REF |
1024 | || TREE_CODE (*tp) == IMAGPART_EXPR |
1025 | || TREE_CODE (*tp) == REALPART_EXPR) |
1026 | tp = &TREE_OPERAND (*tp, 0); |
1027 | |
1028 | tree repl = NULL_TREE; |
1029 | ipa_param_body_replacement *pbr = NULL; |
1030 | |
1031 | if (TREE_CODE (*tp) == PARM_DECL) |
1032 | { |
1033 | pbr = info->adjustments->get_expr_replacement (expr: *tp, ignore_default_def: true); |
1034 | if (pbr) |
1035 | repl = pbr->repl; |
1036 | } |
1037 | else if (TYPE_P (*tp)) |
1038 | *walk_subtrees = 0; |
1039 | |
1040 | if (repl) |
1041 | repl = unshare_expr (repl); |
1042 | else |
1043 | { |
1044 | if (tp != orig_tp) |
1045 | { |
1046 | *walk_subtrees = 0; |
1047 | bool modified = info->modified; |
1048 | info->modified = false; |
1049 | walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset); |
1050 | if (!info->modified) |
1051 | { |
1052 | info->modified = modified; |
1053 | return NULL_TREE; |
1054 | } |
1055 | info->modified = modified; |
1056 | repl = *tp; |
1057 | } |
1058 | else |
1059 | return NULL_TREE; |
1060 | } |
1061 | |
1062 | if (tp != orig_tp) |
1063 | { |
1064 | if (gimple_code (g: info->stmt) == GIMPLE_PHI |
1065 | && pbr |
1066 | && TREE_CODE (*orig_tp) == ADDR_EXPR |
1067 | && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL |
1068 | && pbr->dummy) |
1069 | { |
1070 | gcc_assert (TREE_CODE (pbr->dummy) == SSA_NAME); |
1071 | *orig_tp = pbr->dummy; |
1072 | info->modified = true; |
1073 | return NULL_TREE; |
1074 | } |
1075 | |
1076 | repl = build_fold_addr_expr (repl); |
1077 | gimple *stmt; |
1078 | if (is_gimple_debug (gs: info->stmt)) |
1079 | { |
1080 | tree vexpr = build_debug_expr_decl (TREE_TYPE (repl)); |
1081 | stmt = gimple_build_debug_source_bind (vexpr, repl, NULL); |
1082 | repl = vexpr; |
1083 | } |
1084 | else |
1085 | { |
1086 | stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl); |
1087 | repl = gimple_assign_lhs (gs: stmt); |
1088 | } |
1089 | gimple_stmt_iterator gsi; |
1090 | if (gimple_code (g: info->stmt) == GIMPLE_PHI) |
1091 | { |
1092 | if (info->after_stmt) |
1093 | gsi = gsi_for_stmt (info->after_stmt); |
1094 | else |
1095 | gsi = gsi_after_labels (bb: single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1096 | /* Cache SSA_NAME for next time. */ |
1097 | if (pbr |
1098 | && TREE_CODE (*orig_tp) == ADDR_EXPR |
1099 | && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL) |
1100 | { |
1101 | gcc_assert (!pbr->dummy); |
1102 | pbr->dummy = repl; |
1103 | } |
1104 | } |
1105 | else |
1106 | gsi = gsi_for_stmt (info->stmt); |
1107 | if (info->after_stmt) |
1108 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
1109 | else |
1110 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
1111 | if (gimple_code (g: info->stmt) == GIMPLE_PHI) |
1112 | info->after_stmt = stmt; |
1113 | *orig_tp = repl; |
1114 | } |
1115 | else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl))) |
1116 | { |
1117 | tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl); |
1118 | *tp = vce; |
1119 | } |
1120 | else |
1121 | *tp = repl; |
1122 | |
1123 | info->modified = true; |
1124 | return NULL_TREE; |
1125 | } |
1126 | |
1127 | /* Traverse the function body and perform all modifications as |
1128 | described in ADJUSTMENTS. At function return, ADJUSTMENTS will be |
1129 | modified such that the replacement/reduction value will now be an |
1130 | offset into the corresponding simd_array. |
1131 | |
1132 | This function will replace all function argument uses with their |
1133 | corresponding simd array elements, and ajust the return values |
1134 | accordingly. */ |
1135 | |
1136 | static void |
1137 | ipa_simd_modify_function_body (struct cgraph_node *node, |
1138 | ipa_param_body_adjustments *adjustments, |
1139 | tree retval_array, tree iter) |
1140 | { |
1141 | basic_block bb; |
1142 | unsigned int i, j; |
1143 | |
1144 | |
1145 | /* Register replacements for every function argument use to an offset into |
1146 | the corresponding simd_array. */ |
1147 | for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j) |
1148 | { |
1149 | if (!node->simdclone->args[i].vector_arg |
1150 | || (*adjustments->m_adj_params)[j].user_flag) |
1151 | continue; |
1152 | |
1153 | tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg); |
1154 | tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg); |
1155 | tree r = build4 (ARRAY_REF, basetype, node->simdclone->args[i].simd_array, |
1156 | iter, NULL_TREE, NULL_TREE); |
1157 | adjustments->register_replacement (apm: &(*adjustments->m_adj_params)[j], replacement: r); |
1158 | |
1159 | if (multiple_p (a: node->simdclone->simdlen, b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
1160 | j += vector_unroll_factor (node->simdclone->simdlen, |
1161 | TYPE_VECTOR_SUBPARTS (vectype)) - 1; |
1162 | } |
1163 | adjustments->sort_replacements (); |
1164 | |
1165 | tree name; |
1166 | FOR_EACH_SSA_NAME (i, name, cfun) |
1167 | { |
1168 | tree base_var; |
1169 | if (SSA_NAME_VAR (name) |
1170 | && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL |
1171 | && (base_var |
1172 | = adjustments->get_replacement_ssa_base (SSA_NAME_VAR (name)))) |
1173 | { |
1174 | if (SSA_NAME_IS_DEFAULT_DEF (name)) |
1175 | { |
1176 | tree old_decl = SSA_NAME_VAR (name); |
1177 | bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1178 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
1179 | tree repl = adjustments->lookup_replacement (base: old_decl, unit_offset: 0); |
1180 | gcc_checking_assert (repl); |
1181 | repl = unshare_expr (repl); |
1182 | set_ssa_default_def (cfun, old_decl, NULL_TREE); |
1183 | SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var); |
1184 | SSA_NAME_IS_DEFAULT_DEF (name) = 0; |
1185 | gimple *stmt = gimple_build_assign (name, repl); |
1186 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
1187 | } |
1188 | else |
1189 | SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var); |
1190 | } |
1191 | } |
1192 | |
1193 | struct modify_stmt_info info; |
1194 | info.adjustments = adjustments; |
1195 | |
1196 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
1197 | { |
1198 | gimple_stmt_iterator gsi; |
1199 | |
1200 | for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1201 | { |
1202 | gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi)); |
1203 | int i, n = gimple_phi_num_args (gs: phi); |
1204 | info.stmt = phi; |
1205 | info.after_stmt = NULL; |
1206 | struct walk_stmt_info wi; |
1207 | memset (s: &wi, c: 0, n: sizeof (wi)); |
1208 | info.modified = false; |
1209 | wi.info = &info; |
1210 | for (i = 0; i < n; ++i) |
1211 | { |
1212 | int walk_subtrees = 1; |
1213 | tree arg = gimple_phi_arg_def (gs: phi, index: i); |
1214 | tree op = arg; |
1215 | ipa_simd_modify_stmt_ops (tp: &op, walk_subtrees: &walk_subtrees, data: &wi); |
1216 | if (op != arg) |
1217 | { |
1218 | SET_PHI_ARG_DEF (phi, i, op); |
1219 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
1220 | if (gimple_phi_arg_edge (phi, i)->flags & EDGE_ABNORMAL) |
1221 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op) = 1; |
1222 | } |
1223 | } |
1224 | } |
1225 | |
1226 | gsi = gsi_start_bb (bb); |
1227 | while (!gsi_end_p (i: gsi)) |
1228 | { |
1229 | gimple *stmt = gsi_stmt (i: gsi); |
1230 | info.stmt = stmt; |
1231 | info.after_stmt = NULL; |
1232 | struct walk_stmt_info wi; |
1233 | |
1234 | memset (s: &wi, c: 0, n: sizeof (wi)); |
1235 | info.modified = false; |
1236 | wi.info = &info; |
1237 | walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi); |
1238 | |
1239 | if (greturn *return_stmt = dyn_cast <greturn *> (p: stmt)) |
1240 | { |
1241 | tree retval = gimple_return_retval (gs: return_stmt); |
1242 | edge e = find_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun)); |
1243 | e->flags |= EDGE_FALLTHRU; |
1244 | if (!retval) |
1245 | { |
1246 | gsi_remove (&gsi, true); |
1247 | continue; |
1248 | } |
1249 | |
1250 | /* Replace `return foo' with `retval_array[iter] = foo'. */ |
1251 | tree ref = build4 (ARRAY_REF, TREE_TYPE (retval), |
1252 | retval_array, iter, NULL, NULL); |
1253 | stmt = gimple_build_assign (ref, retval); |
1254 | gsi_replace (&gsi, stmt, true); |
1255 | info.modified = true; |
1256 | } |
1257 | |
1258 | if (info.modified) |
1259 | { |
1260 | update_stmt (s: stmt); |
1261 | /* If the above changed the var of a debug bind into something |
1262 | different, remove the debug stmt. We could also for all the |
1263 | replaced parameters add VAR_DECLs for debug info purposes, |
1264 | add debug stmts for those to be the simd array accesses and |
1265 | replace debug stmt var operand with that var. Debugging of |
1266 | vectorized loops doesn't work too well, so don't bother for |
1267 | now. */ |
1268 | if ((gimple_debug_bind_p (s: stmt) |
1269 | && !DECL_P (gimple_debug_bind_get_var (stmt))) |
1270 | || (gimple_debug_source_bind_p (s: stmt) |
1271 | && !DECL_P (gimple_debug_source_bind_get_var (stmt)))) |
1272 | { |
1273 | gsi_remove (&gsi, true); |
1274 | continue; |
1275 | } |
1276 | if (maybe_clean_eh_stmt (stmt)) |
1277 | gimple_purge_dead_eh_edges (gimple_bb (g: stmt)); |
1278 | } |
1279 | gsi_next (i: &gsi); |
1280 | } |
1281 | } |
1282 | } |
1283 | |
1284 | /* Helper function of simd_clone_adjust, return linear step addend |
1285 | of Ith argument. */ |
1286 | |
1287 | static tree |
1288 | simd_clone_linear_addend (struct cgraph_node *node, unsigned int i, |
1289 | tree addtype, basic_block entry_bb) |
1290 | { |
1291 | tree ptype = NULL_TREE; |
1292 | switch (node->simdclone->args[i].arg_type) |
1293 | { |
1294 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
1295 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
1296 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
1297 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
1298 | return build_int_cst (addtype, node->simdclone->args[i].linear_step); |
1299 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
1300 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
1301 | ptype = TREE_TYPE (node->simdclone->args[i].orig_arg); |
1302 | break; |
1303 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
1304 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
1305 | ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg)); |
1306 | break; |
1307 | default: |
1308 | gcc_unreachable (); |
1309 | } |
1310 | |
1311 | unsigned int idx = node->simdclone->args[i].linear_step; |
1312 | tree arg = node->simdclone->args[idx].orig_arg; |
1313 | gcc_assert (is_gimple_reg_type (TREE_TYPE (arg))); |
1314 | gimple_stmt_iterator gsi = gsi_after_labels (bb: entry_bb); |
1315 | gimple *g; |
1316 | tree ret; |
1317 | if (is_gimple_reg (arg)) |
1318 | ret = get_or_create_ssa_default_def (cfun, arg); |
1319 | else |
1320 | { |
1321 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg); |
1322 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1323 | ret = gimple_assign_lhs (gs: g); |
1324 | } |
1325 | if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE) |
1326 | { |
1327 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))), |
1328 | build_simple_mem_ref (ret)); |
1329 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1330 | ret = gimple_assign_lhs (gs: g); |
1331 | } |
1332 | if (!useless_type_conversion_p (addtype, TREE_TYPE (ret))) |
1333 | { |
1334 | g = gimple_build_assign (make_ssa_name (var: addtype), NOP_EXPR, ret); |
1335 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1336 | ret = gimple_assign_lhs (gs: g); |
1337 | } |
1338 | if (POINTER_TYPE_P (ptype)) |
1339 | { |
1340 | tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype)); |
1341 | if (size && TREE_CODE (size) == INTEGER_CST) |
1342 | { |
1343 | g = gimple_build_assign (make_ssa_name (var: addtype), MULT_EXPR, |
1344 | ret, fold_convert (addtype, size)); |
1345 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1346 | ret = gimple_assign_lhs (gs: g); |
1347 | } |
1348 | } |
1349 | return ret; |
1350 | } |
1351 | |
1352 | /* Adjust the argument types in NODE to their appropriate vector |
1353 | counterparts. */ |
1354 | |
1355 | static void |
1356 | simd_clone_adjust (struct cgraph_node *node) |
1357 | { |
1358 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
1359 | |
1360 | tree orig_rettype = TREE_TYPE (TREE_TYPE (node->decl)); |
1361 | TREE_TYPE (node->decl) = build_distinct_type_copy (TREE_TYPE (node->decl)); |
1362 | simd_clone_adjust_return_type (node); |
1363 | simd_clone_adjust_argument_types (node); |
1364 | targetm.simd_clone.adjust (node); |
1365 | tree retval = NULL_TREE; |
1366 | if (orig_rettype != void_type_node) |
1367 | { |
1368 | poly_uint64 veclen; |
1369 | if (INTEGRAL_TYPE_P (orig_rettype) || POINTER_TYPE_P (orig_rettype)) |
1370 | veclen = node->simdclone->vecsize_int; |
1371 | else |
1372 | veclen = node->simdclone->vecsize_float; |
1373 | if (known_eq (veclen, 0U)) |
1374 | veclen = node->simdclone->simdlen; |
1375 | else |
1376 | veclen = exact_div (a: veclen, |
1377 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (orig_rettype))); |
1378 | if (multiple_p (a: veclen, b: node->simdclone->simdlen)) |
1379 | veclen = node->simdclone->simdlen; |
1380 | |
1381 | retval = DECL_RESULT (node->decl); |
1382 | /* Adjust the DECL_RESULT. */ |
1383 | TREE_TYPE (retval) = TREE_TYPE (TREE_TYPE (node->decl)); |
1384 | relayout_decl (retval); |
1385 | |
1386 | tree atype = build_array_type_nelts (orig_rettype, |
1387 | node->simdclone->simdlen); |
1388 | if (maybe_ne (a: veclen, b: node->simdclone->simdlen)) |
1389 | retval = build1 (VIEW_CONVERT_EXPR, atype, retval); |
1390 | else |
1391 | { |
1392 | /* Set up a SIMD array to use as the return value. */ |
1393 | retval = create_tmp_var_raw (atype, "retval" ); |
1394 | gimple_add_tmp_var (retval); |
1395 | } |
1396 | } |
1397 | |
1398 | struct cgraph_simd_clone *sc = node->simdclone; |
1399 | vec<ipa_adjusted_param, va_gc> *new_params = NULL; |
1400 | vec_safe_reserve (v&: new_params, nelems: sc->nargs); |
1401 | unsigned i, j, k; |
1402 | for (i = 0; i < sc->nargs; ++i) |
1403 | { |
1404 | ipa_adjusted_param adj; |
1405 | memset (s: &adj, c: 0, n: sizeof (adj)); |
1406 | poly_uint64 veclen; |
1407 | tree elem_type; |
1408 | |
1409 | adj.base_index = i; |
1410 | adj.prev_clone_index = i; |
1411 | switch (sc->args[i].arg_type) |
1412 | { |
1413 | default: |
1414 | /* No adjustment necessary for scalar arguments. */ |
1415 | adj.op = IPA_PARAM_OP_COPY; |
1416 | break; |
1417 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
1418 | adj.op = IPA_PARAM_OP_COPY; |
1419 | break; |
1420 | case SIMD_CLONE_ARG_TYPE_MASK: |
1421 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
1422 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
1423 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
1424 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK |
1425 | && sc->mask_mode != VOIDmode) |
1426 | elem_type = boolean_type_node; |
1427 | else |
1428 | elem_type = TREE_TYPE (sc->args[i].vector_type); |
1429 | if (INTEGRAL_TYPE_P (elem_type) || POINTER_TYPE_P (elem_type)) |
1430 | veclen = sc->vecsize_int; |
1431 | else |
1432 | veclen = sc->vecsize_float; |
1433 | if (known_eq (veclen, 0U)) |
1434 | veclen = sc->simdlen; |
1435 | else |
1436 | veclen |
1437 | = exact_div (a: veclen, |
1438 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (elem_type))); |
1439 | if (multiple_p (a: veclen, b: sc->simdlen)) |
1440 | veclen = sc->simdlen; |
1441 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
1442 | { |
1443 | adj.user_flag = 1; |
1444 | adj.param_prefix_index = IPA_PARAM_PREFIX_MASK; |
1445 | } |
1446 | else |
1447 | adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD; |
1448 | adj.op = IPA_PARAM_OP_NEW; |
1449 | adj.type = sc->args[i].vector_type; |
1450 | k = vector_unroll_factor (sc->simdlen, veclen); |
1451 | for (j = 1; j < k; j++) |
1452 | { |
1453 | vec_safe_push (v&: new_params, obj: adj); |
1454 | if (j == 1) |
1455 | { |
1456 | memset (s: &adj, c: 0, n: sizeof (adj)); |
1457 | adj.op = IPA_PARAM_OP_NEW; |
1458 | adj.user_flag = 1; |
1459 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
1460 | adj.param_prefix_index = IPA_PARAM_PREFIX_MASK; |
1461 | else |
1462 | adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD; |
1463 | adj.base_index = i; |
1464 | adj.prev_clone_index = i; |
1465 | adj.type = sc->args[i].vector_type; |
1466 | } |
1467 | } |
1468 | } |
1469 | vec_safe_push (v&: new_params, obj: adj); |
1470 | } |
1471 | ipa_param_body_adjustments *adjustments |
1472 | = new ipa_param_body_adjustments (new_params, node->decl); |
1473 | adjustments->modify_formal_parameters (); |
1474 | |
1475 | push_gimplify_context (); |
1476 | |
1477 | gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments); |
1478 | |
1479 | /* Adjust all uses of vector arguments accordingly. Adjust all |
1480 | return values accordingly. */ |
1481 | tree iter = create_tmp_var (unsigned_type_node, "iter" ); |
1482 | tree iter1 = make_ssa_name (var: iter); |
1483 | tree iter2 = NULL_TREE; |
1484 | ipa_simd_modify_function_body (node, adjustments, retval_array: retval, iter: iter1); |
1485 | delete adjustments; |
1486 | |
1487 | /* Initialize the iteration variable. */ |
1488 | basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1489 | basic_block body_bb = split_block_after_labels (entry_bb)->dest; |
1490 | gimple_stmt_iterator gsi = gsi_after_labels (bb: entry_bb); |
1491 | /* Insert the SIMD array and iv initialization at function |
1492 | entry. */ |
1493 | gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT); |
1494 | |
1495 | pop_gimplify_context (NULL); |
1496 | |
1497 | gimple *g; |
1498 | basic_block incr_bb = NULL; |
1499 | class loop *loop = NULL; |
1500 | |
1501 | /* Create a new BB right before the original exit BB, to hold the |
1502 | iteration increment and the condition/branch. */ |
1503 | if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)) |
1504 | { |
1505 | basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; |
1506 | incr_bb = create_empty_bb (orig_exit); |
1507 | incr_bb->count = profile_count::zero (); |
1508 | add_bb_to_loop (incr_bb, body_bb->loop_father); |
1509 | while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)) |
1510 | { |
1511 | edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
1512 | redirect_edge_succ (e, incr_bb); |
1513 | incr_bb->count += e->count (); |
1514 | } |
1515 | } |
1516 | else if (node->simdclone->inbranch) |
1517 | { |
1518 | incr_bb = create_empty_bb (entry_bb); |
1519 | incr_bb->count = profile_count::zero (); |
1520 | add_bb_to_loop (incr_bb, body_bb->loop_father); |
1521 | } |
1522 | |
1523 | if (incr_bb) |
1524 | { |
1525 | make_single_succ_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
1526 | gsi = gsi_last_bb (bb: incr_bb); |
1527 | iter2 = make_ssa_name (var: iter); |
1528 | g = gimple_build_assign (iter2, PLUS_EXPR, iter1, |
1529 | build_int_cst (unsigned_type_node, 1)); |
1530 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1531 | |
1532 | /* Mostly annotate the loop for the vectorizer (the rest is done |
1533 | below). */ |
1534 | loop = alloc_loop (); |
1535 | cfun->has_force_vectorize_loops = true; |
1536 | /* For now, simlen is always constant. */ |
1537 | loop->safelen = node->simdclone->simdlen.to_constant (); |
1538 | loop->force_vectorize = true; |
1539 | loop->header = body_bb; |
1540 | } |
1541 | |
1542 | /* Branch around the body if the mask applies. */ |
1543 | if (node->simdclone->inbranch) |
1544 | { |
1545 | gsi = gsi_last_bb (bb: loop->header); |
1546 | tree mask_array |
1547 | = node->simdclone->args[node->simdclone->nargs - 1].simd_array; |
1548 | tree mask; |
1549 | if (node->simdclone->mask_mode != VOIDmode) |
1550 | { |
1551 | tree shift_cnt; |
1552 | if (mask_array == NULL_TREE) |
1553 | { |
1554 | tree arg = node->simdclone->args[node->simdclone->nargs |
1555 | - 1].vector_arg; |
1556 | mask = get_or_create_ssa_default_def (cfun, arg); |
1557 | shift_cnt = iter1; |
1558 | } |
1559 | else |
1560 | { |
1561 | tree maskt = TREE_TYPE (mask_array); |
1562 | int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt))); |
1563 | /* For now, c must be constant here. */ |
1564 | c = exact_div (a: node->simdclone->simdlen, b: c + 1).to_constant (); |
1565 | int s = exact_log2 (x: c); |
1566 | gcc_assert (s > 0); |
1567 | c--; |
1568 | tree idx = make_ssa_name (TREE_TYPE (iter1)); |
1569 | g = gimple_build_assign (idx, RSHIFT_EXPR, iter1, |
1570 | build_int_cst (NULL_TREE, s)); |
1571 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1572 | mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); |
1573 | tree aref = build4 (ARRAY_REF, |
1574 | TREE_TYPE (TREE_TYPE (mask_array)), |
1575 | mask_array, idx, NULL, NULL); |
1576 | g = gimple_build_assign (mask, aref); |
1577 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1578 | shift_cnt = make_ssa_name (TREE_TYPE (iter1)); |
1579 | g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1, |
1580 | build_int_cst (TREE_TYPE (iter1), c)); |
1581 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1582 | } |
1583 | tree shift_cnt_conv = shift_cnt; |
1584 | if (!useless_type_conversion_p (TREE_TYPE (mask), |
1585 | TREE_TYPE (shift_cnt))) |
1586 | { |
1587 | shift_cnt_conv = make_ssa_name (TREE_TYPE (mask)); |
1588 | g = gimple_build_assign (shift_cnt_conv, NOP_EXPR, shift_cnt); |
1589 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1590 | } |
1591 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), |
1592 | RSHIFT_EXPR, mask, shift_cnt_conv); |
1593 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1594 | mask = gimple_assign_lhs (gs: g); |
1595 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), |
1596 | BIT_AND_EXPR, mask, |
1597 | build_one_cst (TREE_TYPE (mask))); |
1598 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1599 | mask = gimple_assign_lhs (gs: g); |
1600 | } |
1601 | else |
1602 | { |
1603 | mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); |
1604 | tree aref = build4 (ARRAY_REF, |
1605 | TREE_TYPE (TREE_TYPE (mask_array)), |
1606 | mask_array, iter1, NULL, NULL); |
1607 | g = gimple_build_assign (mask, aref); |
1608 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1609 | int bitsize = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref))); |
1610 | if (!INTEGRAL_TYPE_P (TREE_TYPE (aref))) |
1611 | { |
1612 | aref = build1 (VIEW_CONVERT_EXPR, |
1613 | build_nonstandard_integer_type (bitsize, 0), |
1614 | mask); |
1615 | mask = make_ssa_name (TREE_TYPE (aref)); |
1616 | g = gimple_build_assign (mask, aref); |
1617 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1618 | } |
1619 | } |
1620 | |
1621 | g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)), |
1622 | NULL, NULL); |
1623 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1624 | edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); |
1625 | e->probability = profile_probability::unlikely ().guessed (); |
1626 | incr_bb->count += e->count (); |
1627 | edge fallthru = FALLTHRU_EDGE (loop->header); |
1628 | fallthru->flags = EDGE_FALSE_VALUE; |
1629 | fallthru->probability = profile_probability::likely ().guessed (); |
1630 | } |
1631 | |
1632 | basic_block latch_bb = NULL; |
1633 | basic_block new_exit_bb = NULL; |
1634 | |
1635 | /* Generate the condition. */ |
1636 | if (incr_bb) |
1637 | { |
1638 | gsi = gsi_last_bb (bb: incr_bb); |
1639 | g = gimple_build_cond (LT_EXPR, iter2, |
1640 | build_int_cst (unsigned_type_node, |
1641 | node->simdclone->simdlen), |
1642 | NULL, NULL); |
1643 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1644 | edge e = split_block (incr_bb, gsi_stmt (i: gsi)); |
1645 | latch_bb = e->dest; |
1646 | new_exit_bb = split_block_after_labels (latch_bb)->dest; |
1647 | loop->latch = latch_bb; |
1648 | |
1649 | redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb); |
1650 | |
1651 | edge new_e = make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE); |
1652 | |
1653 | /* FIXME: Do we need to distribute probabilities for the conditional? */ |
1654 | new_e->probability = profile_probability::guessed_never (); |
1655 | /* The successor of incr_bb is already pointing to latch_bb; just |
1656 | change the flags. |
1657 | make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */ |
1658 | FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE; |
1659 | } |
1660 | |
1661 | gphi *phi = create_phi_node (iter1, body_bb); |
1662 | edge = find_edge (entry_bb, body_bb); |
1663 | edge latch_edge = NULL; |
1664 | add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge, |
1665 | UNKNOWN_LOCATION); |
1666 | if (incr_bb) |
1667 | { |
1668 | latch_edge = single_succ_edge (bb: latch_bb); |
1669 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1670 | |
1671 | /* Generate the new return. */ |
1672 | gsi = gsi_last_bb (bb: new_exit_bb); |
1673 | if (retval |
1674 | && TREE_CODE (retval) == VIEW_CONVERT_EXPR |
1675 | && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL) |
1676 | retval = TREE_OPERAND (retval, 0); |
1677 | else if (retval) |
1678 | { |
1679 | retval = build1 (VIEW_CONVERT_EXPR, |
1680 | TREE_TYPE (TREE_TYPE (node->decl)), |
1681 | retval); |
1682 | retval = force_gimple_operand_gsi (&gsi, retval, true, NULL, |
1683 | false, GSI_CONTINUE_LINKING); |
1684 | } |
1685 | g = gimple_build_return (retval); |
1686 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1687 | } |
1688 | |
1689 | /* Handle aligned clauses by replacing default defs of the aligned |
1690 | uniform args with __builtin_assume_aligned (arg_N(D), alignment) |
1691 | lhs. Handle linear by adding PHIs. */ |
1692 | for (unsigned i = 0; i < node->simdclone->nargs; i++) |
1693 | if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM |
1694 | && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg) |
1695 | || !is_gimple_reg_type |
1696 | (TREE_TYPE (node->simdclone->args[i].orig_arg)))) |
1697 | { |
1698 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1699 | if (is_gimple_reg_type (TREE_TYPE (orig_arg))) |
1700 | iter1 = make_ssa_name (TREE_TYPE (orig_arg)); |
1701 | else |
1702 | { |
1703 | iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg)); |
1704 | gimple_add_tmp_var (iter1); |
1705 | } |
1706 | gsi = gsi_after_labels (bb: entry_bb); |
1707 | g = gimple_build_assign (iter1, orig_arg); |
1708 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1709 | gsi = gsi_after_labels (bb: body_bb); |
1710 | g = gimple_build_assign (orig_arg, iter1); |
1711 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1712 | } |
1713 | else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM |
1714 | && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg) |
1715 | && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg)) |
1716 | == REFERENCE_TYPE |
1717 | && TREE_ADDRESSABLE |
1718 | (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg)))) |
1719 | { |
1720 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1721 | tree def = ssa_default_def (cfun, orig_arg); |
1722 | if (def && !has_zero_uses (var: def)) |
1723 | { |
1724 | iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg))); |
1725 | gimple_add_tmp_var (iter1); |
1726 | gsi = gsi_after_labels (bb: entry_bb); |
1727 | g = gimple_build_assign (iter1, build_simple_mem_ref (def)); |
1728 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1729 | gsi = gsi_after_labels (bb: body_bb); |
1730 | g = gimple_build_assign (build_simple_mem_ref (def), iter1); |
1731 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1732 | } |
1733 | } |
1734 | else if (node->simdclone->args[i].alignment |
1735 | && node->simdclone->args[i].arg_type |
1736 | == SIMD_CLONE_ARG_TYPE_UNIFORM |
1737 | && (node->simdclone->args[i].alignment |
1738 | & (node->simdclone->args[i].alignment - 1)) == 0 |
1739 | && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg)) |
1740 | == POINTER_TYPE) |
1741 | { |
1742 | unsigned int alignment = node->simdclone->args[i].alignment; |
1743 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1744 | tree def = ssa_default_def (cfun, orig_arg); |
1745 | if (def && !has_zero_uses (var: def)) |
1746 | { |
1747 | tree fn = builtin_decl_explicit (fncode: BUILT_IN_ASSUME_ALIGNED); |
1748 | gimple_seq seq = NULL; |
1749 | bool need_cvt = false; |
1750 | gcall *call |
1751 | = gimple_build_call (fn, 2, def, size_int (alignment)); |
1752 | g = call; |
1753 | if (!useless_type_conversion_p (TREE_TYPE (orig_arg), |
1754 | ptr_type_node)) |
1755 | need_cvt = true; |
1756 | tree t = make_ssa_name (var: need_cvt ? ptr_type_node : orig_arg); |
1757 | gimple_call_set_lhs (gs: g, lhs: t); |
1758 | gimple_seq_add_stmt_without_update (&seq, g); |
1759 | if (need_cvt) |
1760 | { |
1761 | t = make_ssa_name (var: orig_arg); |
1762 | g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (gs: g)); |
1763 | gimple_seq_add_stmt_without_update (&seq, g); |
1764 | } |
1765 | gsi_insert_seq_on_edge_immediate |
1766 | (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq); |
1767 | |
1768 | entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1769 | node->create_edge (callee: cgraph_node::get_create (fn), |
1770 | call_stmt: call, count: entry_bb->count); |
1771 | |
1772 | imm_use_iterator iter; |
1773 | use_operand_p use_p; |
1774 | gimple *use_stmt; |
1775 | tree repl = gimple_get_lhs (g); |
1776 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1777 | if (is_gimple_debug (gs: use_stmt) || use_stmt == call) |
1778 | continue; |
1779 | else |
1780 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1781 | SET_USE (use_p, repl); |
1782 | } |
1783 | } |
1784 | else if ((node->simdclone->args[i].arg_type |
1785 | == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) |
1786 | || (node->simdclone->args[i].arg_type |
1787 | == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP) |
1788 | || (node->simdclone->args[i].arg_type |
1789 | == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP) |
1790 | || (node->simdclone->args[i].arg_type |
1791 | == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP)) |
1792 | { |
1793 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1794 | gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1795 | || POINTER_TYPE_P (TREE_TYPE (orig_arg))); |
1796 | tree def = NULL_TREE; |
1797 | if (TREE_ADDRESSABLE (orig_arg)) |
1798 | { |
1799 | def = make_ssa_name (TREE_TYPE (orig_arg)); |
1800 | iter1 = make_ssa_name (TREE_TYPE (orig_arg)); |
1801 | if (incr_bb) |
1802 | iter2 = make_ssa_name (TREE_TYPE (orig_arg)); |
1803 | gsi = gsi_after_labels (bb: entry_bb); |
1804 | g = gimple_build_assign (def, orig_arg); |
1805 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1806 | } |
1807 | else |
1808 | { |
1809 | def = ssa_default_def (cfun, orig_arg); |
1810 | if (!def || has_zero_uses (var: def)) |
1811 | def = NULL_TREE; |
1812 | else |
1813 | { |
1814 | iter1 = make_ssa_name (var: orig_arg); |
1815 | if (incr_bb) |
1816 | iter2 = make_ssa_name (var: orig_arg); |
1817 | } |
1818 | } |
1819 | if (def) |
1820 | { |
1821 | phi = create_phi_node (iter1, body_bb); |
1822 | add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION); |
1823 | if (incr_bb) |
1824 | { |
1825 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1826 | enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1827 | ? PLUS_EXPR : POINTER_PLUS_EXPR; |
1828 | tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1829 | ? TREE_TYPE (orig_arg) : sizetype; |
1830 | tree addcst = simd_clone_linear_addend (node, i, addtype, |
1831 | entry_bb); |
1832 | gsi = gsi_last_bb (bb: incr_bb); |
1833 | g = gimple_build_assign (iter2, code, iter1, addcst); |
1834 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1835 | } |
1836 | |
1837 | imm_use_iterator iter; |
1838 | use_operand_p use_p; |
1839 | gimple *use_stmt; |
1840 | if (TREE_ADDRESSABLE (orig_arg)) |
1841 | { |
1842 | gsi = gsi_after_labels (bb: body_bb); |
1843 | g = gimple_build_assign (orig_arg, iter1); |
1844 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1845 | } |
1846 | else |
1847 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1848 | if (use_stmt == phi) |
1849 | continue; |
1850 | else |
1851 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1852 | SET_USE (use_p, iter1); |
1853 | } |
1854 | } |
1855 | else if (node->simdclone->args[i].arg_type |
1856 | == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP |
1857 | || (node->simdclone->args[i].arg_type |
1858 | == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP)) |
1859 | { |
1860 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1861 | tree def = ssa_default_def (cfun, orig_arg); |
1862 | gcc_assert (!TREE_ADDRESSABLE (orig_arg) |
1863 | && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE); |
1864 | if (def && !has_zero_uses (var: def)) |
1865 | { |
1866 | tree rtype = TREE_TYPE (TREE_TYPE (orig_arg)); |
1867 | iter1 = make_ssa_name (var: orig_arg); |
1868 | if (incr_bb) |
1869 | iter2 = make_ssa_name (var: orig_arg); |
1870 | tree iter3 = make_ssa_name (var: rtype); |
1871 | tree iter4 = make_ssa_name (var: rtype); |
1872 | tree iter5 = incr_bb ? make_ssa_name (var: rtype) : NULL_TREE; |
1873 | gsi = gsi_after_labels (bb: entry_bb); |
1874 | gimple *load |
1875 | = gimple_build_assign (iter3, build_simple_mem_ref (def)); |
1876 | gsi_insert_before (&gsi, load, GSI_NEW_STMT); |
1877 | |
1878 | tree array = node->simdclone->args[i].simd_array; |
1879 | TREE_ADDRESSABLE (array) = 1; |
1880 | tree ptr = build_fold_addr_expr (array); |
1881 | phi = create_phi_node (iter1, body_bb); |
1882 | add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION); |
1883 | if (incr_bb) |
1884 | { |
1885 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1886 | g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1, |
1887 | TYPE_SIZE_UNIT (TREE_TYPE (iter3))); |
1888 | gsi = gsi_last_bb (bb: incr_bb); |
1889 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1890 | } |
1891 | |
1892 | phi = create_phi_node (iter4, body_bb); |
1893 | add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION); |
1894 | if (incr_bb) |
1895 | { |
1896 | add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION); |
1897 | enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3)) |
1898 | ? PLUS_EXPR : POINTER_PLUS_EXPR; |
1899 | tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3)) |
1900 | ? TREE_TYPE (iter3) : sizetype; |
1901 | tree addcst = simd_clone_linear_addend (node, i, addtype, |
1902 | entry_bb); |
1903 | g = gimple_build_assign (iter5, code, iter4, addcst); |
1904 | gsi = gsi_last_bb (bb: incr_bb); |
1905 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1906 | } |
1907 | |
1908 | g = gimple_build_assign (build_simple_mem_ref (iter1), iter4); |
1909 | gsi = gsi_after_labels (bb: body_bb); |
1910 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1911 | |
1912 | imm_use_iterator iter; |
1913 | use_operand_p use_p; |
1914 | gimple *use_stmt; |
1915 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1916 | if (use_stmt == load) |
1917 | continue; |
1918 | else |
1919 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1920 | SET_USE (use_p, iter1); |
1921 | |
1922 | if (!TYPE_READONLY (rtype) && incr_bb) |
1923 | { |
1924 | tree v = make_ssa_name (var: rtype); |
1925 | tree aref = build4 (ARRAY_REF, rtype, array, |
1926 | size_zero_node, NULL_TREE, |
1927 | NULL_TREE); |
1928 | gsi = gsi_after_labels (bb: new_exit_bb); |
1929 | g = gimple_build_assign (v, aref); |
1930 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1931 | g = gimple_build_assign (build_simple_mem_ref (def), v); |
1932 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1933 | } |
1934 | } |
1935 | } |
1936 | |
1937 | calculate_dominance_info (CDI_DOMINATORS); |
1938 | if (loop) |
1939 | add_loop (loop, loop->header->loop_father); |
1940 | update_ssa (TODO_update_ssa); |
1941 | |
1942 | pop_cfun (); |
1943 | } |
1944 | |
1945 | /* If the function in NODE is tagged as an elemental SIMD function, |
1946 | create the appropriate SIMD clones. |
     | |
     | Nothing is done when NODE has been inlined into another function or |
     | its decl carries the "noclone" attribute.  When the decl has no |
     | "omp declare simd" attribute, one with a NULL value is added |
     | implicitly, provided flag_openmp_target_simd_clone permits it, |
     | oacc_get_fn_attrib finds nothing on the decl and |
     | ok_for_auto_simd_clone accepts NODE; EXPLICIT_P is then false, |
     | which is forwarded to the target hook and makes simd_clone_create |
     | receive force_local.  Every "omp declare simd" attribute on the |
     | decl is processed in turn, and each clone that gets created is |
     | appended to the list headed by NODE->simd_clones. */ |
1947 | |
1948 | void |
1949 | expand_simd_clones (struct cgraph_node *node) |
1950 | { |
1951 | tree attr; |
1952 | bool explicit_p = true; /* Cleared below if the attribute is synthesized here. */ |
1953 | |
1954 | if (node->inlined_to |
1955 | || lookup_attribute (attr_name: "noclone" , DECL_ATTRIBUTES (node->decl))) |
1956 | return; |
1957 | |
1958 | attr = lookup_attribute (attr_name: "omp declare simd" , |
1959 | DECL_ATTRIBUTES (node->decl)); |
1960 | |
1961 | /* See if we can add an "omp declare simd" directive implicitly |
1962 | before giving up.  An ACCEL_COMPILER build accepts the |
     | flag_openmp_target_simd_clone values ANY and NOHOST; any other |
     | build accepts ANY and HOST. */ |
1963 | /* FIXME: OpenACC "#pragma acc routine" translates into |
1964 | "omp declare target", but appears also to have some other effects |
1965 | that conflict with generating SIMD clones, causing ICEs. So don't |
1966 | do this if we've got OpenACC instead of OpenMP. */ |
1967 | if (attr == NULL_TREE |
1968 | #ifdef ACCEL_COMPILER |
1969 | && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY |
1970 | || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_NOHOST) |
1971 | #else |
1972 | && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY |
1973 | || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_HOST) |
1974 | #endif |
1975 | && !oacc_get_fn_attrib (fn: node->decl) |
1976 | && ok_for_auto_simd_clone (node)) |
1977 | { |
1978 | attr = tree_cons (get_identifier ("omp declare simd" ), NULL, |
1979 | DECL_ATTRIBUTES (node->decl)); |
1980 | DECL_ATTRIBUTES (node->decl) = attr; |
1981 | explicit_p = false; /* The directive was added here, not found on the decl. */ |
1982 | } |
1983 | |
1984 | if (attr == NULL_TREE) |
1985 | return; |
1986 | |
1987 | /* Ignore |
1988 | #pragma omp declare simd |
1989 | extern int foo (); |
1990 | in C, there we don't know the argument types at all. */ |
1991 | if (!node->definition |
1992 | && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE) |
1993 | return; |
1994 | |
1995 | /* Call this before creating clone_info, as it might ggc_collect. */ |
1996 | if (node->definition && node->has_gimple_body_p ()) |
1997 | node->get_body (); |
1998 | |
1999 | do |
2000 | { |
2001 | /* Start with parsing the "omp declare simd" attribute(s). |
     | A continue statement directly inside this do-while jumps to the |
     | loop condition, i.e. moves on to the next attribute. */ |
2002 | bool inbranch_clause_specified; |
2003 | struct cgraph_simd_clone *clone_info |
2004 | = simd_clone_clauses_extract (node, TREE_VALUE (attr), |
2005 | inbranch_specified: &inbranch_clause_specified); |
2006 | if (clone_info == NULL) |
2007 | continue; |
2008 | |
2009 | poly_uint64 orig_simdlen = clone_info->simdlen; /* Saved before the target hook runs; copied into every later clone. */ |
2010 | tree base_type = simd_clone_compute_base_data_type (node, clone_info); |
2011 | |
2012 | /* The target can return 0 (no simd clones should be created), |
2013 | 1 (just one ISA of simd clones should be created) or higher |
2014 | count of ISA variants. In that case, clone_info is initialized |
2015 | for the first ISA variant. */ |
2016 | int count |
2017 | = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info, |
2018 | base_type, 0, |
2019 | explicit_p); |
2020 | if (count == 0) |
2021 | continue; |
2022 | |
2023 | /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED, |
2024 | also create one inbranch and one !inbranch clone of it. |
     | I / 2 is the variant number handed to the target hook.  Odd I |
     | forces clone->inbranch to 1 and is skipped entirely when |
     | INBRANCH_CLAUSE_SPECIFIED.  I == 0 reuses CLONE_INFO itself; every |
     | other iteration works on a fresh copy of it. */ |
2025 | for (int i = 0; i < count * 2; i++) |
2026 | { |
2027 | struct cgraph_simd_clone *clone = clone_info; |
2028 | if (inbranch_clause_specified && (i & 1) != 0) |
2029 | continue; |
2030 | |
2031 | if (i != 0) |
2032 | { |
2033 | clone = simd_clone_struct_alloc (nargs: clone_info->nargs |
2034 | + ((i & 1) != 0)); /* One more arg for odd I; NOTE(review): presumably the inbranch mask argument -- confirm. */ |
2035 | simd_clone_struct_copy (to: clone, from: clone_info); |
2036 | /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen |
2037 | and simd_clone_adjust_argument_types did to the first |
2038 | clone's info. */ |
2039 | clone->nargs -= clone_info->inbranch; |
2040 | clone->simdlen = orig_simdlen; |
2041 | /* And call the target hook again to get the right ISA. */ |
2042 | targetm.simd_clone.compute_vecsize_and_simdlen (node, clone, |
2043 | base_type, |
2044 | i / 2, |
2045 | explicit_p); |
2046 | if ((i & 1) != 0) |
2047 | clone->inbranch = 1; |
2048 | } |
2049 | |
2050 | /* simd_clone_mangle might fail if such a clone has been created |
2051 | already.  For I == 0 CLONE is CLONE_INFO itself, so its nargs is |
     | increased by its inbranch flag before the variant is skipped. |
     | NOTE(review): presumably this keeps CLONE_INFO in the state that |
     | the nargs subtraction applied to later copies expects -- confirm |
     | against simd_clone_adjust_argument_types. */ |
2052 | tree id = simd_clone_mangle (node, clone_info: clone); |
2053 | if (id == NULL_TREE) |
2054 | { |
2055 | if (i == 0) |
2056 | clone->nargs += clone->inbranch; |
2057 | continue; |
2058 | } |
2059 | |
2060 | /* Only when we are sure we want to create the clone actually |
2061 | clone the function (or definitions) or create another |
2062 | extern FUNCTION_DECL (for prototypes without definitions). */ |
2063 | struct cgraph_node *n = simd_clone_create (old_node: node, force_local: !explicit_p); |
2064 | if (n == NULL) |
2065 | { |
2066 | if (i == 0) |
2067 | clone->nargs += clone->inbranch; /* Same nargs fix-up as after a failed mangle. */ |
2068 | continue; |
2069 | } |
2070 | |
2071 | n->simdclone = clone; |
2072 | clone->origin = node; |
2073 | clone->next_clone = NULL; /* N becomes the last element of the clone list. */ |
2074 | if (node->simd_clones == NULL) |
2075 | { |
2076 | clone->prev_clone = n; /* Sole element: the head's prev_clone names the head itself. */ |
2077 | node->simd_clones = n; |
2078 | } |
2079 | else |
2080 | { |
2081 | clone->prev_clone = node->simd_clones->simdclone->prev_clone; /* The head's prev_clone holds the current last element. */ |
2082 | clone->prev_clone->simdclone->next_clone = n; |
2083 | node->simd_clones->simdclone->prev_clone = n; |
2084 | } |
2085 | symtab->change_decl_assembler_name (decl: n->decl, name: id); |
2086 | /* And finally adjust the return type, parameters and for |
2087 | definitions also function body.  Without a definition the clone |
     | first gets a distinct copy of its function type; then the return |
     | type and argument types are adjusted and the target adjust hook |
     | runs. */ |
2088 | if (node->definition) |
2089 | simd_clone_adjust (node: n); |
2090 | else |
2091 | { |
2092 | TREE_TYPE (n->decl) |
2093 | = build_distinct_type_copy (TREE_TYPE (n->decl)); |
2094 | simd_clone_adjust_return_type (node: n); |
2095 | simd_clone_adjust_argument_types (node: n); |
2096 | targetm.simd_clone.adjust (n); |
2097 | } |
2098 | if (dump_file) |
2099 | fprintf (stream: dump_file, format: "\nGenerated %s clone %s\n" , |
2100 | (TREE_PUBLIC (n->decl) ? "global" : "local" ), |
2101 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl))); |
2102 | } |
2103 | } |
2104 | while ((attr = lookup_attribute (attr_name: "omp declare simd" , TREE_CHAIN (attr)))); /* Advance to the next "omp declare simd" attribute, if any. */ |
2105 | } |
2106 | |
2107 | /* Entry point for IPA simd clone creation pass.  Calls |
     | expand_simd_clones on every node visited by FOR_EACH_FUNCTION and |
     | always returns 0. */ |
2108 | |
2109 | static unsigned int |
2110 | ipa_omp_simd_clone (void) |
2111 | { |
2112 | struct cgraph_node *node; |
2113 | FOR_EACH_FUNCTION (node) |
2114 | expand_simd_clones (node); |
2115 | return 0; |
2116 | } |
2117 | |
2118 | namespace { /* File-local definitions of the "simdclone" pass. */ |
2119 | |
2120 | const pass_data pass_data_omp_simd_clone = /* Description passed to the simple_ipa_opt_pass constructor below. */ |
2121 | { |
2122 | .type: SIMPLE_IPA_PASS, /* type */ |
2123 | .name: "simdclone" , /* name */ |
2124 | .optinfo_flags: OPTGROUP_OMP, /* optinfo_flags */ |
2125 | .tv_id: TV_NONE, /* tv_id */ |
2126 | .properties_required: ( PROP_ssa | PROP_cfg ), /* properties_required */ |
2127 | .properties_provided: 0, /* properties_provided */ |
2128 | .properties_destroyed: 0, /* properties_destroyed */ |
2129 | .todo_flags_start: 0, /* todo_flags_start */ |
2130 | .todo_flags_finish: 0, /* todo_flags_finish */ |
2131 | }; |
2132 | |
2133 | class pass_omp_simd_clone : public simple_ipa_opt_pass /* Pass object whose execute method runs ipa_omp_simd_clone. */ |
2134 | { |
2135 | public: |
2136 | pass_omp_simd_clone(gcc::context *ctxt) |
2137 | : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt) |
2138 | {} |
2139 | |
2140 | /* opt_pass methods: gate is defined out of line below; execute |
     | ignores its function argument and returns whatever |
     | ipa_omp_simd_clone returns. */ |
2141 | bool gate (function *) final override; |
2142 | unsigned int execute (function *) final override |
2143 | { |
2144 | return ipa_omp_simd_clone (); |
2145 | } |
2146 | }; |
2147 | |
2148 | bool |
2149 | pass_omp_simd_clone::gate (function *) /* True only when the target supplies the compute_vecsize_and_simdlen hook. */ |
2150 | { |
2151 | return targetm.simd_clone.compute_vecsize_and_simdlen != NULL; |
2152 | } |
2153 | |
2154 | } // anon namespace |
2155 | |
2156 | simple_ipa_opt_pass * |
2157 | make_pass_omp_simd_clone (gcc::context *ctxt) /* Return a pass_omp_simd_clone allocated with new and constructed for CTXT. */ |
2158 | { |
2159 | return new pass_omp_simd_clone (ctxt); |
2160 | } |
2161 | |