1/* Implementation of commonly needed HSAIL related functions and methods.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "is-a.h"
27#include "hash-set.h"
28#include "hash-map.h"
29#include "vec.h"
30#include "tree.h"
31#include "dumpfile.h"
32#include "gimple-pretty-print.h"
33#include "diagnostic-core.h"
34#include "alloc-pool.h"
35#include "cgraph.h"
36#include "print-tree.h"
37#include "stringpool.h"
38#include "symbol-summary.h"
39#include "hsa-common.h"
40#include "internal-fn.h"
41#include "ctype.h"
42#include "builtins.h"
43#include "stringpool.h"
44#include "attribs.h"
45
/* Structure containing intermediate HSA representation of the generated
   function.  It is dereferenced without a NULL check by hsa_seen_error and
   hsa_fail_cfun in this file, so it must be set before they are called.  */
class hsa_function_representation *hsa_cfun;
49
/* Element of the mapping vector between a host decl and an HSA kernel.
   Elements are created by hsa_add_kern_decl_mapping and read back through
   the hsa_get_decl_kernel_mapping_* accessors.  */

struct GTY(()) hsa_decl_kernel_map_element
{
  /* The decl of the host function.  */
  tree decl;
  /* Name of the HSA kernel in BRIG.  The string is released with free () in
     hsa_free_decl_kernel_mapping, which is why it is marked GTY((skip)).  */
  char * GTY((skip)) name;
  /* Size of OMP data, if the kernel contains a kernel dispatch.  */
  unsigned omp_data_size;
  /* True if the function is gridified kernel.  */
  bool gridified_kernel_p;
};
63
/* Mapping between decls and corresponding HSA kernels in this compilation
   unit.  Allocated lazily by vec_safe_push in hsa_add_kern_decl_mapping.  */

static GTY (()) vec<hsa_decl_kernel_map_element, va_gc>
  *hsa_decl_kernel_mapping;

/* Mapping between decls and corresponding HSA kernels
   called by the function.  Created on first use by
   hsa_add_kernel_dependency.  */
hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;

/* Hash table used to look up a symbol for a decl.  Allocated in
   hsa_init_compilation_unit_data; the stored symbols are deleted together
   with the table in hsa_deinit_compilation_unit_data.  */
hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;

/* HSA summaries.  Created on demand by hsa_register_kernel.  */
hsa_summary_t *hsa_summaries = NULL;

/* HSA number of threads.  Deleted and reset to NULL in
   hsa_deinit_compilation_unit_data if it is set.  */
hsa_symbol *hsa_num_threads = NULL;

/* HSA function that cannot be expanded to HSAIL.  hsa_fail_cfun adds the
   decl of the current HSA function to this set.  */
hash_set <tree> *hsa_failed_functions = NULL;

/* True if compilation unit-wide data are already allocated and initialized.
   Set by hsa_init_compilation_unit_data and cleared by
   hsa_deinit_compilation_unit_data.  */
static bool compilation_unit_data_initialized;
88
89/* Return true if FNDECL represents an HSA-callable function. */
90
91bool
92hsa_callable_function_p (tree fndecl)
93{
94 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))
95 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl)));
96}
97
98/* Allocate HSA structures that are are used when dealing with different
99 functions. */
100
101void
102hsa_init_compilation_unit_data (void)
103{
104 if (compilation_unit_data_initialized)
105 return;
106
107 compilation_unit_data_initialized = true;
108
109 hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8);
110 hsa_failed_functions = new hash_set <tree> ();
111 hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2);
112}
113
114/* Free data structures that are used when dealing with different
115 functions. */
116
117void
118hsa_deinit_compilation_unit_data (void)
119{
120 gcc_assert (compilation_unit_data_initialized);
121
122 delete hsa_failed_functions;
123 delete hsa_emitted_internal_decls;
124
125 for (hash_table <hsa_noop_symbol_hasher>::iterator it
126 = hsa_global_variable_symbols->begin ();
127 it != hsa_global_variable_symbols->end ();
128 ++it)
129 {
130 hsa_symbol *sym = *it;
131 delete sym;
132 }
133
134 delete hsa_global_variable_symbols;
135
136 if (hsa_num_threads)
137 {
138 delete hsa_num_threads;
139 hsa_num_threads = NULL;
140 }
141
142 compilation_unit_data_initialized = false;
143}
144
145/* Return true if we are generating large HSA machine model. */
146
147bool
148hsa_machine_large_p (void)
149{
150 /* FIXME: I suppose this is technically wrong but should work for me now. */
151 return (GET_MODE_BITSIZE (Pmode) == 64);
152}
153
/* Return true if the HSA full profile is being used.  This currently
   answers true unconditionally.  */

bool
hsa_full_profile_p (void)
{
  const bool full_profile_p = true;

  return full_profile_p;
}
161
162/* Return true if a register in operand number OPNUM of instruction
163 is an output. False if it is an input. */
164
165bool
166hsa_insn_basic::op_output_p (unsigned opnum)
167{
168 switch (m_opcode)
169 {
170 case HSA_OPCODE_PHI:
171 case BRIG_OPCODE_CBR:
172 case BRIG_OPCODE_SBR:
173 case BRIG_OPCODE_ST:
174 case BRIG_OPCODE_SIGNALNORET:
175 case BRIG_OPCODE_DEBUGTRAP:
176 /* FIXME: There are probably missing cases here, double check. */
177 return false;
178 case BRIG_OPCODE_EXPAND:
179 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
180 return opnum < operand_count () - 1;
181 default:
182 return opnum == 0;
183 }
184}
185
186/* Return true if OPCODE is an floating-point bit instruction opcode. */
187
188bool
189hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode)
190{
191 switch (opcode)
192 {
193 case BRIG_OPCODE_NEG:
194 case BRIG_OPCODE_ABS:
195 case BRIG_OPCODE_CLASS:
196 case BRIG_OPCODE_COPYSIGN:
197 return true;
198 default:
199 return false;
200 }
201}
202
203/* Return the number of destination operands for this INSN. */
204
205unsigned
206hsa_insn_basic::input_count ()
207{
208 switch (m_opcode)
209 {
210 default:
211 return 1;
212
213 case BRIG_OPCODE_NOP:
214 return 0;
215
216 case BRIG_OPCODE_EXPAND:
217 return 2;
218
219 case BRIG_OPCODE_LD:
220 /* ld_v[234] not yet handled. */
221 return 1;
222
223 case BRIG_OPCODE_ST:
224 return 0;
225
226 case BRIG_OPCODE_ATOMICNORET:
227 return 0;
228
229 case BRIG_OPCODE_SIGNAL:
230 return 1;
231
232 case BRIG_OPCODE_SIGNALNORET:
233 return 0;
234
235 case BRIG_OPCODE_MEMFENCE:
236 return 0;
237
238 case BRIG_OPCODE_RDIMAGE:
239 case BRIG_OPCODE_LDIMAGE:
240 case BRIG_OPCODE_STIMAGE:
241 case BRIG_OPCODE_QUERYIMAGE:
242 case BRIG_OPCODE_QUERYSAMPLER:
243 sorry ("HSA image ops not handled");
244 return 0;
245
246 case BRIG_OPCODE_CBR:
247 case BRIG_OPCODE_BR:
248 return 0;
249
250 case BRIG_OPCODE_SBR:
251 return 0; /* ??? */
252
253 case BRIG_OPCODE_WAVEBARRIER:
254 return 0; /* ??? */
255
256 case BRIG_OPCODE_BARRIER:
257 case BRIG_OPCODE_ARRIVEFBAR:
258 case BRIG_OPCODE_INITFBAR:
259 case BRIG_OPCODE_JOINFBAR:
260 case BRIG_OPCODE_LEAVEFBAR:
261 case BRIG_OPCODE_RELEASEFBAR:
262 case BRIG_OPCODE_WAITFBAR:
263 return 0;
264
265 case BRIG_OPCODE_LDF:
266 return 1;
267
268 case BRIG_OPCODE_ACTIVELANECOUNT:
269 case BRIG_OPCODE_ACTIVELANEID:
270 case BRIG_OPCODE_ACTIVELANEMASK:
271 case BRIG_OPCODE_ACTIVELANEPERMUTE:
272 return 1; /* ??? */
273
274 case BRIG_OPCODE_CALL:
275 case BRIG_OPCODE_SCALL:
276 case BRIG_OPCODE_ICALL:
277 return 0;
278
279 case BRIG_OPCODE_RET:
280 return 0;
281
282 case BRIG_OPCODE_ALLOCA:
283 return 1;
284
285 case BRIG_OPCODE_CLEARDETECTEXCEPT:
286 return 0;
287
288 case BRIG_OPCODE_SETDETECTEXCEPT:
289 return 0;
290
291 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
292 case BRIG_OPCODE_PACKETID:
293 case BRIG_OPCODE_CASQUEUEWRITEINDEX:
294 case BRIG_OPCODE_LDQUEUEREADINDEX:
295 case BRIG_OPCODE_LDQUEUEWRITEINDEX:
296 case BRIG_OPCODE_STQUEUEREADINDEX:
297 case BRIG_OPCODE_STQUEUEWRITEINDEX:
298 return 1; /* ??? */
299
300 case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
301 return 1;
302
303 case BRIG_OPCODE_DEBUGTRAP:
304 return 0;
305
306 case BRIG_OPCODE_GROUPBASEPTR:
307 case BRIG_OPCODE_KERNARGBASEPTR:
308 return 1; /* ??? */
309
310 case HSA_OPCODE_ARG_BLOCK:
311 return 0;
312
313 case BRIG_KIND_DIRECTIVE_COMMENT:
314 return 0;
315 }
316}
317
318/* Return the number of source operands for this INSN. */
319
320unsigned
321hsa_insn_basic::num_used_ops ()
322{
323 gcc_checking_assert (input_count () <= operand_count ());
324
325 return operand_count () - input_count ();
326}
327
328/* Set alignment to VALUE. */
329
330void
331hsa_insn_mem::set_align (BrigAlignment8_t value)
332{
333 /* TODO: Perhaps remove this dump later on: */
334 if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align)
335 {
336 fprintf (dump_file, "Decreasing alignment to %u in instruction ", value);
337 dump_hsa_insn (dump_file, this);
338 }
339 m_align = value;
340}
341
342/* Return size of HSA type T in bits. */
343
344unsigned
345hsa_type_bit_size (BrigType16_t t)
346{
347 switch (t)
348 {
349 case BRIG_TYPE_B1:
350 return 1;
351
352 case BRIG_TYPE_U8:
353 case BRIG_TYPE_S8:
354 case BRIG_TYPE_B8:
355 return 8;
356
357 case BRIG_TYPE_U16:
358 case BRIG_TYPE_S16:
359 case BRIG_TYPE_B16:
360 case BRIG_TYPE_F16:
361 return 16;
362
363 case BRIG_TYPE_U32:
364 case BRIG_TYPE_S32:
365 case BRIG_TYPE_B32:
366 case BRIG_TYPE_F32:
367 case BRIG_TYPE_U8X4:
368 case BRIG_TYPE_U16X2:
369 case BRIG_TYPE_S8X4:
370 case BRIG_TYPE_S16X2:
371 case BRIG_TYPE_F16X2:
372 return 32;
373
374 case BRIG_TYPE_U64:
375 case BRIG_TYPE_S64:
376 case BRIG_TYPE_F64:
377 case BRIG_TYPE_B64:
378 case BRIG_TYPE_U8X8:
379 case BRIG_TYPE_U16X4:
380 case BRIG_TYPE_U32X2:
381 case BRIG_TYPE_S8X8:
382 case BRIG_TYPE_S16X4:
383 case BRIG_TYPE_S32X2:
384 case BRIG_TYPE_F16X4:
385 case BRIG_TYPE_F32X2:
386
387 return 64;
388
389 case BRIG_TYPE_B128:
390 case BRIG_TYPE_U8X16:
391 case BRIG_TYPE_U16X8:
392 case BRIG_TYPE_U32X4:
393 case BRIG_TYPE_U64X2:
394 case BRIG_TYPE_S8X16:
395 case BRIG_TYPE_S16X8:
396 case BRIG_TYPE_S32X4:
397 case BRIG_TYPE_S64X2:
398 case BRIG_TYPE_F16X8:
399 case BRIG_TYPE_F32X4:
400 case BRIG_TYPE_F64X2:
401 return 128;
402
403 default:
404 gcc_assert (hsa_seen_error ());
405 return t;
406 }
407}
408
409/* Return BRIG bit-type with BITSIZE length. */
410
411BrigType16_t
412hsa_bittype_for_bitsize (unsigned bitsize)
413{
414 switch (bitsize)
415 {
416 case 1:
417 return BRIG_TYPE_B1;
418 case 8:
419 return BRIG_TYPE_B8;
420 case 16:
421 return BRIG_TYPE_B16;
422 case 32:
423 return BRIG_TYPE_B32;
424 case 64:
425 return BRIG_TYPE_B64;
426 case 128:
427 return BRIG_TYPE_B128;
428 default:
429 gcc_unreachable ();
430 }
431}
432
433/* Return BRIG unsigned int type with BITSIZE length. */
434
435BrigType16_t
436hsa_uint_for_bitsize (unsigned bitsize)
437{
438 switch (bitsize)
439 {
440 case 8:
441 return BRIG_TYPE_U8;
442 case 16:
443 return BRIG_TYPE_U16;
444 case 32:
445 return BRIG_TYPE_U32;
446 case 64:
447 return BRIG_TYPE_U64;
448 default:
449 gcc_unreachable ();
450 }
451}
452
453/* Return BRIG float type with BITSIZE length. */
454
455BrigType16_t
456hsa_float_for_bitsize (unsigned bitsize)
457{
458 switch (bitsize)
459 {
460 case 16:
461 return BRIG_TYPE_F16;
462 case 32:
463 return BRIG_TYPE_F32;
464 case 64:
465 return BRIG_TYPE_F64;
466 default:
467 gcc_unreachable ();
468 }
469}
470
471/* Return HSA bit-type with the same size as the type T. */
472
473BrigType16_t
474hsa_bittype_for_type (BrigType16_t t)
475{
476 return hsa_bittype_for_bitsize (hsa_type_bit_size (t));
477}
478
479/* Return HSA unsigned integer type with the same size as the type T. */
480
481BrigType16_t
482hsa_unsigned_type_for_type (BrigType16_t t)
483{
484 return hsa_uint_for_bitsize (hsa_type_bit_size (t));
485}
486
487/* Return true if TYPE is a packed HSA type. */
488
489bool
490hsa_type_packed_p (BrigType16_t type)
491{
492 return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE;
493}
494
495/* Return true if and only if TYPE is a floating point number type. */
496
497bool
498hsa_type_float_p (BrigType16_t type)
499{
500 switch (type & BRIG_TYPE_BASE_MASK)
501 {
502 case BRIG_TYPE_F16:
503 case BRIG_TYPE_F32:
504 case BRIG_TYPE_F64:
505 return true;
506 default:
507 return false;
508 }
509}
510
511/* Return true if and only if TYPE is an integer number type. */
512
513bool
514hsa_type_integer_p (BrigType16_t type)
515{
516 switch (type & BRIG_TYPE_BASE_MASK)
517 {
518 case BRIG_TYPE_U8:
519 case BRIG_TYPE_U16:
520 case BRIG_TYPE_U32:
521 case BRIG_TYPE_U64:
522 case BRIG_TYPE_S8:
523 case BRIG_TYPE_S16:
524 case BRIG_TYPE_S32:
525 case BRIG_TYPE_S64:
526 return true;
527 default:
528 return false;
529 }
530}
531
532/* Return true if and only if TYPE is an bit-type. */
533
534bool
535hsa_btype_p (BrigType16_t type)
536{
537 switch (type & BRIG_TYPE_BASE_MASK)
538 {
539 case BRIG_TYPE_B8:
540 case BRIG_TYPE_B16:
541 case BRIG_TYPE_B32:
542 case BRIG_TYPE_B64:
543 case BRIG_TYPE_B128:
544 return true;
545 default:
546 return false;
547 }
548}
549
550
551/* Return HSA alignment encoding alignment to N bits. */
552
553BrigAlignment8_t
554hsa_alignment_encoding (unsigned n)
555{
556 gcc_assert (n >= 8 && !(n & (n - 1)));
557 if (n >= 256)
558 return BRIG_ALIGNMENT_32;
559
560 switch (n)
561 {
562 case 8:
563 return BRIG_ALIGNMENT_1;
564 case 16:
565 return BRIG_ALIGNMENT_2;
566 case 32:
567 return BRIG_ALIGNMENT_4;
568 case 64:
569 return BRIG_ALIGNMENT_8;
570 case 128:
571 return BRIG_ALIGNMENT_16;
572 default:
573 gcc_unreachable ();
574 }
575}
576
577/* Return HSA alignment encoding alignment of T got
578 by get_object_alignment. */
579
580BrigAlignment8_t
581hsa_object_alignment (tree t)
582{
583 return hsa_alignment_encoding (get_object_alignment (t));
584}
585
586/* Return byte alignment for given BrigAlignment8_t value. */
587
588unsigned
589hsa_byte_alignment (BrigAlignment8_t alignment)
590{
591 gcc_assert (alignment != BRIG_ALIGNMENT_NONE);
592
593 return 1 << (alignment - 1);
594}
595
596/* Return natural alignment of HSA TYPE. */
597
598BrigAlignment8_t
599hsa_natural_alignment (BrigType16_t type)
600{
601 return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY));
602}
603
604/* Call the correct destructor of a HSA instruction. */
605
606void
607hsa_destroy_insn (hsa_insn_basic *insn)
608{
609 if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn))
610 phi->~hsa_insn_phi ();
611 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
612 br->~hsa_insn_cbr ();
613 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
614 cmp->~hsa_insn_cmp ();
615 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
616 mem->~hsa_insn_mem ();
617 else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn))
618 atomic->~hsa_insn_atomic ();
619 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
620 seg->~hsa_insn_seg ();
621 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
622 call->~hsa_insn_call ();
623 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
624 block->~hsa_insn_arg_block ();
625 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
626 sbr->~hsa_insn_sbr ();
627 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
628 br->~hsa_insn_br ();
629 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
630 comment->~hsa_insn_comment ();
631 else
632 insn->~hsa_insn_basic ();
633}
634
635/* Call the correct destructor of a HSA operand. */
636
637void
638hsa_destroy_operand (hsa_op_base *op)
639{
640 if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op))
641 list->~hsa_op_code_list ();
642 else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op))
643 list->~hsa_op_operand_list ();
644 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
645 reg->~hsa_op_reg ();
646 else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op))
647 immed->~hsa_op_immed ();
648 else
649 op->~hsa_op_base ();
650}
651
652/* Create a mapping between the original function DECL and kernel name NAME. */
653
654void
655hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size,
656 bool gridified_kernel_p)
657{
658 hsa_decl_kernel_map_element dkm;
659 dkm.decl = decl;
660 dkm.name = name;
661 dkm.omp_data_size = omp_data_size;
662 dkm.gridified_kernel_p = gridified_kernel_p;
663 vec_safe_push (hsa_decl_kernel_mapping, dkm);
664}
665
666/* Return the number of kernel decl name mappings. */
667
668unsigned
669hsa_get_number_decl_kernel_mappings (void)
670{
671 return vec_safe_length (hsa_decl_kernel_mapping);
672}
673
674/* Return the decl in the Ith kernel decl name mapping. */
675
676tree
677hsa_get_decl_kernel_mapping_decl (unsigned i)
678{
679 return (*hsa_decl_kernel_mapping)[i].decl;
680}
681
682/* Return the name in the Ith kernel decl name mapping. */
683
684char *
685hsa_get_decl_kernel_mapping_name (unsigned i)
686{
687 return (*hsa_decl_kernel_mapping)[i].name;
688}
689
690/* Return maximum OMP size for kernel decl name mapping. */
691
692unsigned
693hsa_get_decl_kernel_mapping_omp_size (unsigned i)
694{
695 return (*hsa_decl_kernel_mapping)[i].omp_data_size;
696}
697
698/* Return if the function is gridified kernel in decl name mapping. */
699
700bool
701hsa_get_decl_kernel_mapping_gridified (unsigned i)
702{
703 return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p;
704}
705
706/* Free the mapping between original decls and kernel names. */
707
708void
709hsa_free_decl_kernel_mapping (void)
710{
711 if (hsa_decl_kernel_mapping == NULL)
712 return;
713
714 for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i)
715 free ((*hsa_decl_kernel_mapping)[i].name);
716 ggc_free (hsa_decl_kernel_mapping);
717}
718
719/* Add new kernel dependency. */
720
721void
722hsa_add_kernel_dependency (tree caller, const char *called_function)
723{
724 if (hsa_decl_kernel_dependencies == NULL)
725 hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> ();
726
727 vec <const char *> *s = NULL;
728 vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller);
729 if (slot == NULL)
730 {
731 s = new vec <const char *> ();
732 hsa_decl_kernel_dependencies->put (caller, s);
733 }
734 else
735 s = *slot;
736
737 s->safe_push (called_function);
738}
739
/* Expansion to HSA needs a few gc roots to hold types, constructors etc.  In
   order to minimize the number of GTY roots, we'll root them all in the
   following array.  The individual elements should only be accessed by the
   very simple getters (of a pointer-to-tree) below: element 0 is handed out
   by hsa_get_ctor_statements, element 1 by hsa_get_dtor_statements and
   element 2 by hsa_get_kernel_dispatch_type.  */

static GTY(()) tree hsa_tree_gt_roots[3];
746
747tree *
748hsa_get_ctor_statements (void)
749{
750 return &hsa_tree_gt_roots[0];
751}
752
753tree *
754hsa_get_dtor_statements (void)
755{
756 return &hsa_tree_gt_roots[1];
757}
758
759tree *
760hsa_get_kernel_dispatch_type (void)
761{
762 return &hsa_tree_gt_roots[2];
763}
764
/* Modify the NUL-terminated name P in-place so that it is a valid HSA
   identifier: every '.' and '-' character is replaced with '_'.  */

void
hsa_sanitize_name (char *p)
{
  while (*p != '\0')
    {
      switch (*p)
	{
	case '.':
	case '-':
	  *p = '_';
	  break;
	default:
	  break;
	}
      p++;
    }
}
774
775/* Clone the name P, set trailing ampersand and sanitize the name. */
776
777char *
778hsa_brig_function_name (const char *p)
779{
780 unsigned len = strlen (p);
781 char *buf = XNEWVEC (char, len + 2);
782
783 buf[0] = '&';
784 buf[len + 1] = '\0';
785 memcpy (buf + 1, p, len);
786
787 hsa_sanitize_name (buf);
788 return buf;
789}
790
791/* Add a flatten attribute and disable vectorization for gpu implementation
792 function decl GDECL. */
793
794void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl)
795{
796 DECL_ATTRIBUTES (gdecl)
797 = tree_cons (get_identifier ("flatten"), NULL_TREE,
798 DECL_ATTRIBUTES (gdecl));
799
800 tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl);
801 if (fn_opts == NULL_TREE)
802 fn_opts = optimization_default_node;
803 fn_opts = copy_node (fn_opts);
804 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false;
805 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false;
806 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts;
807}
808
809void
810hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
811 hsa_function_kind kind, bool gridified_kernel_p)
812{
813 hsa_function_summary *gpu_summary = get (gpu);
814 hsa_function_summary *host_summary = get (host);
815
816 gpu_summary->m_kind = kind;
817 host_summary->m_kind = kind;
818
819 gpu_summary->m_gpu_implementation_p = true;
820 host_summary->m_gpu_implementation_p = false;
821
822 gpu_summary->m_gridified_kernel_p = gridified_kernel_p;
823 host_summary->m_gridified_kernel_p = gridified_kernel_p;
824
825 gpu_summary->m_bound_function = host;
826 host_summary->m_bound_function = gpu;
827
828 process_gpu_implementation_attributes (gpu->decl);
829
830 /* Create reference between a kernel and a corresponding host implementation
831 to quarantee LTO streaming to a same LTRANS. */
832 if (kind == HSA_KERNEL)
833 gpu->create_reference (host, IPA_REF_ADDR);
834}
835
836/* Add a HOST function to HSA summaries. */
837
838void
839hsa_register_kernel (cgraph_node *host)
840{
841 if (hsa_summaries == NULL)
842 hsa_summaries = new hsa_summary_t (symtab);
843 hsa_function_summary *s = hsa_summaries->get (host);
844 s->m_kind = HSA_KERNEL;
845}
846
847/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
848 a HOST function. */
849
850void
851hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
852{
853 if (hsa_summaries == NULL)
854 hsa_summaries = new hsa_summary_t (symtab);
855 hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
856}
857
858/* Return true if expansion of the current HSA function has already failed. */
859
860bool
861hsa_seen_error (void)
862{
863 return hsa_cfun->m_seen_error;
864}
865
866/* Mark current HSA function as failed. */
867
868void
869hsa_fail_cfun (void)
870{
871 hsa_failed_functions->add (hsa_cfun->m_decl);
872 hsa_cfun->m_seen_error = true;
873}
874
875char *
876hsa_internal_fn::name ()
877{
878 char *name = xstrdup (internal_fn_name (m_fn));
879 for (char *ptr = name; *ptr; ptr++)
880 *ptr = TOLOWER (*ptr);
881
882 const char *suffix = NULL;
883 if (m_type_bit_size == 32)
884 suffix = "f";
885
886 if (suffix)
887 {
888 char *name2 = concat (name, suffix, NULL);
889 free (name);
890 name = name2;
891 }
892
893 hsa_sanitize_name (name);
894 return name;
895}
896
897unsigned
898hsa_internal_fn::get_arity ()
899{
900 switch (m_fn)
901 {
902 case IFN_ACOS:
903 case IFN_ASIN:
904 case IFN_ATAN:
905 case IFN_COS:
906 case IFN_EXP:
907 case IFN_EXP10:
908 case IFN_EXP2:
909 case IFN_EXPM1:
910 case IFN_LOG:
911 case IFN_LOG10:
912 case IFN_LOG1P:
913 case IFN_LOG2:
914 case IFN_LOGB:
915 case IFN_SIGNIFICAND:
916 case IFN_SIN:
917 case IFN_SQRT:
918 case IFN_TAN:
919 case IFN_CEIL:
920 case IFN_FLOOR:
921 case IFN_NEARBYINT:
922 case IFN_RINT:
923 case IFN_ROUND:
924 case IFN_TRUNC:
925 return 1;
926 case IFN_ATAN2:
927 case IFN_COPYSIGN:
928 case IFN_FMOD:
929 case IFN_POW:
930 case IFN_REMAINDER:
931 case IFN_SCALB:
932 case IFN_LDEXP:
933 return 2;
934 case IFN_CLRSB:
935 case IFN_CLZ:
936 case IFN_CTZ:
937 case IFN_FFS:
938 case IFN_PARITY:
939 case IFN_POPCOUNT:
940 default:
941 /* As we produce sorry message for unknown internal functions,
942 reaching this label is definitely a bug. */
943 gcc_unreachable ();
944 }
945}
946
947BrigType16_t
948hsa_internal_fn::get_argument_type (int n)
949{
950 switch (m_fn)
951 {
952 case IFN_ACOS:
953 case IFN_ASIN:
954 case IFN_ATAN:
955 case IFN_COS:
956 case IFN_EXP:
957 case IFN_EXP10:
958 case IFN_EXP2:
959 case IFN_EXPM1:
960 case IFN_LOG:
961 case IFN_LOG10:
962 case IFN_LOG1P:
963 case IFN_LOG2:
964 case IFN_LOGB:
965 case IFN_SIGNIFICAND:
966 case IFN_SIN:
967 case IFN_SQRT:
968 case IFN_TAN:
969 case IFN_CEIL:
970 case IFN_FLOOR:
971 case IFN_NEARBYINT:
972 case IFN_RINT:
973 case IFN_ROUND:
974 case IFN_TRUNC:
975 case IFN_ATAN2:
976 case IFN_COPYSIGN:
977 case IFN_FMOD:
978 case IFN_POW:
979 case IFN_REMAINDER:
980 case IFN_SCALB:
981 return hsa_float_for_bitsize (m_type_bit_size);
982 case IFN_LDEXP:
983 {
984 if (n == -1 || n == 0)
985 return hsa_float_for_bitsize (m_type_bit_size);
986 else
987 return BRIG_TYPE_S32;
988 }
989 default:
990 /* As we produce sorry message for unknown internal functions,
991 reaching this label is definitely a bug. */
992 gcc_unreachable ();
993 }
994}
995
996#include "gt-hsa-common.h"
997