1/* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "target.h"
27#include "memmodel.h"
28#include "tm_p.h"
29#include "is-a.h"
30#include "vec.h"
31#include "hash-table.h"
32#include "hash-map.h"
33#include "tree.h"
34#include "tree-iterator.h"
35#include "stor-layout.h"
36#include "output.h"
37#include "basic-block.h"
38#include "cfg.h"
39#include "function.h"
40#include "fold-const.h"
41#include "stringpool.h"
42#include "gimple-pretty-print.h"
43#include "diagnostic-core.h"
44#include "cgraph.h"
45#include "dumpfile.h"
46#include "print-tree.h"
47#include "symbol-summary.h"
48#include "hsa-common.h"
49#include "gomp-constants.h"
50
/* Return the 16-bit value VAL in little-endian byte order.  The value is
   byte-swapped only when the host is big-endian; on every other host it is
   returned unchanged.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* The host compiler is recent enough to offer the 16-bit swap builtin.  */
  return __builtin_bswap16 (val);
#elif GCC_VERSION >= 4008
  /* Little-endian host, or any other byte order (the PDP-endian case).  */
  return val;
#elif defined (WORDS_BIGENDIAN)
  /* Portable but slower fallback for other host compilers: exchange the two
     bytes with shifts.  */
  return (val << 8) | (val >> 8);
#else
  return val;
#endif
}
73
/* Return the 32-bit value VAL in little-endian byte order, swapping bytes
   only when the host byte order requires it.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#elif GCC_VERSION >= 4006
  /* Remaining byte order (the PDP-endian case): exchange the two 16-bit
     halves.  */
  return (val << 16) | (val >> 16);
#elif defined (WORDS_BIGENDIAN)
  /* Portable but slower fallback for other host compilers: first swap the
     bytes inside each 16-bit half, then swap the halves.  */
  uint32_t high_bytes = (val & 0xff00ff00) >> 8;
  uint32_t low_bytes = (val & 0xff00ff) << 8;
  uint32_t swapped = high_bytes | low_bytes;
  return (swapped >> 16) | (swapped << 16);
#else
  return val;
#endif
}
97
/* Return the 64-bit value VAL in little-endian byte order, swapping bytes
   only when the host byte order requires it.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#elif GCC_VERSION >= 4006
  /* Remaining byte order (the PDP-endian case): reverse the order of the
     four 16-bit words.  */
  uint64_t word0 = (val & 0xffffll) << 48;
  uint64_t word1 = (val & 0xffff0000ll) << 16;
  uint64_t word2 = (val & 0xffff00000000ll) >> 16;
  uint64_t word3 = (val & 0xffff000000000000ll) >> 48;
  return word0 | word1 | word2 | word3;
#elif defined (WORDS_BIGENDIAN)
  /* Portable but slower fallback for other host compilers: swap adjacent
     bytes, then adjacent 16-bit words, then the two 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = (((val & 0xffff0000ffff0000ll) >> 16)
	 | ((val & 0x0000ffff0000ffffll) << 16));
  return (val << 32) | (val >> 32);
#else
  return val;
#endif
}
127
/* Names used when emitting BRIG.  The three BRIG_SECTION_*_NAME strings are
   the names given to the data, code and operand sections when they are
   initialized in brig_init.  NOTE(review): BRIG_ELF_SECTION_NAME and
   BRIG_LABEL_STRING are not used in the part of the file visible here;
   presumably they name the ELF section and the label of the final output --
   confirm against the output routine.  */
#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Size in bytes of each buffer allocated by
   hsa_brig_section::allocate_new_chunk; also the largest single piece of
   data that hsa_brig_section::add accepts.  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
139
/* Chunks of BRIG binary data.  A section stores its contents as a vector of
   these; each chunk owns a buffer of BRIG_CHUNK_MAX_SIZE bytes allocated by
   hsa_brig_section::allocate_new_chunk and freed by
   hsa_brig_section::release.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  Only this many bytes of
     DATA are written out by hsa_brig_section::output.  */
  unsigned size;

  /* Pointer to the data.  */
  char *data;
};
150
/* Structure representing a BRIG section, holding and writing its data.
   Data is appended with add and accumulated in fixed-size chunks; offsets
   returned by add are relative to the start of the section including its
   header, because total_size starts out at the header size (see init).  */

class hsa_brig_section
{
public:
  /* Section name that will be output to the BRIG.  */
  const char *section_name;
  /* Size in bytes of all data stored in the section.  It is initialized to
     the header size in init, so it counts the header too.  */
  unsigned total_size;
  /* The size of the header of the section including padding.  */
  unsigned header_byte_count;
  /* The size of the header of the section without any padding.  This is the
     amount get_ptr_by_offset subtracts from a section offset before walking
     the chunks.  */
  unsigned header_byte_delta;

  /* Set up an empty section called NAME with one freshly allocated chunk.  */
  void init (const char *name);
  /* Free all chunk buffers.  */
  void release ();
  /* Write the header and all chunks to the assembly output.  */
  void output ();
  /* Append LEN bytes of DATA and return the section offset where they were
     stored; optionally report the address of the copy through OUTPUT.  */
  unsigned add (const void *data, unsigned len, void **output = NULL);
  /* Pad the section so that total_size becomes a multiple of FACTOR.  */
  void round_size_up (int factor);
  /* Translate a section offset back into a pointer into chunk storage.  */
  void *get_ptr_by_offset (unsigned int offset);

private:
  /* Push a new zero-filled chunk and make it the current one.  */
  void allocate_new_chunk ();

  /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes.  */
  vec <struct hsa_brig_data_chunk> chunks;

  /* More convenient access to the last chunk from the vector above.  */
  struct hsa_brig_data_chunk *cur_chunk;
};
181
/* The three BRIG sections being built (initialized in brig_init), a counter
   that is bumped for every directive emitted into the code section by the
   functions below and reset at the start of brig_init, and a flag recording
   whether the sections and the string table currently exist.  */
static struct hsa_brig_section brig_data, brig_code, brig_operand;
static uint32_t brig_insn_count;
static bool brig_initialized = false;

/* Mapping between emitted HSA functions and their offset in code segment.  */
static hash_map<tree, BrigCodeOffset32_t> *function_offsets;

/* Hash map of emitted function declarations.  */
static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;

/* Hash table of emitted internal function declaration offsets.  */
hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;

/* List of sbr instructions.  */
static vec <hsa_insn_sbr *> *switch_instructions;
197
/* Pairs the declaration of a called function with the offset in the operand
   section that refers to it.  */

struct function_linkage_pair
{
  /* Build a pair from the called function DECL and operand offset OFF.  */
  function_linkage_pair (tree decl, unsigned int off)
    : function_decl (decl), offset (off) {}

  /* Declaration of called function.  */
  tree function_decl;

  /* Offset in operand section.  */
  unsigned int offset;
};

/* Vector of function calls where we need to resolve function offsets.  */
static auto_vec <function_linkage_pair> function_call_linkage;
212
213/* Add a new chunk, allocate data for it and initialize it. */
214
215void
216hsa_brig_section::allocate_new_chunk ()
217{
218 struct hsa_brig_data_chunk new_chunk;
219
220 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
221 new_chunk.size = 0;
222 cur_chunk = chunks.safe_push (new_chunk);
223}
224
225/* Initialize the brig section. */
226
227void
228hsa_brig_section::init (const char *name)
229{
230 section_name = name;
231 /* While the following computation is basically wrong, because the intent
232 certainly wasn't to have the first character of name and padding, which
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
234 this is what the disassembler expects. */
235 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
236 chunks.create (1);
237 allocate_new_chunk ();
238 header_byte_delta = total_size;
239 round_size_up (4);
240 header_byte_count = total_size;
241}
242
243/* Free all data in the section. */
244
245void
246hsa_brig_section::release ()
247{
248 for (unsigned i = 0; i < chunks.length (); i++)
249 free (chunks[i].data);
250 chunks.release ();
251 cur_chunk = NULL;
252}
253
254/* Write the section to the output file to a section with the name given at
255 initialization. Switches the output section and does not restore it. */
256
257void
258hsa_brig_section::output ()
259{
260 struct BrigSectionHeader section_header;
261 char padding[8];
262
263 section_header.byteCount = lendian64 (total_size);
264 section_header.headerByteCount = lendian32 (header_byte_count);
265 section_header.nameLength = lendian32 (strlen (section_name));
266 assemble_string ((const char *) &section_header, 16);
267 assemble_string (section_name, (section_header.nameLength));
268 memset (&padding, 0, sizeof (padding));
269 /* This is also a consequence of the wrong header size computation described
270 in a comment in hsa_brig_section::init. */
271 assemble_string (padding, 8);
272 for (unsigned i = 0; i < chunks.length (); i++)
273 assemble_string (chunks[i].data, chunks[i].size);
274}
275
276/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
278 the place where DATA was actually stored. */
279
280unsigned
281hsa_brig_section::add (const void *data, unsigned len, void **output)
282{
283 unsigned offset = total_size;
284
285 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
286 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
287 allocate_new_chunk ();
288
289 char *dst = cur_chunk->data + cur_chunk->size;
290 memcpy (dst, data, len);
291 if (output)
292 *output = dst;
293 cur_chunk->size += len;
294 total_size += len;
295
296 return offset;
297}
298
299/* Add padding to section so that its size is divisible by FACTOR. */
300
301void
302hsa_brig_section::round_size_up (int factor)
303{
304 unsigned padding, res = total_size % factor;
305
306 if (res == 0)
307 return;
308
309 padding = factor - res;
310 total_size += padding;
311 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
312 {
313 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
314 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
315 allocate_new_chunk ();
316 }
317
318 cur_chunk->size += padding;
319}
320
321/* Return pointer to data by global OFFSET in the section. */
322
323void *
324hsa_brig_section::get_ptr_by_offset (unsigned int offset)
325{
326 gcc_assert (offset < total_size);
327 offset -= header_byte_delta;
328
329 unsigned i;
330 for (i = 0; offset >= chunks[i].size; i++)
331 offset -= chunks[i].size;
332
333 return chunks[i].data + offset;
334}
335
/* BRIG string data hashing.  One entry of the table of strings already
   emitted to the data section.  */

struct brig_string_slot
{
  /* The characters of the string, without the prefix.  The hasher frees this
     pointer when the entry is removed.  */
  const char *s;
  /* Prefix character emitted in front of S, or zero when there is none.  */
  char prefix;
  /* Number of characters in S; hashing and comparison use this length rather
     than relying on a terminating NUL.  */
  int len;
  /* Offset in the data section at which the string was emitted.  */
  uint32_t offset;
};
345
/* Hash table helpers.  Entries are hashed and compared by string contents,
   length and prefix, and are freed together with their string on removal.  */

struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
{
  static inline hashval_t hash (const value_type);
  static inline bool equal (const value_type, const compare_type);
  static inline void remove (value_type);
};
354
355/* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
356 to support strings that may not end in '\0'. */
357
358inline hashval_t
359brig_string_slot_hasher::hash (const value_type ds)
360{
361 hashval_t r = ds->len;
362 int i;
363
364 for (i = 0; i < ds->len; i++)
365 r = r * 67 + (unsigned) ds->s[i] - 113;
366 r = r * 67 + (unsigned) ds->prefix - 113;
367 return r;
368}
369
370/* Returns nonzero if DS1 and DS2 are equal. */
371
372inline bool
373brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
374{
375 if (ds1->len == ds2->len)
376 return ds1->prefix == ds2->prefix
377 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
378
379 return 0;
380}
381
382/* Deallocate memory for DS upon its removal. */
383
384inline void
385brig_string_slot_hasher::remove (value_type ds)
386{
387 free (const_cast<char *> (ds->s));
388 free (ds);
389}
390
/* Hash for strings we output in order not to duplicate them needlessly.
   Created in brig_init and deleted in brig_release_data.  */

static hash_table<brig_string_slot_hasher> *brig_string_htab;
394
395/* Emit a null terminated string STR to the data section and return its
396 offset in it. If PREFIX is non-zero, output it just before STR too.
397 Sanitize the string if SANITIZE option is set to true. */
398
399static unsigned
400brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
401{
402 unsigned slen = strlen (str);
403 unsigned offset, len = slen + (prefix ? 1 : 0);
404 uint32_t hdr_len = lendian32 (len);
405 brig_string_slot s_slot;
406 brig_string_slot **slot;
407 char *str2;
408
409 str2 = xstrdup (str);
410
411 if (sanitize)
412 hsa_sanitize_name (str2);
413 s_slot.s = str2;
414 s_slot.len = slen;
415 s_slot.prefix = prefix;
416 s_slot.offset = 0;
417
418 slot = brig_string_htab->find_slot (&s_slot, INSERT);
419 if (*slot == NULL)
420 {
421 brig_string_slot *new_slot = XCNEW (brig_string_slot);
422
423 /* In theory we should fill in BrigData but that would mean copying
424 the string to a buffer for no reason, so we just emulate it. */
425 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
426 if (prefix)
427 brig_data.add (&prefix, 1);
428
429 brig_data.add (str2, slen);
430 brig_data.round_size_up (4);
431
432 /* TODO: could use the string we just copied into
433 brig_string->cur_chunk */
434 new_slot->s = str2;
435 new_slot->len = slen;
436 new_slot->prefix = prefix;
437 new_slot->offset = offset;
438 *slot = new_slot;
439 }
440 else
441 {
442 offset = (*slot)->offset;
443 free (str2);
444 }
445
446 return offset;
447}
448
/* Linked list of queued operands.  Operands are appended by enqueue_op,
   which assigns each one its future offset in advance.  */

static struct operand_queue
{
  /* First from the chain of queued operands.  LAST_OP is the tail, to which
     enqueue_op links each newly queued operand.  */
  hsa_op_base *first_op, *last_op;

  /* The offset at which the next operand will be enqueued.  */
  unsigned projected_size;

} op_queue;
460
461/* Unless already initialized, initialize infrastructure to produce BRIG. */
462
463static void
464brig_init (void)
465{
466 brig_insn_count = 0;
467
468 if (brig_initialized)
469 return;
470
471 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
472 brig_data.init (BRIG_SECTION_DATA_NAME);
473 brig_code.init (BRIG_SECTION_CODE_NAME);
474 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
475 brig_initialized = true;
476
477 struct BrigDirectiveModule moddir;
478 memset (&moddir, 0, sizeof (moddir));
479 moddir.base.byteCount = lendian16 (sizeof (moddir));
480
481 char *modname;
482 if (main_input_filename && *main_input_filename != '\0')
483 {
484 const char *part = strrchr (main_input_filename, '/');
485 if (!part)
486 part = main_input_filename;
487 else
488 part++;
489 modname = concat ("&__hsa_module_", part, NULL);
490 char *extension = strchr (modname, '.');
491 if (extension)
492 *extension = '\0';
493
494 /* As in LTO mode, we have to emit a different module names. */
495 if (flag_ltrans)
496 {
497 part = strrchr (asm_file_name, '/');
498 if (!part)
499 part = asm_file_name;
500 else
501 part++;
502 char *modname2;
503 modname2 = xasprintf ("%s_%s", modname, part);
504 free (modname);
505 modname = modname2;
506 }
507
508 hsa_sanitize_name (modname);
509 moddir.name = brig_emit_string (modname);
510 free (modname);
511 }
512 else
513 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
514 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
515 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
516 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
517 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
518 if (hsa_machine_large_p ())
519 moddir.machineModel = BRIG_MACHINE_LARGE;
520 else
521 moddir.machineModel = BRIG_MACHINE_SMALL;
522 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
523 brig_code.add (&moddir, sizeof (moddir));
524}
525
526/* Free all BRIG data. */
527
528static void
529brig_release_data (void)
530{
531 delete brig_string_htab;
532 brig_data.release ();
533 brig_code.release ();
534 brig_operand.release ();
535
536 brig_initialized = 0;
537}
538
539/* Enqueue operation OP. Return the offset at which it will be stored. */
540
541static unsigned int
542enqueue_op (hsa_op_base *op)
543{
544 unsigned ret;
545
546 if (op->m_brig_op_offset)
547 return op->m_brig_op_offset;
548
549 ret = op_queue.projected_size;
550 op->m_brig_op_offset = op_queue.projected_size;
551
552 if (!op_queue.first_op)
553 op_queue.first_op = op;
554 else
555 op_queue.last_op->m_next = op;
556 op_queue.last_op = op;
557
558 if (is_a <hsa_op_immed *> (op))
559 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
560 else if (is_a <hsa_op_reg *> (op))
561 op_queue.projected_size += sizeof (struct BrigOperandRegister);
562 else if (is_a <hsa_op_address *> (op))
563 op_queue.projected_size += sizeof (struct BrigOperandAddress);
564 else if (is_a <hsa_op_code_ref *> (op))
565 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
566 else if (is_a <hsa_op_code_list *> (op))
567 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
568 else if (is_a <hsa_op_operand_list *> (op))
569 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
570 else
571 gcc_unreachable ();
572 return ret;
573}
574
575static void emit_immediate_operand (hsa_op_immed *imm);
576
577/* Emit directive describing a symbol if it has not been emitted already.
578 Return the offset of the directive. */
579
580static unsigned
581emit_directive_variable (struct hsa_symbol *symbol)
582{
583 struct BrigDirectiveVariable dirvar;
584 unsigned name_offset;
585 static unsigned res_name_offset;
586
587 if (symbol->m_directive_offset)
588 return symbol->m_directive_offset;
589
590 memset (&dirvar, 0, sizeof (dirvar));
591 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
592 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
593 dirvar.allocation = symbol->m_allocation;
594
595 char prefix = symbol->m_global_scope_p ? '&' : '%';
596
597 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
598 {
599 if (res_name_offset == 0)
600 res_name_offset = brig_emit_string (symbol->m_name, '%');
601 name_offset = res_name_offset;
602 }
603 else if (symbol->m_name)
604 name_offset = brig_emit_string (symbol->m_name, prefix);
605 else
606 {
607 char buf[64];
608 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
609 symbol->m_name_number);
610 name_offset = brig_emit_string (buf, prefix);
611 }
612
613 dirvar.name = lendian32 (name_offset);
614
615 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
616 {
617 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
618 dirvar.init = lendian32 (enqueue_op (tmp));
619 }
620 else
621 dirvar.init = 0;
622 dirvar.type = lendian16 (symbol->m_type);
623 dirvar.segment = symbol->m_segment;
624 dirvar.align = symbol->m_align;
625 dirvar.linkage = symbol->m_linkage;
626 dirvar.dim.lo = symbol->m_dim;
627 dirvar.dim.hi = symbol->m_dim >> 32;
628
629 /* Global variables are just declared and linked via HSA runtime. */
630 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
631 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
632 dirvar.reserved = 0;
633
634 if (symbol->m_cst_value)
635 {
636 dirvar.modifier |= BRIG_VARIABLE_CONST;
637 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
638 }
639
640 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
641 return symbol->m_directive_offset;
642}
643
/* Emit directives describing either a function declaration or definition F and
   return the produced BrigDirectiveExecutable structure.  The function does
   not take into account any instructions when calculating nextModuleEntry
   field of the produced BrigDirectiveExecutable structure so when emitting
   actual definitions, this field needs to be updated after all of the function
   is actually added to the code section.

   The returned pointer refers to the copy of the directive stored inside the
   code section, so the caller can patch it in place.  IS_DECLARATION only
   controls whether an offset already recorded in function_offsets may be
   overwritten; what gets emitted is decided by F->m_declaration_p.  */

static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
  struct BrigDirectiveExecutable fndir;
  unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
  int count = 0;
  void *ptr_to_fndir;
  hsa_symbol *sym;

  /* For definitions, global symbols are emitted first, in front of the
     function directive itself.  */
  if (!f->m_declaration_p)
    for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
      {
	gcc_assert (!sym->m_emitted_to_brig);
	sym->m_emitted_to_brig = true;
	emit_directive_variable (sym);
	brig_insn_count++;
      }

  /* Compute in advance the code section offsets of what follows the
     directive: the optional output argument, the input arguments and then the
     function-scoped variables.  This relies on each of them being emitted as
     exactly one BrigDirectiveVariable, in this order, right after the
     directive.  */
  name_offset = brig_emit_string (f->m_name, '&');
  inarg_off = brig_code.total_size + sizeof (fndir)
    + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
  scoped_off = inarg_off
    + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);

  /* Only definitions carry spill symbols and private variables.  */
  if (!f->m_declaration_p)
    {
      count += f->m_spill_symbols.length ();
      count += f->m_private_variables.length ();
    }

  next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);

  memset (&fndir, 0, sizeof (fndir));
  fndir.base.byteCount = lendian16 (sizeof (fndir));
  fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
			       : BRIG_KIND_DIRECTIVE_FUNCTION);
  fndir.name = lendian32 (name_offset);
  fndir.inArgCount = lendian16 (f->m_input_args.length ());
  fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
  fndir.firstInArg = lendian32 (inarg_off);
  fndir.firstCodeBlockEntry = lendian32 (scoped_off);
  fndir.nextModuleEntry = lendian32 (next_toplev_off);
  fndir.linkage = f->get_linkage ();
  if (!f->m_declaration_p)
    fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
  memset (&fndir.reserved, 0, sizeof (fndir.reserved));

  /* Once we put a definition of function_offsets, we should not overwrite
     it with a declaration of the function.  */
  if (f->m_internal_fn == NULL)
    {
      if (!function_offsets->get (f->m_decl) || !is_declaration)
	function_offsets->put (f->m_decl, brig_code.total_size);
    }
  else
    {
      /* Internal function.  Record a copy of its descriptor together with the
	 offset of the directive about to be emitted.
	 NOTE(review): the slot is overwritten without looking at a previous
	 entry; confirm that callers never emit the same internal function
	 twice.  */
      hsa_internal_fn **slot
	= hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
      hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
      int_fn->m_offset = brig_code.total_size;
      *slot = int_fn;
    }

  /* Emit the directive and remember where its copy lives in the section.  */
  brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);

  /* Arguments follow, in the order assumed by the offsets computed above.  */
  if (f->m_output_arg)
    emit_directive_variable (f->m_output_arg);
  for (unsigned i = 0; i < f->m_input_args.length (); i++)
    emit_directive_variable (f->m_input_args[i]);

  if (!f->m_declaration_p)
    {
      for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
	{
	  emit_directive_variable (sym);
	  brig_insn_count++;
	}
      for (unsigned i = 0; i < f->m_private_variables.length (); i++)
	{
	  emit_directive_variable (f->m_private_variables[i]);
	  brig_insn_count++;
	}
    }

  return (BrigDirectiveExecutable *) ptr_to_fndir;
}
738
739/* Emit a label directive for the given HBB. We assume it is about to start on
740 the current offset in the code section. */
741
742static void
743emit_bb_label_directive (hsa_bb *hbb)
744{
745 struct BrigDirectiveLabel lbldir;
746
747 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
748 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
749 char buf[32];
750 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
751 hbb->m_index);
752 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
753
754 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
755 sizeof (lbldir));
756 brig_insn_count++;
757}
758
759/* Map a normal HSAIL type to the type of the equivalent BRIG operand
760 holding such, for constants and registers. */
761
762static BrigType16_t
763regtype_for_type (BrigType16_t t)
764{
765 switch (t)
766 {
767 case BRIG_TYPE_B1:
768 return BRIG_TYPE_B1;
769
770 case BRIG_TYPE_U8:
771 case BRIG_TYPE_U16:
772 case BRIG_TYPE_U32:
773 case BRIG_TYPE_S8:
774 case BRIG_TYPE_S16:
775 case BRIG_TYPE_S32:
776 case BRIG_TYPE_B8:
777 case BRIG_TYPE_B16:
778 case BRIG_TYPE_B32:
779 case BRIG_TYPE_F16:
780 case BRIG_TYPE_F32:
781 case BRIG_TYPE_U8X4:
782 case BRIG_TYPE_U16X2:
783 case BRIG_TYPE_S8X4:
784 case BRIG_TYPE_S16X2:
785 case BRIG_TYPE_F16X2:
786 return BRIG_TYPE_B32;
787
788 case BRIG_TYPE_U64:
789 case BRIG_TYPE_S64:
790 case BRIG_TYPE_F64:
791 case BRIG_TYPE_B64:
792 case BRIG_TYPE_U8X8:
793 case BRIG_TYPE_U16X4:
794 case BRIG_TYPE_U32X2:
795 case BRIG_TYPE_S8X8:
796 case BRIG_TYPE_S16X4:
797 case BRIG_TYPE_S32X2:
798 case BRIG_TYPE_F16X4:
799 case BRIG_TYPE_F32X2:
800 return BRIG_TYPE_B64;
801
802 case BRIG_TYPE_B128:
803 case BRIG_TYPE_U8X16:
804 case BRIG_TYPE_U16X8:
805 case BRIG_TYPE_U32X4:
806 case BRIG_TYPE_U64X2:
807 case BRIG_TYPE_S8X16:
808 case BRIG_TYPE_S16X8:
809 case BRIG_TYPE_S32X4:
810 case BRIG_TYPE_S64X2:
811 case BRIG_TYPE_F16X8:
812 case BRIG_TYPE_F32X4:
813 case BRIG_TYPE_F64X2:
814 return BRIG_TYPE_B128;
815
816 default:
817 gcc_unreachable ();
818 }
819}
820
821/* Return the length of the BRIG type TYPE that is going to be streamed out as
822 an immediate constant (so it must not be B1). */
823
824unsigned
825hsa_get_imm_brig_type_len (BrigType16_t type)
826{
827 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
828 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
829
830 switch (pack_type)
831 {
832 case BRIG_TYPE_PACK_NONE:
833 break;
834 case BRIG_TYPE_PACK_32:
835 return 4;
836 case BRIG_TYPE_PACK_64:
837 return 8;
838 case BRIG_TYPE_PACK_128:
839 return 16;
840 default:
841 gcc_unreachable ();
842 }
843
844 switch (base_type)
845 {
846 case BRIG_TYPE_U8:
847 case BRIG_TYPE_S8:
848 case BRIG_TYPE_B8:
849 return 1;
850 case BRIG_TYPE_U16:
851 case BRIG_TYPE_S16:
852 case BRIG_TYPE_F16:
853 case BRIG_TYPE_B16:
854 return 2;
855 case BRIG_TYPE_U32:
856 case BRIG_TYPE_S32:
857 case BRIG_TYPE_F32:
858 case BRIG_TYPE_B32:
859 return 4;
860 case BRIG_TYPE_U64:
861 case BRIG_TYPE_S64:
862 case BRIG_TYPE_F64:
863 case BRIG_TYPE_B64:
864 return 8;
865 case BRIG_TYPE_B128:
866 return 16;
867 default:
868 gcc_unreachable ();
869 }
870}
871
872/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
873 If NEED_LEN is not equal to zero, shrink or extend the value
874 to NEED_LEN bytes. Return how many bytes were written. */
875
876static int
877emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
878{
879 union hsa_bytes bytes;
880
881 memset (&bytes, 0, sizeof (bytes));
882 tree type = TREE_TYPE (value);
883 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
884
885 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
886 if (INTEGRAL_TYPE_P (type)
887 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
888 switch (data_len)
889 {
890 case 1:
891 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
892 break;
893 case 2:
894 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
895 break;
896 case 4:
897 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
898 break;
899 case 8:
900 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
901 break;
902 default:
903 gcc_unreachable ();
904 }
905 else if (SCALAR_FLOAT_TYPE_P (type))
906 {
907 if (data_len == 2)
908 {
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
910 "operands");
911 return 2;
912 }
913 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
914 /* There are always 32 bits in each long, no matter the size of
915 the hosts long. */
916 long tmp[6];
917
918 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
919
920 if (int_len == 4)
921 bytes.b32 = (uint32_t) tmp[0];
922 else
923 {
924 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
925 bytes.b64 <<= 32;
926 bytes.b64 |= (uint32_t) tmp[0];
927 }
928 }
929 else
930 gcc_unreachable ();
931
932 int len;
933 if (need_len == 0)
934 len = data_len;
935 else
936 len = need_len;
937
938 memcpy (data, &bytes, len);
939 return len;
940}
941
942char *
943hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
944{
945 char *brig_repr;
946 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
947
948 if (m_tree_value != NULL_TREE)
949 {
950 /* Update brig_repr_size for special tree values. */
951 if (TREE_CODE (m_tree_value) == STRING_CST)
952 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
953 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
954 *brig_repr_size
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
956
957 unsigned total_len = *brig_repr_size;
958
959 /* As we can have a constructor with fewer elements, fill the memory
960 with zeros. */
961 brig_repr = XCNEWVEC (char, total_len);
962 char *p = brig_repr;
963
964 if (TREE_CODE (m_tree_value) == VECTOR_CST)
965 {
966 int i, num = VECTOR_CST_NELTS (m_tree_value);
967 for (i = 0; i < num; i++)
968 {
969 tree v = VECTOR_CST_ELT (m_tree_value, i);
970 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
971 total_len -= actual;
972 p += actual;
973 }
974 /* Vectors should have the exact size. */
975 gcc_assert (total_len == 0);
976 }
977 else if (TREE_CODE (m_tree_value) == STRING_CST)
978 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
979 TREE_STRING_LENGTH (m_tree_value));
980 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
981 {
982 gcc_assert (total_len % 2 == 0);
983 unsigned actual;
984 actual
985 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
986 total_len / 2);
987
988 gcc_assert (actual == total_len / 2);
989 p += actual;
990
991 actual
992 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
993 total_len / 2);
994 gcc_assert (actual == total_len / 2);
995 }
996 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
997 {
998 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
999 for (unsigned i = 0; i < len; i++)
1000 {
1001 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1002 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1003 total_len -= actual;
1004 p += actual;
1005 }
1006 }
1007 else
1008 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1009 }
1010 else
1011 {
1012 hsa_bytes bytes;
1013
1014 switch (*brig_repr_size)
1015 {
1016 case 1:
1017 bytes.b8 = (uint8_t) m_int_value;
1018 break;
1019 case 2:
1020 bytes.b16 = (uint16_t) m_int_value;
1021 break;
1022 case 4:
1023 bytes.b32 = (uint32_t) m_int_value;
1024 break;
1025 case 8:
1026 bytes.b64 = (uint64_t) m_int_value;
1027 break;
1028 default:
1029 gcc_unreachable ();
1030 }
1031
1032 brig_repr = XNEWVEC (char, *brig_repr_size);
1033 memcpy (brig_repr, &bytes, *brig_repr_size);
1034 }
1035
1036 return brig_repr;
1037}
1038
1039/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1040 have been massaged to comply with various HSA/BRIG type requirements, so the
1041 only important aspect of that is the length (because HSAIL might expect
1042 smaller constants or become bit-data). The data should be represented
1043 according to what is in the tree representation. */
1044
1045static void
1046emit_immediate_operand (hsa_op_immed *imm)
1047{
1048 unsigned brig_repr_size;
1049 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1050 struct BrigOperandConstantBytes out;
1051
1052 memset (&out, 0, sizeof (out));
1053 out.base.byteCount = lendian16 (sizeof (out));
1054 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1055 uint32_t byteCount = lendian32 (brig_repr_size);
1056 out.type = lendian16 (imm->m_type);
1057 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1058 brig_operand.add (&out, sizeof (out));
1059 brig_data.add (brig_repr, brig_repr_size);
1060 brig_data.round_size_up (4);
1061
1062 free (brig_repr);
1063}
1064
1065/* Emit a register BRIG operand REG. */
1066
1067static void
1068emit_register_operand (hsa_op_reg *reg)
1069{
1070 struct BrigOperandRegister out;
1071
1072 out.base.byteCount = lendian16 (sizeof (out));
1073 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1074 out.regNum = lendian32 (reg->m_hard_num);
1075
1076 switch (regtype_for_type (reg->m_type))
1077 {
1078 case BRIG_TYPE_B32:
1079 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1080 break;
1081 case BRIG_TYPE_B64:
1082 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1083 break;
1084 case BRIG_TYPE_B128:
1085 out.regKind = BRIG_REGISTER_KIND_QUAD;
1086 break;
1087 case BRIG_TYPE_B1:
1088 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1089 break;
1090 default:
1091 gcc_unreachable ();
1092 }
1093
1094 brig_operand.add (&out, sizeof (out));
1095}
1096
1097/* Emit an address BRIG operand ADDR. */
1098
1099static void
1100emit_address_operand (hsa_op_address *addr)
1101{
1102 struct BrigOperandAddress out;
1103
1104 out.base.byteCount = lendian16 (sizeof (out));
1105 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1106 out.symbol = addr->m_symbol
1107 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1108 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1109
1110 if (sizeof (addr->m_imm_offset) == 8)
1111 {
1112 out.offset.lo = lendian32 (addr->m_imm_offset);
1113 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1114 }
1115 else
1116 {
1117 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1118 out.offset.lo = lendian32 (addr->m_imm_offset);
1119 out.offset.hi = 0;
1120 }
1121
1122 brig_operand.add (&out, sizeof (out));
1123}
1124
1125/* Emit a code reference operand REF. */
1126
1127static void
1128emit_code_ref_operand (hsa_op_code_ref *ref)
1129{
1130 struct BrigOperandCodeRef out;
1131
1132 out.base.byteCount = lendian16 (sizeof (out));
1133 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1134 out.ref = lendian32 (ref->m_directive_offset);
1135 brig_operand.add (&out, sizeof (out));
1136}
1137
1138/* Emit a code list operand CODE_LIST. */
1139
1140static void
1141emit_code_list_operand (hsa_op_code_list *code_list)
1142{
1143 struct BrigOperandCodeList out;
1144 unsigned args = code_list->m_offsets.length ();
1145
1146 for (unsigned i = 0; i < args; i++)
1147 gcc_assert (code_list->m_offsets[i]);
1148
1149 out.base.byteCount = lendian16 (sizeof (out));
1150 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1151
1152 uint32_t byteCount = lendian32 (4 * args);
1153
1154 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1155 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1156 brig_data.round_size_up (4);
1157 brig_operand.add (&out, sizeof (out));
1158}
1159
1160/* Emit an operand list operand OPERAND_LIST. */
1161
1162static void
1163emit_operand_list_operand (hsa_op_operand_list *operand_list)
1164{
1165 struct BrigOperandOperandList out;
1166 unsigned args = operand_list->m_offsets.length ();
1167
1168 for (unsigned i = 0; i < args; i++)
1169 gcc_assert (operand_list->m_offsets[i]);
1170
1171 out.base.byteCount = lendian16 (sizeof (out));
1172 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1173
1174 uint32_t byteCount = lendian32 (4 * args);
1175
1176 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1177 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1178 brig_data.round_size_up (4);
1179 brig_operand.add (&out, sizeof (out));
1180}
1181
1182/* Emit all operands queued for writing. */
1183
1184static void
1185emit_queued_operands (void)
1186{
1187 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1188 {
1189 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1190 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1191 emit_immediate_operand (imm);
1192 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1193 emit_register_operand (reg);
1194 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1195 emit_address_operand (addr);
1196 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1197 emit_code_ref_operand (ref);
1198 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1199 emit_code_list_operand (code_list);
1200 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1201 emit_operand_list_operand (l);
1202 else
1203 gcc_unreachable ();
1204 }
1205}
1206
1207/* Emit directives describing the function that is used for
1208 a function declaration. */
1209
1210static BrigDirectiveExecutable *
1211emit_function_declaration (tree decl)
1212{
1213 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1214
1215 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1216 emit_queued_operands ();
1217
1218 delete f;
1219
1220 return e;
1221}
1222
1223/* Emit directives describing the function that is used for
1224 an internal function declaration. */
1225
1226static BrigDirectiveExecutable *
1227emit_internal_fn_decl (hsa_internal_fn *fn)
1228{
1229 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1230
1231 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1232 emit_queued_operands ();
1233
1234 delete f;
1235
1236 return e;
1237}
1238
1239/* Enqueue all operands of INSN and return offset to BRIG data section
1240 to list of operand offsets. */
1241
1242static unsigned
1243emit_insn_operands (hsa_insn_basic *insn)
1244{
1245 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1246 operand_offsets;
1247
1248 unsigned l = insn->operand_count ();
1249
1250 /* We have N operands so use 4 * N for the byte_count. */
1251 uint32_t byte_count = lendian32 (4 * l);
1252 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1253 if (l > 0)
1254 {
1255 operand_offsets.safe_grow (l);
1256 for (unsigned i = 0; i < l; i++)
1257 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1258
1259 brig_data.add (operand_offsets.address (),
1260 l * sizeof (BrigOperandOffset32_t));
1261 }
1262 brig_data.round_size_up (4);
1263 return offset;
1264}
1265
1266/* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1267 to BRIG data section to list of operand offsets. */
1268
1269static unsigned
1270emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1271 hsa_op_base *op2 = NULL)
1272{
1273 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1274 operand_offsets;
1275
1276 gcc_checking_assert (op0 != NULL);
1277 operand_offsets.safe_push (enqueue_op (op0));
1278
1279 if (op1 != NULL)
1280 {
1281 operand_offsets.safe_push (enqueue_op (op1));
1282 if (op2 != NULL)
1283 operand_offsets.safe_push (enqueue_op (op2));
1284 }
1285
1286 unsigned l = operand_offsets.length ();
1287
1288 /* We have N operands so use 4 * N for the byte_count. */
1289 uint32_t byte_count = lendian32 (4 * l);
1290
1291 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1292 brig_data.add (operand_offsets.address (),
1293 l * sizeof (BrigOperandOffset32_t));
1294
1295 brig_data.round_size_up (4);
1296
1297 return offset;
1298}
1299
1300/* Emit an HSA memory instruction and all necessary directives, schedule
1301 necessary operands for writing. */
1302
1303static void
1304emit_memory_insn (hsa_insn_mem *mem)
1305{
1306 struct BrigInstMem repr;
1307 gcc_checking_assert (mem->operand_count () == 2);
1308
1309 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1310
1311 /* This is necessary because of the erroneous typedef of
1312 BrigMemoryModifier8_t which introduces padding which may then contain
1313 random stuff (which we do not want so that we can test things don't
1314 change). */
1315 memset (&repr, 0, sizeof (repr));
1316 repr.base.base.byteCount = lendian16 (sizeof (repr));
1317 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1318 repr.base.opcode = lendian16 (mem->m_opcode);
1319 repr.base.type = lendian16 (mem->m_type);
1320 repr.base.operands = lendian32 (emit_insn_operands (mem));
1321
1322 if (addr->m_symbol)
1323 repr.segment = addr->m_symbol->m_segment;
1324 else
1325 repr.segment = BRIG_SEGMENT_FLAT;
1326 repr.modifier = 0;
1327 repr.equivClass = mem->m_equiv_class;
1328 repr.align = mem->m_align;
1329 if (mem->m_opcode == BRIG_OPCODE_LD)
1330 repr.width = BRIG_WIDTH_1;
1331 else
1332 repr.width = BRIG_WIDTH_NONE;
1333 memset (&repr.reserved, 0, sizeof (repr.reserved));
1334 brig_code.add (&repr, sizeof (repr));
1335 brig_insn_count++;
1336}
1337
1338/* Emit an HSA signal memory instruction and all necessary directives, schedule
1339 necessary operands for writing. */
1340
1341static void
1342emit_signal_insn (hsa_insn_signal *mem)
1343{
1344 struct BrigInstSignal repr;
1345
1346 memset (&repr, 0, sizeof (repr));
1347 repr.base.base.byteCount = lendian16 (sizeof (repr));
1348 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1349 repr.base.opcode = lendian16 (mem->m_opcode);
1350 repr.base.type = lendian16 (mem->m_type);
1351 repr.base.operands = lendian32 (emit_insn_operands (mem));
1352
1353 repr.memoryOrder = mem->m_memory_order;
1354 repr.signalOperation = mem->m_signalop;
1355 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1356
1357 brig_code.add (&repr, sizeof (repr));
1358 brig_insn_count++;
1359}
1360
1361/* Emit an HSA atomic memory instruction and all necessary directives, schedule
1362 necessary operands for writing. */
1363
1364static void
1365emit_atomic_insn (hsa_insn_atomic *mem)
1366{
1367 struct BrigInstAtomic repr;
1368
1369 /* Either operand[0] or operand[1] must be an address operand. */
1370 hsa_op_address *addr = NULL;
1371 if (is_a <hsa_op_address *> (mem->get_op (0)))
1372 addr = as_a <hsa_op_address *> (mem->get_op (0));
1373 else
1374 addr = as_a <hsa_op_address *> (mem->get_op (1));
1375
1376 memset (&repr, 0, sizeof (repr));
1377 repr.base.base.byteCount = lendian16 (sizeof (repr));
1378 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1379 repr.base.opcode = lendian16 (mem->m_opcode);
1380 repr.base.type = lendian16 (mem->m_type);
1381 repr.base.operands = lendian32 (emit_insn_operands (mem));
1382
1383 if (addr->m_symbol)
1384 repr.segment = addr->m_symbol->m_segment;
1385 else
1386 repr.segment = BRIG_SEGMENT_FLAT;
1387 repr.memoryOrder = mem->m_memoryorder;
1388 repr.memoryScope = mem->m_memoryscope;
1389 repr.atomicOperation = mem->m_atomicop;
1390
1391 brig_code.add (&repr, sizeof (repr));
1392 brig_insn_count++;
1393}
1394
1395/* Emit an HSA LDA instruction and all necessary directives, schedule
1396 necessary operands for writing. */
1397
1398static void
1399emit_addr_insn (hsa_insn_basic *insn)
1400{
1401 struct BrigInstAddr repr;
1402
1403 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1404
1405 repr.base.base.byteCount = lendian16 (sizeof (repr));
1406 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1407 repr.base.opcode = lendian16 (insn->m_opcode);
1408 repr.base.type = lendian16 (insn->m_type);
1409 repr.base.operands = lendian32 (emit_insn_operands (insn));
1410
1411 if (addr->m_symbol)
1412 repr.segment = addr->m_symbol->m_segment;
1413 else
1414 repr.segment = BRIG_SEGMENT_FLAT;
1415 memset (&repr.reserved, 0, sizeof (repr.reserved));
1416
1417 brig_code.add (&repr, sizeof (repr));
1418 brig_insn_count++;
1419}
1420
1421/* Emit an HSA segment conversion instruction and all necessary directives,
1422 schedule necessary operands for writing. */
1423
1424static void
1425emit_segment_insn (hsa_insn_seg *seg)
1426{
1427 struct BrigInstSegCvt repr;
1428
1429 repr.base.base.byteCount = lendian16 (sizeof (repr));
1430 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1431 repr.base.opcode = lendian16 (seg->m_opcode);
1432 repr.base.type = lendian16 (seg->m_type);
1433 repr.base.operands = lendian32 (emit_insn_operands (seg));
1434 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1435 repr.segment = seg->m_segment;
1436 repr.modifier = 0;
1437
1438 brig_code.add (&repr, sizeof (repr));
1439
1440 brig_insn_count++;
1441}
1442
1443/* Emit an HSA alloca instruction and all necessary directives,
1444 schedule necessary operands for writing. */
1445
1446static void
1447emit_alloca_insn (hsa_insn_alloca *alloca)
1448{
1449 struct BrigInstMem repr;
1450 gcc_checking_assert (alloca->operand_count () == 2);
1451
1452 memset (&repr, 0, sizeof (repr));
1453 repr.base.base.byteCount = lendian16 (sizeof (repr));
1454 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1455 repr.base.opcode = lendian16 (alloca->m_opcode);
1456 repr.base.type = lendian16 (alloca->m_type);
1457 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1458 repr.segment = BRIG_SEGMENT_PRIVATE;
1459 repr.modifier = 0;
1460 repr.equivClass = 0;
1461 repr.align = alloca->m_align;
1462 repr.width = BRIG_WIDTH_NONE;
1463 memset (&repr.reserved, 0, sizeof (repr.reserved));
1464 brig_code.add (&repr, sizeof (repr));
1465 brig_insn_count++;
1466}
1467
1468/* Emit an HSA comparison instruction and all necessary directives,
1469 schedule necessary operands for writing. */
1470
1471static void
1472emit_cmp_insn (hsa_insn_cmp *cmp)
1473{
1474 struct BrigInstCmp repr;
1475
1476 memset (&repr, 0, sizeof (repr));
1477 repr.base.base.byteCount = lendian16 (sizeof (repr));
1478 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1479 repr.base.opcode = lendian16 (cmp->m_opcode);
1480 repr.base.type = lendian16 (cmp->m_type);
1481 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1482
1483 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1484 repr.sourceType
1485 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1486 else
1487 repr.sourceType
1488 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1489 repr.modifier = 0;
1490 repr.compare = cmp->m_compare;
1491 repr.pack = 0;
1492
1493 brig_code.add (&repr, sizeof (repr));
1494 brig_insn_count++;
1495}
1496
1497/* Emit an HSA generic branching/sycnronization instruction. */
1498
1499static void
1500emit_generic_branch_insn (hsa_insn_br *br)
1501{
1502 struct BrigInstBr repr;
1503 repr.base.base.byteCount = lendian16 (sizeof (repr));
1504 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1505 repr.base.opcode = lendian16 (br->m_opcode);
1506 repr.width = br->m_width;
1507 repr.base.type = lendian16 (br->m_type);
1508 repr.base.operands = lendian32 (emit_insn_operands (br));
1509 memset (&repr.reserved, 0, sizeof (repr.reserved));
1510
1511 brig_code.add (&repr, sizeof (repr));
1512 brig_insn_count++;
1513}
1514
1515/* Emit an HSA conditional branching instruction and all necessary directives,
1516 schedule necessary operands for writing. */
1517
1518static void
1519emit_cond_branch_insn (hsa_insn_cbr *br)
1520{
1521 struct BrigInstBr repr;
1522
1523 basic_block target = NULL;
1524 edge_iterator ei;
1525 edge e;
1526
1527 /* At the moment we only handle direct conditional jumps. */
1528 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1529 repr.base.base.byteCount = lendian16 (sizeof (repr));
1530 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1531 repr.base.opcode = lendian16 (br->m_opcode);
1532 repr.width = br->m_width;
1533 /* For Conditional jumps the type is always B1. */
1534 repr.base.type = lendian16 (BRIG_TYPE_B1);
1535
1536 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1537 if (e->flags & EDGE_TRUE_VALUE)
1538 {
1539 target = e->dest;
1540 break;
1541 }
1542 gcc_assert (target);
1543
1544 repr.base.operands
1545 = lendian32 (emit_operands (br->get_op (0),
1546 &hsa_bb_for_bb (target)->m_label_ref));
1547 memset (&repr.reserved, 0, sizeof (repr.reserved));
1548
1549 brig_code.add (&repr, sizeof (repr));
1550 brig_insn_count++;
1551}
1552
1553/* Emit an HSA unconditional jump branching instruction that points to
1554 a label REFERENCE. */
1555
1556static void
1557emit_unconditional_jump (hsa_op_code_ref *reference)
1558{
1559 struct BrigInstBr repr;
1560
1561 repr.base.base.byteCount = lendian16 (sizeof (repr));
1562 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1563 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1564 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1565 /* Direct branches to labels must be width(all). */
1566 repr.width = BRIG_WIDTH_ALL;
1567
1568 repr.base.operands = lendian32 (emit_operands (reference));
1569 memset (&repr.reserved, 0, sizeof (repr.reserved));
1570 brig_code.add (&repr, sizeof (repr));
1571 brig_insn_count++;
1572}
1573
1574/* Emit an HSA switch jump instruction that uses a jump table to
1575 jump to a destination label. */
1576
1577static void
1578emit_switch_insn (hsa_insn_sbr *sbr)
1579{
1580 struct BrigInstBr repr;
1581
1582 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1583 repr.base.base.byteCount = lendian16 (sizeof (repr));
1584 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1585 repr.base.opcode = lendian16 (sbr->m_opcode);
1586 repr.width = BRIG_WIDTH_1;
1587 /* For Conditional jumps the type is always B1. */
1588 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1589 repr.base.type = lendian16 (index->m_type);
1590 repr.base.operands
1591 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1592 memset (&repr.reserved, 0, sizeof (repr.reserved));
1593
1594 brig_code.add (&repr, sizeof (repr));
1595 brig_insn_count++;
1596}
1597
1598/* Emit a HSA convert instruction and all necessary directives, schedule
1599 necessary operands for writing. */
1600
1601static void
1602emit_cvt_insn (hsa_insn_cvt *insn)
1603{
1604 struct BrigInstCvt repr;
1605 BrigType16_t srctype;
1606
1607 repr.base.base.byteCount = lendian16 (sizeof (repr));
1608 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1609 repr.base.opcode = lendian16 (insn->m_opcode);
1610 repr.base.type = lendian16 (insn->m_type);
1611 repr.base.operands = lendian32 (emit_insn_operands (insn));
1612
1613 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1614 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1615 else
1616 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1617 repr.sourceType = lendian16 (srctype);
1618 repr.modifier = 0;
1619 /* float to smaller float requires a rounding setting (we default
1620 to 'near'. */
1621 if (hsa_type_float_p (insn->m_type)
1622 && (!hsa_type_float_p (srctype)
1623 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1624 < (srctype & BRIG_TYPE_BASE_MASK))))
1625 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1626 else if (hsa_type_integer_p (insn->m_type) &&
1627 hsa_type_float_p (srctype))
1628 repr.round = BRIG_ROUND_INTEGER_ZERO;
1629 else
1630 repr.round = BRIG_ROUND_NONE;
1631 brig_code.add (&repr, sizeof (repr));
1632 brig_insn_count++;
1633}
1634
1635/* Emit call instruction INSN, where this instruction must be closed
1636 within a call block instruction. */
1637
1638static void
1639emit_call_insn (hsa_insn_call *call)
1640{
1641 struct BrigInstBr repr;
1642
1643 repr.base.base.byteCount = lendian16 (sizeof (repr));
1644 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1645 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1646 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1647
1648 repr.base.operands
1649 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1650 call->m_args_code_list));
1651
1652 /* Internal functions have not set m_called_function. */
1653 if (call->m_called_function)
1654 {
1655 function_linkage_pair pair (call->m_called_function,
1656 call->m_func.m_brig_op_offset);
1657 function_call_linkage.safe_push (pair);
1658 }
1659 else
1660 {
1661 hsa_internal_fn *slot
1662 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1663 gcc_assert (slot);
1664 gcc_assert (slot->m_offset > 0);
1665 call->m_func.m_directive_offset = slot->m_offset;
1666 }
1667
1668 repr.width = BRIG_WIDTH_ALL;
1669 memset (&repr.reserved, 0, sizeof (repr.reserved));
1670
1671 brig_code.add (&repr, sizeof (repr));
1672 brig_insn_count++;
1673}
1674
1675/* Emit argument block directive. */
1676
1677static void
1678emit_arg_block_insn (hsa_insn_arg_block *insn)
1679{
1680 switch (insn->m_kind)
1681 {
1682 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1683 {
1684 struct BrigDirectiveArgBlock repr;
1685 repr.base.byteCount = lendian16 (sizeof (repr));
1686 repr.base.kind = lendian16 (insn->m_kind);
1687 brig_code.add (&repr, sizeof (repr));
1688
1689 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1690 {
1691 insn->m_call_insn->m_args_code_list->m_offsets[i]
1692 = lendian32 (emit_directive_variable
1693 (insn->m_call_insn->m_input_args[i]));
1694 brig_insn_count++;
1695 }
1696
1697 if (insn->m_call_insn->m_output_arg)
1698 {
1699 insn->m_call_insn->m_result_code_list->m_offsets[0]
1700 = lendian32 (emit_directive_variable
1701 (insn->m_call_insn->m_output_arg));
1702 brig_insn_count++;
1703 }
1704
1705 break;
1706 }
1707 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1708 {
1709 struct BrigDirectiveArgBlock repr;
1710 repr.base.byteCount = lendian16 (sizeof (repr));
1711 repr.base.kind = lendian16 (insn->m_kind);
1712 brig_code.add (&repr, sizeof (repr));
1713 break;
1714 }
1715 default:
1716 gcc_unreachable ();
1717 }
1718
1719 brig_insn_count++;
1720}
1721
1722/* Emit comment directive. */
1723
1724static void
1725emit_comment_insn (hsa_insn_comment *insn)
1726{
1727 struct BrigDirectiveComment repr;
1728 memset (&repr, 0, sizeof (repr));
1729
1730 repr.base.byteCount = lendian16 (sizeof (repr));
1731 repr.base.kind = lendian16 (insn->m_opcode);
1732 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1733 brig_code.add (&repr, sizeof (repr));
1734}
1735
1736/* Emit queue instruction INSN. */
1737
1738static void
1739emit_queue_insn (hsa_insn_queue *insn)
1740{
1741 BrigInstQueue repr;
1742 memset (&repr, 0, sizeof (repr));
1743
1744 repr.base.base.byteCount = lendian16 (sizeof (repr));
1745 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1746 repr.base.opcode = lendian16 (insn->m_opcode);
1747 repr.base.type = lendian16 (insn->m_type);
1748 repr.segment = insn->m_segment;
1749 repr.memoryOrder = insn->m_memory_order;
1750 repr.base.operands = lendian32 (emit_insn_operands (insn));
1751 brig_data.round_size_up (4);
1752 brig_code.add (&repr, sizeof (repr));
1753
1754 brig_insn_count++;
1755}
1756
1757/* Emit source type instruction INSN. */
1758
1759static void
1760emit_srctype_insn (hsa_insn_srctype *insn)
1761{
1762 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1763 struct BrigInstSourceType repr;
1764 unsigned operand_count = insn->operand_count ();
1765 gcc_checking_assert (operand_count >= 2);
1766
1767 memset (&repr, 0, sizeof (repr));
1768 repr.sourceType = lendian16 (insn->m_source_type);
1769 repr.base.base.byteCount = lendian16 (sizeof (repr));
1770 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1771 repr.base.opcode = lendian16 (insn->m_opcode);
1772 repr.base.type = lendian16 (insn->m_type);
1773
1774 repr.base.operands = lendian32 (emit_insn_operands (insn));
1775 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1776 brig_insn_count++;
1777}
1778
1779/* Emit packed instruction INSN. */
1780
1781static void
1782emit_packed_insn (hsa_insn_packed *insn)
1783{
1784 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1785 struct BrigInstSourceType repr;
1786 unsigned operand_count = insn->operand_count ();
1787 gcc_checking_assert (operand_count >= 2);
1788
1789 memset (&repr, 0, sizeof (repr));
1790 repr.sourceType = lendian16 (insn->m_source_type);
1791 repr.base.base.byteCount = lendian16 (sizeof (repr));
1792 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1793 repr.base.opcode = lendian16 (insn->m_opcode);
1794 repr.base.type = lendian16 (insn->m_type);
1795
1796 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1797 {
1798 /* Create operand list for packed type. */
1799 for (unsigned i = 1; i < operand_count; i++)
1800 {
1801 gcc_checking_assert (insn->get_op (i));
1802 insn->m_operand_list->m_offsets[i - 1]
1803 = lendian32 (enqueue_op (insn->get_op (i)));
1804 }
1805
1806 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1807 insn->m_operand_list));
1808 }
1809 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1810 {
1811 /* Create operand list for packed type. */
1812 for (unsigned i = 0; i < operand_count - 1; i++)
1813 {
1814 gcc_checking_assert (insn->get_op (i));
1815 insn->m_operand_list->m_offsets[i]
1816 = lendian32 (enqueue_op (insn->get_op (i)));
1817 }
1818
1819 unsigned ops = emit_operands (insn->m_operand_list,
1820 insn->get_op (insn->operand_count () - 1));
1821 repr.base.operands = lendian32 (ops);
1822 }
1823
1824
1825 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1826 brig_insn_count++;
1827}
1828
1829/* Emit a basic HSA instruction and all necessary directives, schedule
1830 necessary operands for writing. */
1831
1832static void
1833emit_basic_insn (hsa_insn_basic *insn)
1834{
1835 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1836 struct BrigInstMod repr;
1837 BrigType16_t type;
1838
1839 memset (&repr, 0, sizeof (repr));
1840 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1841 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1842 repr.base.opcode = lendian16 (insn->m_opcode);
1843 switch (insn->m_opcode)
1844 {
1845 /* And the bit-logical operations need bit types and whine about
1846 arithmetic types :-/ */
1847 case BRIG_OPCODE_AND:
1848 case BRIG_OPCODE_OR:
1849 case BRIG_OPCODE_XOR:
1850 case BRIG_OPCODE_NOT:
1851 type = regtype_for_type (insn->m_type);
1852 break;
1853 default:
1854 type = insn->m_type;
1855 break;
1856 }
1857 repr.base.type = lendian16 (type);
1858 repr.base.operands = lendian32 (emit_insn_operands (insn));
1859
1860 if (hsa_type_packed_p (type))
1861 {
1862 if (hsa_type_float_p (type)
1863 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1864 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1865 else
1866 repr.round = 0;
1867 /* We assume that destination and sources agree in packing layout. */
1868 if (insn->num_used_ops () >= 2)
1869 repr.pack = BRIG_PACK_PP;
1870 else
1871 repr.pack = BRIG_PACK_P;
1872 repr.reserved = 0;
1873 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1874 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1875 brig_code.add (&repr, sizeof (struct BrigInstMod));
1876 }
1877 else
1878 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1879 brig_insn_count++;
1880}
1881
1882/* Emit an HSA instruction and all necessary directives, schedule necessary
1883 operands for writing. */
1884
1885static void
1886emit_insn (hsa_insn_basic *insn)
1887{
1888 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1889
1890 insn->m_brig_offset = brig_code.total_size;
1891
1892 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1893 emit_signal_insn (signal);
1894 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1895 emit_atomic_insn (atom);
1896 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1897 emit_memory_insn (mem);
1898 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1899 emit_addr_insn (insn);
1900 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1901 emit_segment_insn (seg);
1902 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1903 emit_cmp_insn (cmp);
1904 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1905 emit_cond_branch_insn (br);
1906 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1907 {
1908 if (switch_instructions == NULL)
1909 switch_instructions = new vec <hsa_insn_sbr *> ();
1910
1911 switch_instructions->safe_push (sbr);
1912 emit_switch_insn (sbr);
1913 }
1914 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1915 emit_generic_branch_insn (br);
1916 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1917 emit_arg_block_insn (block);
1918 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1919 emit_call_insn (call);
1920 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1921 emit_comment_insn (comment);
1922 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1923 emit_queue_insn (queue);
1924 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1925 emit_srctype_insn (srctype);
1926 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1927 emit_packed_insn (packed);
1928 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1929 emit_cvt_insn (cvt);
1930 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1931 emit_alloca_insn (alloca);
1932 else
1933 emit_basic_insn (insn);
1934}
1935
1936/* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1937 or we are about to finish emitting code, if it is NULL. If the fall through
1938 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1939
1940static void
1941perhaps_emit_branch (basic_block bb, basic_block next_bb)
1942{
1943 basic_block t_bb = NULL, ff = NULL;
1944
1945 edge_iterator ei;
1946 edge e;
1947
1948 /* If the last instruction of BB is a switch, ignore emission of all
1949 edges. */
1950 if (hsa_bb_for_bb (bb)->m_last_insn
1951 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1952 return;
1953
1954 FOR_EACH_EDGE (e, ei, bb->succs)
1955 if (e->flags & EDGE_TRUE_VALUE)
1956 {
1957 gcc_assert (!t_bb);
1958 t_bb = e->dest;
1959 }
1960 else
1961 {
1962 gcc_assert (!ff);
1963 ff = e->dest;
1964 }
1965
1966 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1967 return;
1968
1969 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1970}
1971
1972/* Emit the a function with name NAME to the various brig sections. */
1973
1974void
1975hsa_brig_emit_function (void)
1976{
1977 basic_block bb, prev_bb;
1978 hsa_insn_basic *insn;
1979 BrigDirectiveExecutable *ptr_to_fndir;
1980
1981 brig_init ();
1982
1983 brig_insn_count = 0;
1984 memset (&op_queue, 0, sizeof (op_queue));
1985 op_queue.projected_size = brig_operand.total_size;
1986
1987 if (!function_offsets)
1988 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1989
1990 if (!emitted_declarations)
1991 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1992
1993 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1994 {
1995 tree called = hsa_cfun->m_called_functions[i];
1996
1997 /* If the function has no definition, emit a declaration. */
1998 if (!emitted_declarations->get (called))
1999 {
2000 BrigDirectiveExecutable *e = emit_function_declaration (called);
2001 emitted_declarations->put (called, e);
2002 }
2003 }
2004
2005 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2006 {
2007 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2008 emit_internal_fn_decl (called);
2009 }
2010
2011 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2012 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2013 insn;
2014 insn = insn->m_next)
2015 emit_insn (insn);
2016 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2017 FOR_EACH_BB_FN (bb, cfun)
2018 {
2019 perhaps_emit_branch (prev_bb, bb);
2020 emit_bb_label_directive (hsa_bb_for_bb (bb));
2021 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2022 emit_insn (insn);
2023 prev_bb = bb;
2024 }
2025 perhaps_emit_branch (prev_bb, NULL);
2026 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2027
2028 /* Fill up label references for all sbr instructions. */
2029 if (switch_instructions)
2030 {
2031 for (unsigned i = 0; i < switch_instructions->length (); i++)
2032 {
2033 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2034 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2035 {
2036 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2037 sbr->m_label_code_list->m_offsets[j]
2038 = hbb->m_label_ref.m_directive_offset;
2039 }
2040 }
2041
2042 switch_instructions->release ();
2043 delete switch_instructions;
2044 switch_instructions = NULL;
2045 }
2046
2047 if (dump_file)
2048 {
2049 fprintf (dump_file, "------- After BRIG emission: -------\n");
2050 dump_hsa_cfun (dump_file);
2051 }
2052
2053 emit_queued_operands ();
2054}
2055
2056/* Emit all OMP symbols related to OMP. */
2057
2058void
2059hsa_brig_emit_omp_symbols (void)
2060{
2061 brig_init ();
2062 emit_directive_variable (hsa_num_threads);
2063}
2064
/* Create and return the __hsa_global_variables symbol, which contains
   all the information consumed by libgomp to link global variables
   with the string names used for them by an HSA kernel.  */
2068
2069static tree
2070hsa_output_global_variables ()
2071{
2072 unsigned l = hsa_global_variable_symbols->elements ();
2073
2074 tree variable_info_type = make_node (RECORD_TYPE);
2075 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2076 get_identifier ("name"), ptr_type_node);
2077 DECL_CHAIN (id_f1) = NULL_TREE;
2078 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2079 get_identifier ("omp_data_size"),
2080 ptr_type_node);
2081 DECL_CHAIN (id_f2) = id_f1;
2082 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2083 NULL_TREE);
2084
2085 tree int_num_of_global_vars;
2086 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2087 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2088 tree global_vars_array_type = build_array_type (variable_info_type,
2089 global_vars_num_index_type);
2090 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2091
2092 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2093
2094 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2095 = hsa_global_variable_symbols->begin ();
2096 it != hsa_global_variable_symbols->end (); ++it)
2097 {
2098 unsigned len = strlen ((*it)->m_name);
2099 char *copy = XNEWVEC (char, len + 2);
2100 copy[0] = '&';
2101 memcpy (copy + 1, (*it)->m_name, len);
2102 copy[len + 1] = '\0';
2103 len++;
2104 hsa_sanitize_name (copy);
2105
2106 tree var_name = build_string (len, copy);
2107 TREE_TYPE (var_name)
2108 = build_array_type (char_type_node, build_index_type (size_int (len)));
2109 free (copy);
2110
2111 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2112 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2113 build1 (ADDR_EXPR,
2114 build_pointer_type (TREE_TYPE (var_name)),
2115 var_name));
2116 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2117 build_fold_addr_expr ((*it)->m_decl));
2118
2119 tree variable_info_ctor = build_constructor (variable_info_type,
2120 variable_info_vec);
2121
2122 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2123 variable_info_ctor);
2124 }
2125
2126 tree global_vars_ctor = build_constructor (global_vars_array_type,
2127 global_vars_vec);
2128
2129 char tmp_name[64];
2130 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2131 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2132 get_identifier (tmp_name),
2133 global_vars_array_type);
2134 TREE_STATIC (global_vars_table) = 1;
2135 TREE_READONLY (global_vars_table) = 1;
2136 TREE_PUBLIC (global_vars_table) = 0;
2137 DECL_ARTIFICIAL (global_vars_table) = 1;
2138 DECL_IGNORED_P (global_vars_table) = 1;
2139 DECL_EXTERNAL (global_vars_table) = 0;
2140 TREE_CONSTANT (global_vars_table) = 1;
2141 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2142 varpool_node::finalize_decl (global_vars_table);
2143
2144 return global_vars_table;
2145}
2146
2147/* Create __hsa_host_functions and __hsa_kernels that contain
2148 all informations consumed by libgomp to register all kernels
2149 in the BRIG binary. */
2150
2151static void
2152hsa_output_kernels (tree *host_func_table, tree *kernels)
2153{
2154 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2155
2156 tree int_num_of_kernels;
2157 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2158 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2159 tree host_functions_array_type = build_array_type (ptr_type_node,
2160 kernel_num_index_type);
2161 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2162
2163 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2164 for (unsigned i = 0; i < map_count; ++i)
2165 {
2166 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2167 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2168 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2169 }
2170 tree host_functions_ctor = build_constructor (host_functions_array_type,
2171 host_functions_vec);
2172 char tmp_name[64];
2173 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2174 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2175 get_identifier (tmp_name),
2176 host_functions_array_type);
2177 TREE_STATIC (hsa_host_func_table) = 1;
2178 TREE_READONLY (hsa_host_func_table) = 1;
2179 TREE_PUBLIC (hsa_host_func_table) = 0;
2180 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2181 DECL_IGNORED_P (hsa_host_func_table) = 1;
2182 DECL_EXTERNAL (hsa_host_func_table) = 0;
2183 TREE_CONSTANT (hsa_host_func_table) = 1;
2184 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2185 varpool_node::finalize_decl (hsa_host_func_table);
2186 *host_func_table = hsa_host_func_table;
2187
2188 /* Following code emits list of kernel_info structures. */
2189
2190 tree kernel_info_type = make_node (RECORD_TYPE);
2191 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2192 get_identifier ("name"), ptr_type_node);
2193 DECL_CHAIN (id_f1) = NULL_TREE;
2194 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2195 get_identifier ("omp_data_size"),
2196 unsigned_type_node);
2197 DECL_CHAIN (id_f2) = id_f1;
2198 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2199 get_identifier ("gridified_kernel_p"),
2200 boolean_type_node);
2201 DECL_CHAIN (id_f3) = id_f2;
2202 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2203 get_identifier ("kernel_dependencies_count"),
2204 unsigned_type_node);
2205 DECL_CHAIN (id_f4) = id_f3;
2206 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2207 get_identifier ("kernel_dependencies"),
2208 build_pointer_type (build_pointer_type
2209 (char_type_node)));
2210 DECL_CHAIN (id_f5) = id_f4;
2211 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2212 NULL_TREE);
2213
2214 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2215 tree kernel_info_vector_type
2216 = build_array_type (kernel_info_type,
2217 build_index_type (int_num_of_kernels));
2218 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2219
2220 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2221 tree kernel_dependencies_vector_type = NULL;
2222
2223 for (unsigned i = 0; i < map_count; ++i)
2224 {
2225 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2226 char *name = hsa_get_decl_kernel_mapping_name (i);
2227 unsigned len = strlen (name);
2228 char *copy = XNEWVEC (char, len + 2);
2229 copy[0] = '&';
2230 memcpy (copy + 1, name, len);
2231 copy[len + 1] = '\0';
2232 len++;
2233
2234 tree kern_name = build_string (len, copy);
2235 TREE_TYPE (kern_name)
2236 = build_array_type (char_type_node, build_index_type (size_int (len)));
2237 free (copy);
2238
2239 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2240 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2241 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2242 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2243 gridified_kernel_p);
2244 unsigned count = 0;
2245 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2246 if (hsa_decl_kernel_dependencies)
2247 {
2248 vec<const char *> **slot;
2249 slot = hsa_decl_kernel_dependencies->get (kernel);
2250 if (slot)
2251 {
2252 vec <const char *> *dependencies = *slot;
2253 count = dependencies->length ();
2254
2255 kernel_dependencies_vector_type
2256 = build_array_type (build_pointer_type (char_type_node),
2257 build_index_type (size_int (count)));
2258 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2259
2260 for (unsigned j = 0; j < count; j++)
2261 {
2262 const char *d = (*dependencies)[j];
2263 len = strlen (d);
2264 tree dependency_name = build_string (len, d);
2265 TREE_TYPE (dependency_name)
2266 = build_array_type (char_type_node,
2267 build_index_type (size_int (len)));
2268
2269 CONSTRUCTOR_APPEND_ELT
2270 (kernel_dependencies_vec, NULL_TREE,
2271 build1 (ADDR_EXPR,
2272 build_pointer_type (TREE_TYPE (dependency_name)),
2273 dependency_name));
2274 }
2275 }
2276 }
2277
2278 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2279
2280 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2281 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2282 build1 (ADDR_EXPR,
2283 build_pointer_type (TREE_TYPE
2284 (kern_name)),
2285 kern_name));
2286 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2288 gridified_kernel_p_tree);
2289 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2290
2291 if (count > 0)
2292 {
2293 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2294 gcc_checking_assert (kernel_dependencies_vector_type);
2295 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2296 get_identifier (tmp_name),
2297 kernel_dependencies_vector_type);
2298
2299 TREE_STATIC (dependencies_list) = 1;
2300 TREE_READONLY (dependencies_list) = 1;
2301 TREE_PUBLIC (dependencies_list) = 0;
2302 DECL_ARTIFICIAL (dependencies_list) = 1;
2303 DECL_IGNORED_P (dependencies_list) = 1;
2304 DECL_EXTERNAL (dependencies_list) = 0;
2305 TREE_CONSTANT (dependencies_list) = 1;
2306 DECL_INITIAL (dependencies_list)
2307 = build_constructor (kernel_dependencies_vector_type,
2308 kernel_dependencies_vec);
2309 varpool_node::finalize_decl (dependencies_list);
2310
2311 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2312 build1 (ADDR_EXPR,
2313 build_pointer_type
2314 (TREE_TYPE (dependencies_list)),
2315 dependencies_list));
2316 }
2317 else
2318 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2319
2320 tree kernel_info_ctor = build_constructor (kernel_info_type,
2321 kernel_info_vec);
2322
2323 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2324 kernel_info_ctor);
2325 }
2326
2327 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2328 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2329 get_identifier (tmp_name),
2330 kernel_info_vector_type);
2331
2332 TREE_STATIC (hsa_kernels) = 1;
2333 TREE_READONLY (hsa_kernels) = 1;
2334 TREE_PUBLIC (hsa_kernels) = 0;
2335 DECL_ARTIFICIAL (hsa_kernels) = 1;
2336 DECL_IGNORED_P (hsa_kernels) = 1;
2337 DECL_EXTERNAL (hsa_kernels) = 0;
2338 TREE_CONSTANT (hsa_kernels) = 1;
2339 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2340 kernel_info_vector_vec);
2341 varpool_node::finalize_decl (hsa_kernels);
2342 *kernels = hsa_kernels;
2343}
2344
2345/* Create a static constructor that will register out brig stuff with
2346 libgomp. */
2347
2348static void
2349hsa_output_libgomp_mapping (tree brig_decl)
2350{
2351 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2352 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2353
2354 tree kernels;
2355 tree host_func_table;
2356
2357 hsa_output_kernels (&host_func_table, &kernels);
2358 tree global_vars = hsa_output_global_variables ();
2359
2360 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2361 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2362 get_identifier ("brig_module"), ptr_type_node);
2363 DECL_CHAIN (id_f1) = NULL_TREE;
2364 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2365 get_identifier ("kernel_count"),
2366 unsigned_type_node);
2367
2368 DECL_CHAIN (id_f2) = id_f1;
2369 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2370 get_identifier ("hsa_kernel_infos"),
2371 ptr_type_node);
2372 DECL_CHAIN (id_f3) = id_f2;
2373 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2374 get_identifier ("global_variable_count"),
2375 unsigned_type_node);
2376 DECL_CHAIN (id_f4) = id_f3;
2377 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2378 get_identifier ("hsa_global_variable_infos"),
2379 ptr_type_node);
2380 DECL_CHAIN (id_f5) = id_f4;
2381 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2382 NULL_TREE);
2383 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2384
2385 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2386 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2387 build_fold_addr_expr (brig_decl));
2388 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2389 build_int_cstu (unsigned_type_node, kernel_count));
2390 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2391 build1 (ADDR_EXPR,
2392 build_pointer_type (TREE_TYPE (kernels)),
2393 kernels));
2394 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2395 build_int_cstu (unsigned_type_node,
2396 global_variable_count));
2397 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2398 build1 (ADDR_EXPR,
2399 build_pointer_type (TREE_TYPE (global_vars)),
2400 global_vars));
2401
2402 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2403
2404 char tmp_name[64];
2405 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2406 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2407 get_identifier (tmp_name),
2408 hsa_image_desc_type);
2409 TREE_STATIC (hsa_img_descriptor) = 1;
2410 TREE_READONLY (hsa_img_descriptor) = 1;
2411 TREE_PUBLIC (hsa_img_descriptor) = 0;
2412 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2413 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2414 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2415 TREE_CONSTANT (hsa_img_descriptor) = 1;
2416 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2417 varpool_node::finalize_decl (hsa_img_descriptor);
2418
2419 /* Construct the "host_table" libgomp expects. */
2420 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2421 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2422 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2423 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2424 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2425 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2426 host_func_table_addr);
2427 offset_int func_table_size
2428 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2429 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2430 fold_build2 (POINTER_PLUS_EXPR,
2431 TREE_TYPE (host_func_table_addr),
2432 host_func_table_addr,
2433 build_int_cst (size_type_node,
2434 func_table_size.to_uhwi
2435 ())));
2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2438 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2439 libgomp_host_table_vec);
2440 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2441 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2442 get_identifier (tmp_name),
2443 libgomp_host_table_type);
2444
2445 TREE_STATIC (hsa_libgomp_host_table) = 1;
2446 TREE_READONLY (hsa_libgomp_host_table) = 1;
2447 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2448 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2449 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2450 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2451 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2452 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2453 varpool_node::finalize_decl (hsa_libgomp_host_table);
2454
2455 /* Generate an initializer with a call to the registration routine. */
2456
2457 tree offload_register
2458 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2459 gcc_checking_assert (offload_register);
2460
2461 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2462 append_to_statement_list
2463 (build_call_expr (offload_register, 4,
2464 build_int_cstu (unsigned_type_node,
2465 GOMP_VERSION_PACK (GOMP_VERSION,
2466 GOMP_VERSION_HSA)),
2467 build_fold_addr_expr (hsa_libgomp_host_table),
2468 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2469 build_fold_addr_expr (hsa_img_descriptor)),
2470 hsa_ctor_stmts);
2471
2472 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2473
2474 tree offload_unregister
2475 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2476 gcc_checking_assert (offload_unregister);
2477
2478 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2479 append_to_statement_list
2480 (build_call_expr (offload_unregister, 4,
2481 build_int_cstu (unsigned_type_node,
2482 GOMP_VERSION_PACK (GOMP_VERSION,
2483 GOMP_VERSION_HSA)),
2484 build_fold_addr_expr (hsa_libgomp_host_table),
2485 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2486 build_fold_addr_expr (hsa_img_descriptor)),
2487 hsa_dtor_stmts);
2488 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2489}
2490
2491/* Emit the brig module we have compiled to a section in the final assembly and
2492 also create a compile unit static constructor that will register the brig
2493 module with libgomp. */
2494
2495void
2496hsa_output_brig (void)
2497{
2498 section *saved_section;
2499
2500 if (!brig_initialized)
2501 return;
2502
2503 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2504 {
2505 function_linkage_pair p = function_call_linkage[i];
2506
2507 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2508 gcc_assert (*func_offset);
2509 BrigOperandCodeRef *code_ref
2510 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2511 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2512 code_ref->ref = lendian32 (*func_offset);
2513 }
2514
2515 /* Iterate all function declarations and if we meet a function that should
2516 have module linkage and we are unable to emit HSAIL for the function,
2517 then change the linkage to program linkage. Doing so, we will emit
2518 a valid BRIG image. */
2519 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2520 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2521 = emitted_declarations->begin ();
2522 it != emitted_declarations->end ();
2523 ++it)
2524 {
2525 if (hsa_failed_functions->contains ((*it).first))
2526 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2527 }
2528
2529 saved_section = in_section;
2530
2531 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2532 char tmp_name[64];
2533 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2534 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2535 tree brig_id = get_identifier (tmp_name);
2536 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2537 char_type_node);
2538 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2539 TREE_ADDRESSABLE (brig_decl) = 1;
2540 TREE_READONLY (brig_decl) = 1;
2541 DECL_ARTIFICIAL (brig_decl) = 1;
2542 DECL_IGNORED_P (brig_decl) = 1;
2543 TREE_STATIC (brig_decl) = 1;
2544 TREE_PUBLIC (brig_decl) = 0;
2545 TREE_USED (brig_decl) = 1;
2546 DECL_INITIAL (brig_decl) = brig_decl;
2547 TREE_ASM_WRITTEN (brig_decl) = 1;
2548
2549 BrigModuleHeader module_header;
2550 memcpy (&module_header.identification, "HSA BRIG",
2551 sizeof (module_header.identification));
2552 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2553 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2554 uint64_t section_index[3];
2555
2556 int data_padding, code_padding, operand_padding;
2557 data_padding = HSA_SECTION_ALIGNMENT
2558 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2559 code_padding = HSA_SECTION_ALIGNMENT
2560 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2561 operand_padding = HSA_SECTION_ALIGNMENT
2562 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2563
2564 uint64_t module_size = sizeof (module_header)
2565 + sizeof (section_index)
2566 + brig_data.total_size
2567 + data_padding
2568 + brig_code.total_size
2569 + code_padding
2570 + brig_operand.total_size
2571 + operand_padding;
2572 gcc_assert ((module_size % 16) == 0);
2573 module_header.byteCount = lendian64 (module_size);
2574 memset (&module_header.hash, 0, sizeof (module_header.hash));
2575 module_header.reserved = 0;
2576 module_header.sectionCount = lendian32 (3);
2577 module_header.sectionIndex = lendian64 (sizeof (module_header));
2578 assemble_string ((const char *) &module_header, sizeof (module_header));
2579 uint64_t off = sizeof (module_header) + sizeof (section_index);
2580 section_index[0] = lendian64 (off);
2581 off += brig_data.total_size + data_padding;
2582 section_index[1] = lendian64 (off);
2583 off += brig_code.total_size + code_padding;
2584 section_index[2] = lendian64 (off);
2585 assemble_string ((const char *) &section_index, sizeof (section_index));
2586
2587 char padding[HSA_SECTION_ALIGNMENT];
2588 memset (padding, 0, sizeof (padding));
2589
2590 brig_data.output ();
2591 assemble_string (padding, data_padding);
2592 brig_code.output ();
2593 assemble_string (padding, code_padding);
2594 brig_operand.output ();
2595 assemble_string (padding, operand_padding);
2596
2597 if (saved_section)
2598 switch_to_section (saved_section);
2599
2600 hsa_output_libgomp_mapping (brig_decl);
2601
2602 hsa_free_decl_kernel_mapping ();
2603 brig_release_data ();
2604 hsa_deinit_compilation_unit_data ();
2605
2606 delete emitted_declarations;
2607 emitted_declarations = NULL;
2608 delete function_offsets;
2609 function_offsets = NULL;
2610}
2611