1/* Top-level LTO routines.
2 Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 Contributed by CodeSourcery, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#define INCLUDE_STRING
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "function.h"
27#include "bitmap.h"
28#include "basic-block.h"
29#include "tree.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "alloc-pool.h"
33#include "tree-pass.h"
34#include "tree-streamer.h"
35#include "cgraph.h"
36#include "opts.h"
37#include "toplev.h"
38#include "stor-layout.h"
39#include "symbol-summary.h"
40#include "tree-vrp.h"
41#include "sreal.h"
42#include "ipa-cp.h"
43#include "ipa-prop.h"
44#include "debug.h"
45#include "lto.h"
46#include "lto-section-names.h"
47#include "splay-tree.h"
48#include "lto-partition.h"
49#include "context.h"
50#include "pass_manager.h"
51#include "ipa-fnsummary.h"
52#include "ipa-utils.h"
53#include "gomp-constants.h"
54#include "lto-symtab.h"
55#include "stringpool.h"
56#include "fold-const.h"
57#include "attribs.h"
58#include "builtins.h"
59#include "lto-common.h"
60#include "opts-jobserver.h"
61
62/* Number of parallel tasks to run. */
63static int lto_parallelism;
64
65/* Number of active WPA streaming processes. */
66static int nruns = 0;
67
68/* GNU make's jobserver info. */
69static jobserver_info *jinfo = NULL;
70
71/* Return true when NODE has a clone that is analyzed (i.e. we need
72 to load its body even if the node itself is not needed). */
73
74static bool
75has_analyzed_clone_p (struct cgraph_node *node)
76{
77 struct cgraph_node *orig = node;
78 node = node->clones;
79 if (node)
80 while (node != orig)
81 {
82 if (node->analyzed)
83 return true;
84 if (node->clones)
85 node = node->clones;
86 else if (node->next_sibling_clone)
87 node = node->next_sibling_clone;
88 else
89 {
90 while (node != orig && !node->next_sibling_clone)
91 node = node->clone_of;
92 if (node != orig)
93 node = node->next_sibling_clone;
94 }
95 }
96 return false;
97}
98
99/* Read the function body for the function associated with NODE. */
100
101static void
102lto_materialize_function (struct cgraph_node *node)
103{
104 tree decl;
105
106 decl = node->decl;
107 /* Read in functions with body (analyzed nodes)
108 and also functions that are needed to produce virtual clones. */
109 if ((node->has_gimple_body_p () && node->analyzed)
110 || node->used_as_abstract_origin
111 || has_analyzed_clone_p (node))
112 {
113 /* Clones don't need to be read. */
114 if (node->clone_of)
115 return;
116 if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
117 first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
118 /* If the file contains a function with a language specific EH
119 personality set or with EH enabled initialize the backend EH
120 machinery. */
121 if (DECL_FUNCTION_PERSONALITY (decl)
122 || opt_for_fn (decl, flag_exceptions))
123 lto_init_eh ();
124 }
125
126 /* Let the middle end know about the function. */
127 rest_of_decl_compilation (decl, 1, 0);
128}
129
130/* Materialize all the bodies for all the nodes in the callgraph. */
131
132static void
133materialize_cgraph (void)
134{
135 struct cgraph_node *node;
136 timevar_id_t lto_timer;
137
138 if (!quiet_flag)
139 fprintf (stderr,
140 flag_wpa ? "Materializing decls:" : "Reading function bodies:");
141
142 /* Start the appropriate timer depending on the mode that we are
143 operating in. */
144 lto_timer = (flag_wpa) ? TV_WHOPR_WPA
145 : (flag_ltrans) ? TV_WHOPR_LTRANS
146 : TV_LTO;
147 timevar_push (tv: lto_timer);
148
149 FOR_EACH_FUNCTION (node)
150 {
151 if (node->lto_file_data)
152 {
153 lto_materialize_function (node);
154 lto_stats.num_input_cgraph_nodes++;
155 }
156 }
157
158 current_function_decl = NULL;
159 set_cfun (NULL);
160
161 if (!quiet_flag)
162 fprintf (stderr, format: "\n");
163
164 timevar_pop (tv: lto_timer);
165}
166
167/* Actually stream out ENCODER into TEMP_FILENAME. */
168
169static void
170stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
171{
172 lto_file *file = lto_obj_file_open (filename: temp_filename, writable: true);
173 if (!file)
174 fatal_error (input_location, "%<lto_obj_file_open()%> failed");
175 lto_set_current_out_file (file);
176
177 gcc_assert (!dump_file);
178 streamer_dump_file = dump_begin (TDI_lto_stream_out, NULL, part);
179 ipa_write_optimization_summaries (encoder);
180
181 free (CONST_CAST (char *, file->filename));
182
183 lto_set_current_out_file (NULL);
184 lto_obj_file_close (file);
185 free (ptr: file);
186 if (streamer_dump_file)
187 {
188 dump_end (TDI_lto_stream_out, streamer_dump_file);
189 streamer_dump_file = NULL;
190 }
191}
192
/* Block until one forked streaming process terminates.  A non-zero exit
   status or death by signal is reported as a fatal error.  On success
   the count of running children is decremented and, when connected to
   the jobserver, one token is handed back.  */
#ifdef HAVE_WORKING_FORK
static void
wait_for_child ()
{
#ifndef WCONTINUED
#define WCONTINUED 0
#endif
  int status;
  bool terminated = false;

  /* Stop and continue notifications are reported by waitpid as well;
     keep waiting until the child has really gone away.  */
  while (!terminated)
    {
      if (waitpid (0, &status, WUNTRACED | WCONTINUED) == -1)
        fatal_error (input_location, "waitpid failed");

      if (WIFEXITED (status) && WEXITSTATUS (status))
        fatal_error (input_location, "streaming subprocess failed");
      else if (WIFSIGNALED (status))
        fatal_error (input_location,
                     "streaming subprocess was killed by signal");

      terminated = WIFEXITED (status) || WIFSIGNALED (status);
    }

  --nruns;

  /* Return token to the jobserver if active.  */
  if (jinfo != NULL && jinfo->is_connected)
    jinfo->return_token ();
}
#endif
223
224static void
225stream_out_partitions_1 (char *temp_filename, int blen, int min, int max)
226{
227 /* Write all the nodes in SET. */
228 for (int p = min; p < max; p ++)
229 {
230 sprintf (s: temp_filename + blen, format: "%u.o", p);
231 stream_out (temp_filename, encoder: ltrans_partitions[p]->encoder, part: p);
232 ltrans_partitions[p]->encoder = NULL;
233 }
234}
235
236/* Stream out ENCODER into TEMP_FILENAME
237 Fork if that seems to help. */
238
239static void
240stream_out_partitions (char *temp_filename, int blen, int min, int max,
241 bool ARG_UNUSED (last))
242{
243#ifdef HAVE_WORKING_FORK
244 if (lto_parallelism <= 1)
245 {
246 stream_out_partitions_1 (temp_filename, blen, min, max);
247 return;
248 }
249
250 if (lto_parallelism > 0 && nruns >= lto_parallelism)
251 wait_for_child ();
252
253 /* If this is not the last parallel partition, execute new
254 streaming process. */
255 if (!last)
256 {
257 if (jinfo != NULL && jinfo->is_connected)
258 while (true)
259 {
260 if (jinfo->get_token ())
261 break;
262 if (nruns > 0)
263 wait_for_child ();
264 else
265 {
266 /* There are no free tokens, lets do the job outselves. */
267 stream_out_partitions_1 (temp_filename, blen, min, max);
268 asm_nodes_output = true;
269 return;
270 }
271 }
272
273 pid_t cpid = fork ();
274
275 if (!cpid)
276 {
277 setproctitle ("lto1-wpa-streaming");
278 stream_out_partitions_1 (temp_filename, blen, min, max);
279 exit (status: 0);
280 }
281 /* Fork failed; lets do the job ourseleves. */
282 else if (cpid == -1)
283 stream_out_partitions_1 (temp_filename, blen, min, max);
284 else
285 nruns++;
286 }
287 /* Last partition; stream it and wait for all children to die. */
288 else
289 {
290 stream_out_partitions_1 (temp_filename, blen, min, max);
291 while (nruns > 0)
292 wait_for_child ();
293
294 if (jinfo != NULL && jinfo->is_connected)
295 jinfo->disconnect ();
296 }
297 asm_nodes_output = true;
298#else
299 stream_out_partitions_1 (temp_filename, blen, min, max);
300#endif
301}
302
303/* Write all output files in WPA mode and the file with the list of
304 LTRANS units. */
305
306static void
307lto_wpa_write_files (void)
308{
309 unsigned i, n_sets;
310 ltrans_partition part;
311 FILE *ltrans_output_list_stream;
312 char *temp_filename;
313 auto_vec <char *>temp_filenames;
314 auto_vec <int>temp_priority;
315 size_t blen;
316
317 /* Open the LTRANS output list. */
318 if (!ltrans_output_list)
319 fatal_error (input_location, "no LTRANS output list filename provided");
320
321 timevar_push (tv: TV_WHOPR_WPA);
322
323 FOR_EACH_VEC_ELT (ltrans_partitions, i, part)
324 lto_stats.num_output_symtab_nodes
325 += lto_symtab_encoder_size (encoder: part->encoder);
326
327 timevar_pop (tv: TV_WHOPR_WPA);
328
329 timevar_push (tv: TV_WHOPR_WPA_IO);
330
331 cgraph_node *node;
332 /* Do body modifications needed for streaming before we fork out
333 worker processes. */
334 FOR_EACH_FUNCTION (node)
335 if (!node->clone_of && gimple_has_body_p (node->decl))
336 lto_prepare_function_for_streaming (node);
337
338 ggc_trim ();
339 report_heap_memory_use ();
340
341 /* Generate a prefix for the LTRANS unit files. */
342 blen = strlen (ltrans_output_list);
343 temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
344 strcpy (dest: temp_filename, ltrans_output_list);
345 if (blen > sizeof (".out")
346 && strcmp (s1: temp_filename + blen - sizeof (".out") + 1,
347 s2: ".out") == 0)
348 temp_filename[blen - sizeof (".out") + 1] = '\0';
349 blen = strlen (s: temp_filename);
350
351 n_sets = ltrans_partitions.length ();
352 unsigned sets_per_worker = n_sets;
353 if (lto_parallelism > 1)
354 {
355 if (lto_parallelism > (int)n_sets)
356 lto_parallelism = n_sets;
357 sets_per_worker = (n_sets + lto_parallelism - 1) / lto_parallelism;
358 }
359
360 for (i = 0; i < n_sets; i++)
361 {
362 ltrans_partition part = ltrans_partitions[i];
363
364 /* Write all the nodes in SET. */
365 sprintf (s: temp_filename + blen, format: "%u.o", i);
366
367 if (!quiet_flag)
368 fprintf (stderr, format: " %s (%s %i insns)", temp_filename, part->name,
369 part->insns);
370 if (symtab->dump_file)
371 {
372 lto_symtab_encoder_iterator lsei;
373
374 fprintf (stream: symtab->dump_file,
375 format: "Writing partition %s to file %s, %i insns\n",
376 part->name, temp_filename, part->insns);
377 fprintf (stream: symtab->dump_file, format: " Symbols in partition: ");
378 for (lsei = lsei_start_in_partition (encoder: part->encoder);
379 !lsei_end_p (lsei);
380 lsei_next_in_partition (lsei: &lsei))
381 {
382 symtab_node *node = lsei_node (lsei);
383 fprintf (stream: symtab->dump_file, format: "%s ", node->dump_asm_name ());
384 }
385 fprintf (stream: symtab->dump_file, format: "\n Symbols in boundary: ");
386 for (lsei = lsei_start (encoder: part->encoder); !lsei_end_p (lsei);
387 lsei_next (lsei: &lsei))
388 {
389 symtab_node *node = lsei_node (lsei);
390 if (!lto_symtab_encoder_in_partition_p (part->encoder, node))
391 {
392 fprintf (stream: symtab->dump_file, format: "%s ", node->dump_asm_name ());
393 cgraph_node *cnode = dyn_cast <cgraph_node *> (p: node);
394 if (cnode
395 && lto_symtab_encoder_encode_body_p (part->encoder,
396 cnode))
397 fprintf (stream: symtab->dump_file, format: "(body included)");
398 else
399 {
400 varpool_node *vnode = dyn_cast <varpool_node *> (p: node);
401 if (vnode
402 && lto_symtab_encoder_encode_initializer_p (part->encoder,
403 vnode))
404 fprintf (stream: symtab->dump_file, format: "(initializer included)");
405 }
406 }
407 }
408 fprintf (stream: symtab->dump_file, format: "\n");
409 }
410 gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
411
412 temp_priority.safe_push (obj: part->insns);
413 temp_filenames.safe_push (obj: xstrdup (temp_filename));
414 }
415 memory_block_pool::trim (nblocks: 0);
416
417 for (int set = 0; set < MAX (lto_parallelism, 1); set++)
418 {
419 stream_out_partitions (temp_filename, blen, min: set * sets_per_worker,
420 MIN ((set + 1) * sets_per_worker, n_sets),
421 last: set == MAX (lto_parallelism, 1) - 1);
422 }
423
424 ltrans_output_list_stream = fopen (ltrans_output_list, modes: "w");
425 if (ltrans_output_list_stream == NULL)
426 fatal_error (input_location,
427 "opening LTRANS output list %s: %m", ltrans_output_list);
428 for (i = 0; i < n_sets; i++)
429 {
430 unsigned int len = strlen (s: temp_filenames[i]);
431 if (fprintf (stream: ltrans_output_list_stream, format: "%i\n", temp_priority[i]) < 0
432 || fwrite (ptr: temp_filenames[i], size: 1, n: len, s: ltrans_output_list_stream) < len
433 || fwrite (ptr: "\n", size: 1, n: 1, s: ltrans_output_list_stream) < 1)
434 fatal_error (input_location, "writing to LTRANS output list %s: %m",
435 ltrans_output_list);
436 free (ptr: temp_filenames[i]);
437 }
438
439 lto_stats.num_output_files += n_sets;
440
441 /* Close the LTRANS output list. */
442 if (fclose (stream: ltrans_output_list_stream))
443 fatal_error (input_location,
444 "closing LTRANS output list %s: %m", ltrans_output_list);
445
446 free_ltrans_partitions ();
447 free (ptr: temp_filename);
448
449 timevar_pop (tv: TV_WHOPR_WPA_IO);
450}
451
/* For every variable carrying the "omp declare target link" attribute,
   create an artificial static pointer variable and make the original
   variable's value expression a dereference of that pointer.  Does
   nothing unless built as an accelerator compiler.  */

static void
offload_handle_link_vars (void)
{
#ifdef ACCEL_COMPILER
  varpool_node *var;
  FOR_EACH_VARIABLE (var)
    {
      tree decl = var->decl;
      if (!lookup_attribute ("omp declare target link",
                             DECL_ATTRIBUTES (decl)))
        continue;

      /* Build the link pointer: a pointer to the variable's type, named
         after the variable with a "linkptr" suffix.  */
      tree ptr_type = build_pointer_type (TREE_TYPE (decl));
      tree ptr_name = clone_function_name (decl, "linkptr");
      tree link_ptr_var
        = build_decl (UNKNOWN_LOCATION, VAR_DECL, ptr_name, ptr_type);

      TREE_USED (link_ptr_var) = 1;
      TREE_STATIC (link_ptr_var) = 1;
      TREE_PUBLIC (link_ptr_var) = TREE_PUBLIC (decl);
      DECL_ARTIFICIAL (link_ptr_var) = 1;
      SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var));

      /* Redirect accesses to the variable through the link pointer.  */
      SET_DECL_VALUE_EXPR (decl, build_simple_mem_ref (link_ptr_var));
      DECL_HAS_VALUE_EXPR_P (decl) = 1;
    }
#endif
}
477
478/* Perform whole program analysis (WPA) on the callgraph and write out the
479 optimization plan. */
480
481static void
482do_whole_program_analysis (void)
483{
484 symtab_node *node;
485
486 lto_parallelism = 1;
487
488 if (!strcmp (flag_wpa, s2: "jobserver"))
489 {
490 jinfo = new jobserver_info ();
491 if (jinfo->is_active)
492 jinfo->connect ();
493
494 lto_parallelism = param_max_lto_streaming_parallelism;
495 }
496 else
497 {
498 lto_parallelism = atoi (flag_wpa);
499 if (lto_parallelism <= 0)
500 lto_parallelism = 0;
501 if (lto_parallelism >= param_max_lto_streaming_parallelism)
502 lto_parallelism = param_max_lto_streaming_parallelism;
503 }
504
505 timevar_start (TV_PHASE_OPT_GEN);
506
507 /* Note that since we are in WPA mode, materialize_cgraph will not
508 actually read in all the function bodies. It only materializes
509 the decls and cgraph nodes so that analysis can be performed. */
510 materialize_cgraph ();
511
512 /* Reading in the cgraph uses different timers, start timing WPA now. */
513 timevar_push (tv: TV_WHOPR_WPA);
514
515 if (pre_ipa_mem_report)
516 dump_memory_report ("Memory consumption before IPA");
517
518 symtab->function_flags_ready = true;
519
520 if (symtab->dump_file)
521 symtab->dump (f: symtab->dump_file);
522 bitmap_obstack_initialize (NULL);
523 symtab->state = IPA_SSA;
524
525 execute_ipa_pass_list (g->get_passes ()->all_regular_ipa_passes);
526
527 /* When WPA analysis raises errors, do not bother to output anything. */
528 if (seen_error ())
529 return;
530
531 /* We are about to launch the final LTRANS phase, stop the WPA timer. */
532 timevar_pop (tv: TV_WHOPR_WPA);
533
534 /* We are no longer going to stream in anything. Free some memory. */
535 lto_free_file_name_hash ();
536
537
538 timevar_push (tv: TV_WHOPR_PARTITIONING);
539
540 gcc_assert (!dump_file);
541 dump_file = dump_begin (partition_dump_id, NULL);
542
543 if (dump_file)
544 symtab->dump (f: dump_file);
545
546 symtab_node::checking_verify_symtab_nodes ();
547 bitmap_obstack_release (NULL);
548 if (flag_lto_partition == LTO_PARTITION_1TO1)
549 lto_1_to_1_map ();
550 else if (flag_lto_partition == LTO_PARTITION_MAX)
551 lto_max_map ();
552 else if (flag_lto_partition == LTO_PARTITION_ONE)
553 lto_balanced_map (1, INT_MAX);
554 else if (flag_lto_partition == LTO_PARTITION_BALANCED)
555 lto_balanced_map (param_lto_partitions,
556 param_max_partition_size);
557 else
558 gcc_unreachable ();
559
560 /* Size summaries are needed for balanced partitioning. Free them now so
561 the memory can be used for streamer caches. */
562 ipa_free_size_summary ();
563
564 /* AUX pointers are used by partitioning code to bookkeep number of
565 partitions symbol is in. This is no longer needed. */
566 FOR_EACH_SYMBOL (node)
567 node->aux = NULL;
568
569 lto_stats.num_cgraph_partitions += ltrans_partitions.length ();
570
571 /* Find out statics that need to be promoted
572 to globals with hidden visibility because they are accessed from multiple
573 partitions. */
574 lto_promote_cross_file_statics ();
575 offload_handle_link_vars ();
576 if (dump_file)
577 dump_end (partition_dump_id, dump_file);
578 dump_file = NULL;
579 timevar_pop (tv: TV_WHOPR_PARTITIONING);
580
581 timevar_stop (TV_PHASE_OPT_GEN);
582
583 /* Collect a last time - in lto_wpa_write_files we may end up forking
584 with the idea that this doesn't increase memory usage. So we
585 absoultely do not want to collect after that. */
586 ggc_collect ();
587
588 timevar_start (TV_PHASE_STREAM_OUT);
589 if (!quiet_flag)
590 {
591 fprintf (stderr, format: "\nStreaming out");
592 fflush (stderr);
593 }
594 lto_wpa_write_files ();
595 if (!quiet_flag)
596 fprintf (stderr, format: "\n");
597 timevar_stop (TV_PHASE_STREAM_OUT);
598
599 if (post_ipa_mem_report)
600 dump_memory_report ("Memory consumption after IPA");
601
602 /* Show the LTO report before launching LTRANS. */
603 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
604 print_lto_report_1 ();
605 if (mem_report_wpa)
606 dump_memory_report ("Final");
607}
608
609unsigned int
610lto_option_lang_mask (void)
611{
612 return CL_LTO;
613}
614
615/* Main entry point for the GIMPLE front end. This front end has
616 three main personalities:
617
618 - LTO (-flto). All the object files on the command line are
619 loaded in memory and processed as a single translation unit.
620 This is the traditional link-time optimization behavior.
621
622 - WPA (-fwpa). Only the callgraph and summary information for
623 files in the command file are loaded. A single callgraph
624 (without function bodies) is instantiated for the whole set of
625 files. IPA passes are only allowed to analyze the call graph
626 and make transformation decisions. The callgraph is
627 partitioned, each partition is written to a new object file
628 together with the transformation decisions.
629
630 - LTRANS (-fltrans). Similar to -flto but it prevents the IPA
631 summary files from running again. Since WPA computed summary
632 information and decided what transformations to apply, LTRANS
633 simply applies them. */
634
635void
636lto_main (void)
637{
638 /* LTO is called as a front end, even though it is not a front end.
639 Because it is called as a front end, TV_PHASE_PARSING and
640 TV_PARSE_GLOBAL are active, and we need to turn them off while
641 doing LTO. Later we turn them back on so they are active up in
642 toplev.cc. */
643 timevar_pop (tv: TV_PARSE_GLOBAL);
644 timevar_stop (TV_PHASE_PARSING);
645
646 timevar_start (TV_PHASE_SETUP);
647
648 /* Initialize the LTO front end. */
649 lto_fe_init ();
650
651 timevar_stop (TV_PHASE_SETUP);
652 timevar_start (TV_PHASE_STREAM_IN);
653
654 /* Read all the symbols and call graph from all the files in the
655 command line. */
656 read_cgraph_and_symbols (num_in_fnames, in_fnames);
657
658 timevar_stop (TV_PHASE_STREAM_IN);
659
660 if (!seen_error ())
661 {
662 offload_handle_link_vars ();
663
664 /* If WPA is enabled analyze the whole call graph and create an
665 optimization plan. Otherwise, read in all the function
666 bodies and continue with optimization. */
667 if (flag_wpa)
668 do_whole_program_analysis ();
669 else
670 {
671 timevar_start (TV_PHASE_OPT_GEN);
672
673 materialize_cgraph ();
674 if (!flag_ltrans)
675 {
676 lto_promote_statics_nonwpa ();
677 offload_handle_link_vars ();
678 }
679
680 /* Annotate the CU DIE and mark the early debug phase as finished. */
681 debuginfo_early_start ();
682 debug_hooks->early_finish ("<artificial>");
683 debuginfo_early_stop ();
684
685 /* Let the middle end know that we have read and merged all of
686 the input files. */
687 symtab->compile ();
688
689 timevar_stop (TV_PHASE_OPT_GEN);
690
691 /* FIXME lto, if the processes spawned by WPA fail, we miss
692 the chance to print WPA's report, so WPA will call
693 print_lto_report before launching LTRANS. If LTRANS was
694 launched directly by the driver we would not need to do
695 this. */
696 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
697 print_lto_report_1 ();
698 }
699 }
700
701 /* Here we make LTO pretend to be a parser. */
702 timevar_start (TV_PHASE_PARSING);
703 timevar_push (tv: TV_PARSE_GLOBAL);
704}
705

source code of gcc/lto/lto.cc