/* LTO partitioning logic routines.
   Copyright (C) 2009-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "function.h"
#include "basic-block.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "stringpool.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "lto-partition.h"

vec<ltrans_partition> ltrans_partitions;

static void add_symbol_to_partition (ltrans_partition part, symtab_node *node);


/* Helper for qsort; compare partitions and return one with smaller order.  */

static int
cmp_partitions_order (const void *a, const void *b)
{
  const struct ltrans_partition_def *pa
    = *(struct ltrans_partition_def *const *)a;
  const struct ltrans_partition_def *pb
    = *(struct ltrans_partition_def *const *)b;
  int ordera = -1, orderb = -1;

  if (lto_symtab_encoder_size (pa->encoder))
    ordera = lto_symtab_encoder_deref (pa->encoder, 0)->order;
  if (lto_symtab_encoder_size (pb->encoder))
    orderb = lto_symtab_encoder_deref (pb->encoder, 0)->order;
  return orderb - ordera;
}

/* Create new partition with name NAME.  */

static ltrans_partition
new_partition (const char *name)
{
  ltrans_partition part = XCNEW (struct ltrans_partition_def);
  part->encoder = lto_symtab_encoder_new (false);
  part->name = name;
  part->insns = 0;
  part->symbols = 0;
  ltrans_partitions.safe_push (part);
  return part;
}

/* Free memory used by ltrans data structures.  */

void
free_ltrans_partitions (void)
{
  unsigned int idx;
  ltrans_partition part;
  for (idx = 0; ltrans_partitions.iterate (idx, &part); idx++)
    {
      if (part->initializers_visited)
        delete part->initializers_visited;
      /* Symtab encoder is freed after streaming.  */
      free (part);
    }
  ltrans_partitions.release ();
}

/* Return true if symbol is already in some partition.  */

static inline bool
symbol_partitioned_p (symtab_node *node)
{
  return node->aux;
}
/* Add references into the partition.  */
static void
add_references_to_partition (ltrans_partition part, symtab_node *node)
{
  int i;
  struct ipa_ref *ref = NULL;

  /* Add all duplicated references to the partition.  */
  for (i = 0; node->iterate_reference (i, ref); i++)
    if (ref->referred->get_partitioning_class () == SYMBOL_DUPLICATE)
      add_symbol_to_partition (part, ref->referred);
    /* References to a readonly variable may be constant folded into its
       value.  Recursively look into the initializers of the constant
       variable and add references, too.  */
    else if (is_a <varpool_node *> (ref->referred)
             && (dyn_cast <varpool_node *> (ref->referred)
                 ->ctor_useable_for_folding_p ())
             && !lto_symtab_encoder_in_partition_p (part->encoder, ref->referred))
      {
        if (!part->initializers_visited)
          part->initializers_visited = new hash_set<symtab_node *>;
        if (!part->initializers_visited->add (ref->referred))
          add_references_to_partition (part, ref->referred);
      }
}

/* Helper function for add_symbol_to_partition doing the actual dirty work
   of adding NODE to PART.  */

static bool
add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
{
  enum symbol_partitioning_class c = node->get_partitioning_class ();
  struct ipa_ref *ref;
  symtab_node *node1;

  /* If NODE is already there, we have nothing to do.  */
  if (lto_symtab_encoder_in_partition_p (part->encoder, node))
    return true;

  /* Non-duplicated aliases or thunks of a duplicated symbol need to be output
     just once.

     Be lax about comdats; they may or may not be duplicated and we may
     end up needing to duplicate a keyed comdat because it has an unkeyed
     alias.  */
  if (c == SYMBOL_PARTITION && !DECL_COMDAT (node->decl)
      && symbol_partitioned_p (node))
    return false;

  /* Be sure that we never try to duplicate a partitioned symbol
     or add an external symbol.  */
  gcc_assert (c != SYMBOL_EXTERNAL
              && (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));

  part->symbols++;

  lto_set_symtab_encoder_in_partition (part->encoder, node);

  if (symbol_partitioned_p (node))
    {
      node->in_other_partition = 1;
      if (dump_file)
        fprintf (dump_file,
                 "Symbol node %s now used in multiple partitions\n",
                 node->dump_name ());
    }
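  /* NODE->aux is used as a reference count of the partitions NODE has been
     added to; undo_partition decrements it again when additions are rolled
     back, and symbol_partitioned_p simply tests it for non-zero.  */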
  node->aux = (void *)((size_t)node->aux + 1);

  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
    {
      struct cgraph_edge *e;
      if (!node->alias && c == SYMBOL_PARTITION)
        part->insns += ipa_size_summaries->get (cnode)->size;

      /* Add all inline clones and callees that are duplicated.  */
      for (e = cnode->callees; e; e = e->next_callee)
        if (!e->inline_failed)
          add_symbol_to_partition_1 (part, e->callee);
        else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE)
          add_symbol_to_partition (part, e->callee);

      /* Add all thunks associated with the function.  */
      for (e = cnode->callers; e; e = e->next_caller)
        if (e->caller->thunk && !e->caller->inlined_to)
          add_symbol_to_partition_1 (part, e->caller);
    }

  add_references_to_partition (part, node);

  /* Add all aliases associated with the symbol.  */

  FOR_EACH_ALIAS (node, ref)
    if (!ref->referring->transparent_alias)
      add_symbol_to_partition_1 (part, ref->referring);
    else
      {
        struct ipa_ref *ref2;
        /* We do not need to add transparent aliases if they are not used.
           However we must add aliases of transparent aliases if they exist.  */
        FOR_EACH_ALIAS (ref->referring, ref2)
          {
            /* Nested transparent aliases are not permitted.  */
            gcc_checking_assert (!ref2->referring->transparent_alias);
            add_symbol_to_partition_1 (part, ref2->referring);
          }
      }

  /* Ensure that all members of a SAME_COMDAT_GROUP list are always added
     as a group.  */
  if (node->same_comdat_group)
    for (node1 = node->same_comdat_group;
         node1 != node; node1 = node1->same_comdat_group)
      if (!node->alias)
        {
          bool added = add_symbol_to_partition_1 (part, node1);
          gcc_assert (added);
        }
  return true;
}

/* If symbol NODE is really part of another symbol's definition (i.e. it is
   an internal label, thunk, alias or so), return the outer symbol.
   When add_symbol_to_partition_1 is called on the outer symbol it must
   eventually add NODE, too.  */
static symtab_node *
contained_in_symbol (symtab_node *node)
{
  /* There is no need to consider transparent aliases to be part of the
     definition: they are only useful inside the partition in which they are
     output, and thus we will always see an explicit reference to them.  */
  if (node->transparent_alias)
    return node;
  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
    {
      cnode = cnode->function_symbol ();
      if (cnode->inlined_to)
        cnode = cnode->inlined_to;
      return cnode;
    }
  else if (varpool_node *vnode = dyn_cast <varpool_node *> (node))
    return vnode->ultimate_alias_target ();
  return node;
}

/* Add symbol NODE to partition.  When the definition of NODE is part
   of another symbol's definition, add the other symbol, too.  */

static void
add_symbol_to_partition (ltrans_partition part, symtab_node *node)
{
  symtab_node *node1;

  /* Verify that we do not try to duplicate something that cannot be.  */
  gcc_checking_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
                       || !symbol_partitioned_p (node));

  while ((node1 = contained_in_symbol (node)) != node)
    node = node1;

  /* If we have a duplicated symbol contained in something we cannot
     duplicate, we are very badly screwed.  The other way round is possible,
     so we do not assert this in add_symbol_to_partition_1.

     Be lax about comdats; they may or may not be duplicated and we may
     end up needing to duplicate a keyed comdat because it has an unkeyed
     alias.  */

  gcc_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
              || DECL_COMDAT (node->decl)
              || !symbol_partitioned_p (node));

  add_symbol_to_partition_1 (part, node);
}

/* Undo all additions until the number of symtab nodes in PARTITION is
   N_NODES.  */

static void
undo_partition (ltrans_partition partition, unsigned int n_nodes)
{
  while (lto_symtab_encoder_size (partition->encoder) > (int)n_nodes)
    {
      symtab_node *node = lto_symtab_encoder_deref (partition->encoder,
                                                    n_nodes);
      partition->symbols--;
      cgraph_node *cnode;

      /* After UNDO we no longer know what was visited.  */
      if (partition->initializers_visited)
        delete partition->initializers_visited;
      partition->initializers_visited = NULL;

      if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
          && node->get_partitioning_class () == SYMBOL_PARTITION)
        partition->insns -= ipa_size_summaries->get (cnode)->size;
      lto_symtab_encoder_delete_node (partition->encoder, node);
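      /* Drop the partition reference count kept in NODE->aux (see
         add_symbol_to_partition_1).  */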
      node->aux = (void *)((size_t)node->aux - 1);
    }
}

/* Group cgraph nodes by input files.  This is used mainly for testing
   right now.  */

void
lto_1_to_1_map (void)
{
  symtab_node *node;
  struct lto_file_decl_data *file_data;
  hash_map<lto_file_decl_data *, ltrans_partition> pmap;
  ltrans_partition partition;
  int npartitions = 0;

  FOR_EACH_SYMBOL (node)
    {
      if (node->get_partitioning_class () != SYMBOL_PARTITION
          || symbol_partitioned_p (node))
        continue;

      file_data = node->lto_file_data;

      if (file_data)
        {
          ltrans_partition *slot = &pmap.get_or_insert (file_data);
          if (*slot)
            partition = *slot;
          else
            {
              partition = new_partition (file_data->file_name);
              *slot = partition;
              npartitions++;
            }
        }
      else if (!file_data && ltrans_partitions.length ())
        partition = ltrans_partitions[0];
      else
        {
          partition = new_partition ("");
          npartitions++;
        }

      add_symbol_to_partition (partition, node);
    }

  /* If the cgraph is empty, create one cgraph node set so that there is still
     an output file for any variables that need to be exported in a DSO.  */
  if (!npartitions)
    new_partition ("empty");

  /* Order partitions by order of symbols because they are linked into binary
     that way.  */
  ltrans_partitions.qsort (cmp_partitions_order);
}

/* Maximal partitioning.  Put every new symbol into a new partition if
   possible.  */

void
lto_max_map (void)
{
  symtab_node *node;
  ltrans_partition partition;
  int npartitions = 0;

  FOR_EACH_SYMBOL (node)
    {
      if (node->get_partitioning_class () != SYMBOL_PARTITION
          || symbol_partitioned_p (node))
        continue;
      partition = new_partition (node->asm_name ());
      add_symbol_to_partition (partition, node);
      npartitions++;
    }
  if (!npartitions)
    new_partition ("empty");
}

/* Helper function for qsort; sort nodes by order.  */
static int
node_cmp (const void *pa, const void *pb)
{
  const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
  const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
  return b->order - a->order;
}

/* Add all symtab nodes from NEXT_NODE to PARTITION in order.  */

static void
add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
{
  unsigned i;
  symtab_node *node;

  next_nodes.qsort (node_cmp);
  FOR_EACH_VEC_ELT (next_nodes, i, node)
    if (!symbol_partitioned_p (node))
      add_symbol_to_partition (partition, node);
}

/* Return true if we should account the reference from N1 to N2 in the cost
   of the partition boundary.  */

bool
account_reference_p (symtab_node *n1, symtab_node *n2)
{
  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
    n1 = cnode;
  /* Do not account references from aliases - they are never split across
     partitions.  */
  if (n1->alias)
    return false;
  /* Do not account recursion - the code below will handle it incorrectly
     otherwise.  Do not account references to external symbols: they will
     never become local.  Finally do not account references to duplicated
     symbols: they will always be local.  */
  if (n1 == n2
      || !n2->definition
      || n2->get_partitioning_class () != SYMBOL_PARTITION)
    return false;
  /* If the referring node is an external symbol, do not account it to the
     boundary cost.  Those are added into units only to enable possible
     constant folding and devirtualization.

     Here we do not know if it will ever be added to some partition
     (this is decided by compute_ltrans_boundary) and, second, it is not
     very likely that constant folding will actually use the reference.  */
  if (contained_in_symbol (n1)
        ->get_partitioning_class () == SYMBOL_EXTERNAL)
    return false;
  return true;
}


/* Group cgraph nodes into equally-sized partitions.

   The partitioning algorithm is simple: nodes are taken in predefined order.
   The order corresponds to the order we want functions to have in the final
   output.  In the future this will be given by function reordering pass, but
   at the moment we use the topological order, which is a good approximation.

   The goal is to partition this linear order into intervals (partitions) so
   that all the partitions have approximately the same size and the number of
   callgraph or IPA reference edges crossing boundaries is minimal.

   This is a lot faster (O(n) in size of callgraph) than algorithms doing
   priority-based graph clustering that are generally O(n^2) and, since
   WHOPR is designed to make things go well across partitions, it leads
   to good results.

   We compute the expected size of a partition as:

     max (total_size / lto_partitions, min_partition_size)

   We use dynamic expected size of partition so small programs are partitioned
   into enough partitions to allow use of multiple CPUs, while large programs
   are not partitioned too much.  Creating too many partitions significantly
   increases the streaming overhead.

   In the future, we would like to bound the maximal size of partitions so as
   to prevent the LTRANS stage from consuming too much memory.  At the moment,
   however, the WPA stage is the most memory intensive for large benchmarks,
   since too many types and declarations are read into memory.

   The function implements a simple greedy algorithm.  Nodes are being added
   to the current partition until after 3/4 of the expected partition size is
   reached.  Past this threshold, we keep track of boundary size (number of
   edges going to other partitions) and continue adding functions until after
   the current partition has grown to twice the expected partition size.  Then
   the process is undone to the point where the minimal ratio of boundary size
   and in-partition calls was reached.  */

void
lto_balanced_map (int n_lto_partitions, int max_partition_size)
{
  int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
  int best_noreorder_pos = 0;
  auto_vec <cgraph_node *> order (symtab->cgraph_count);
  auto_vec<cgraph_node *> noreorder;
  auto_vec<varpool_node *> varpool_order;
  struct cgraph_node *node;
  int64_t original_total_size, total_size = 0;
  int64_t partition_size;
  ltrans_partition partition;
  int last_visited_node = 0;
  varpool_node *vnode;
  int64_t cost = 0, internal = 0;
  unsigned int best_n_nodes = 0, best_i = 0;
  int64_t best_cost = -1, best_internal = 0, best_size = 0;
  int npartitions;
  int current_order = -1;
  int noreorder_pos = 0;

  FOR_EACH_VARIABLE (vnode)
    gcc_assert (!vnode->aux);

  FOR_EACH_DEFINED_FUNCTION (node)
    if (node->get_partitioning_class () == SYMBOL_PARTITION)
      {
        if (node->no_reorder)
          noreorder.safe_push (node);
        else
          order.safe_push (node);
        if (!node->alias)
          total_size += ipa_size_summaries->get (node)->size;
      }

  original_total_size = total_size;

  /* Streaming works best when the source units do not cross partition
     boundaries much.  This is because importing a function from a source
     unit tends to import a lot of global trees defined there.  We should
     get better about minimizing the function boundary, but until then
     things work more smoothly if we order in source order.  */
  order.qsort (tp_first_run_node_cmp);
  noreorder.qsort (node_cmp);

  if (dump_file)
    {
      for (unsigned i = 0; i < order.length (); i++)
        fprintf (dump_file, "Balanced map symbol order:%s:%u\n",
                 order[i]->dump_name (), order[i]->tp_first_run);
      for (unsigned i = 0; i < noreorder.length (); i++)
        fprintf (dump_file, "Balanced map symbol no_reorder:%s:%u\n",
                 noreorder[i]->dump_name (), noreorder[i]->tp_first_run);
    }

  /* Collect all variables that should not be reordered.  */
  FOR_EACH_VARIABLE (vnode)
    if (vnode->get_partitioning_class () == SYMBOL_PARTITION
        && vnode->no_reorder)
      varpool_order.safe_push (vnode);
  n_varpool_nodes = varpool_order.length ();
  varpool_order.qsort (node_cmp);

  /* Compute partition size and create the first partition.  */
  if (param_min_partition_size > max_partition_size)
    fatal_error (input_location, "min partition size cannot be greater "
                 "than max partition size");

  partition_size = total_size / n_lto_partitions;
  if (partition_size < param_min_partition_size)
    partition_size = param_min_partition_size;
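  /* For instance (illustrative numbers only), a unit totalling 3,200,000
     insns split into 32 requested partitions aims at roughly 100,000 insns
     per partition, while a tiny program falls back to
     param_min_partition_size instead of producing many near-empty
     partitions.  */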
  npartitions = 1;
  partition = new_partition ("");
  if (dump_file)
    fprintf (dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
             total_size, partition_size);

  auto_vec<symtab_node *> next_nodes;

  for (unsigned i = 0; i < order.length (); i++)
    {
      if (symbol_partitioned_p (order[i]))
        continue;

      current_order = order[i]->order;

      /* Output noreorder and varpool in program order first.  */
      next_nodes.truncate (0);
      while (varpool_pos < n_varpool_nodes
             && varpool_order[varpool_pos]->order < current_order)
        next_nodes.safe_push (varpool_order[varpool_pos++]);
      while (noreorder_pos < (int)noreorder.length ()
             && noreorder[noreorder_pos]->order < current_order)
        next_nodes.safe_push (noreorder[noreorder_pos++]);
      add_sorted_nodes (next_nodes, partition);

      if (!symbol_partitioned_p (order[i]))
        add_symbol_to_partition (partition, order[i]);


      /* Once we added a new node to the partition, we also want to add
         all referenced variables unless they were already added into some
         earlier partition.
         add_symbol_to_partition adds possibly multiple nodes and
         variables that are needed to satisfy the needs of ORDER[i].
         We remember the last visited cgraph and varpool node from the last
         iteration of the outer loop, which allows us to process every new
         addition.

         At the same time we compute the size of the boundary into COST.
         Every callgraph or IPA reference edge leaving the partition
         contributes to COST.  Every edge inside the partition was earlier
         computed as one leaving it and thus we need to subtract it from
         COST.  */
      while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
        {
          int j;
          struct ipa_ref *ref = NULL;
          symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
                                                         last_visited_node);

          if (cgraph_node *node = dyn_cast <cgraph_node *> (snode))
            {
              struct cgraph_edge *edge;


              last_visited_node++;

              gcc_assert (node->definition || node->weakref
                          || node->declare_variant_alt);

              /* Compute boundary cost of callgraph edges.  */
              for (edge = node->callees; edge; edge = edge->next_callee)
                /* Inline edges will always end up local.  */
                if (edge->inline_failed
                    && account_reference_p (node, edge->callee))
                  {
                    int edge_cost = edge->frequency ();
                    int index;

                    if (!edge_cost)
                      edge_cost = 1;
                    gcc_assert (edge_cost > 0);
                    index = lto_symtab_encoder_lookup (partition->encoder,
                                                       edge->callee);
                    if (index != LCC_NOT_FOUND
                        && index < last_visited_node - 1)
                      cost -= edge_cost, internal += edge_cost;
                    else
                      cost += edge_cost;
                  }
              for (edge = node->callers; edge; edge = edge->next_caller)
                if (edge->inline_failed
                    && account_reference_p (edge->caller, node))
                  {
                    int edge_cost = edge->frequency ();
                    int index;

                    gcc_assert (edge->caller->definition);
                    if (!edge_cost)
                      edge_cost = 1;
                    gcc_assert (edge_cost > 0);
                    index = lto_symtab_encoder_lookup (partition->encoder,
                                                       edge->caller);
                    if (index != LCC_NOT_FOUND
                        && index < last_visited_node - 1)
                      cost -= edge_cost, internal += edge_cost;
                    else
                      cost += edge_cost;
                  }
            }
          else
            last_visited_node++;

          /* Compute boundary cost of IPA REF edges and at the same time look
             into variables referenced from the current partition and try to
             add them.  */
          for (j = 0; snode->iterate_reference (j, ref); j++)
            if (!account_reference_p (snode, ref->referred))
              ;
            else if (is_a <varpool_node *> (ref->referred))
              {
                int index;

                vnode = dyn_cast <varpool_node *> (ref->referred);
                if (!symbol_partitioned_p (vnode)
                    && !vnode->no_reorder
                    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
                  add_symbol_to_partition (partition, vnode);
                index = lto_symtab_encoder_lookup (partition->encoder,
                                                   vnode);
                if (index != LCC_NOT_FOUND
                    && index < last_visited_node - 1)
                  cost--, internal++;
                else
                  cost++;
              }
            else
              {
                int index;

                node = dyn_cast <cgraph_node *> (ref->referred);
                index = lto_symtab_encoder_lookup (partition->encoder,
                                                   node);
                if (index != LCC_NOT_FOUND
                    && index < last_visited_node - 1)
                  cost--, internal++;
                else
                  cost++;
              }
          for (j = 0; snode->iterate_referring (j, ref); j++)
            if (!account_reference_p (ref->referring, snode))
              ;
            else if (is_a <varpool_node *> (ref->referring))
              {
                int index;

                vnode = dyn_cast <varpool_node *> (ref->referring);
                gcc_assert (vnode->definition);
                /* It is better to couple variables with their users,
                   because it allows them to be removed.  Coupling
                   with objects they refer to only helps to reduce
                   the number of symbols promoted to hidden.  */
                if (!symbol_partitioned_p (vnode)
                    && !vnode->no_reorder
                    && !vnode->can_remove_if_no_refs_p ()
                    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
                  add_symbol_to_partition (partition, vnode);
                index = lto_symtab_encoder_lookup (partition->encoder,
                                                   vnode);
                if (index != LCC_NOT_FOUND
                    && index < last_visited_node - 1)
                  cost--, internal++;
                else
                  cost++;
              }
            else
              {
                int index;

                node = dyn_cast <cgraph_node *> (ref->referring);
                gcc_assert (node->definition || node->declare_variant_alt);
                index = lto_symtab_encoder_lookup (partition->encoder,
                                                   node);
                if (index != LCC_NOT_FOUND
                    && index < last_visited_node - 1)
                  cost--, internal++;
                else
                  cost++;
              }
        }

      gcc_assert (cost >= 0 && internal >= 0);

      /* If the partition is large enough, start looking for the smallest
         boundary cost.  If the partition still seems too small (less than
         7/8 of the target weight), accept any cost.  If the partition has
         the right size, optimize for the highest internal/cost ratio.
         Later we stop building the partition once its size reaches 9/8 of
         the target weight.  */
      if (partition->insns < partition_size * 7 / 8
          || best_cost == -1
          || (!cost
              || ((sreal)best_internal * (sreal) cost
                  < ((sreal) internal * (sreal)best_cost))))
        {
          best_cost = cost;
          best_internal = internal;
          best_size = partition->insns;
          best_i = i;
          best_n_nodes = lto_symtab_encoder_size (partition->encoder);
          best_varpool_pos = varpool_pos;
          best_noreorder_pos = noreorder_pos;
        }
      if (dump_file)
        fprintf (dump_file, "Step %i: added %s, size %i, "
                 "cost %" PRId64 "/%" PRId64 " "
                 "best %" PRId64 "/%" PRId64", step %i\n", i,
                 order[i]->dump_name (),
                 partition->insns, cost, internal,
                 best_cost, best_internal, best_i);
      /* If the partition is too large, unwind to the step where the best cost
         was reached and start a new partition.  */
      if (partition->insns > 9 * partition_size / 8
          || partition->insns > max_partition_size)
        {
          if (best_i != i)
            {
              if (dump_file)
                fprintf (dump_file, "Unwinding %i insertions to step %i\n",
                         i - best_i, best_i);
              undo_partition (partition, best_n_nodes);
              varpool_pos = best_varpool_pos;
              noreorder_pos = best_noreorder_pos;
            }
          gcc_assert (best_size == partition->insns);
          i = best_i;
          if (dump_file)
            fprintf (dump_file,
                     "Partition insns: %i (want %" PRId64 ")\n",
                     partition->insns, partition_size);
          /* When we are finished, avoid creating an empty partition.  */
          while (i < order.length () - 1 && symbol_partitioned_p (order[i + 1]))
            i++;
          if (i == order.length () - 1)
            break;
          total_size -= partition->insns;
          partition = new_partition ("");
          last_visited_node = 0;
          cost = 0;

          if (dump_file)
            fprintf (dump_file, "New partition\n");
          best_n_nodes = 0;
          best_cost = -1;

          /* Since the size of partitions is just approximate, update the
             expected size after we finish the current one.  */
          if (npartitions < n_lto_partitions)
            partition_size = total_size / (n_lto_partitions - npartitions);
          else
            /* Watch for overflow.  */
            partition_size = INT_MAX / 16;

          if (dump_file)
            fprintf (dump_file,
                     "Total size: %" PRId64 " partition_size: %" PRId64 "\n",
                     total_size, partition_size);
          if (partition_size < param_min_partition_size)
            partition_size = param_min_partition_size;
          npartitions ++;
        }
    }

  next_nodes.truncate (0);

  /* Variables that are not reachable from the code go into the last
     partition.  */
  FOR_EACH_VARIABLE (vnode)
    if (vnode->get_partitioning_class () == SYMBOL_PARTITION
        && !symbol_partitioned_p (vnode))
      next_nodes.safe_push (vnode);

  /* Output remaining ordered symbols.  */
  while (varpool_pos < n_varpool_nodes)
    next_nodes.safe_push (varpool_order[varpool_pos++]);
  while (noreorder_pos < (int)noreorder.length ())
    next_nodes.safe_push (noreorder[noreorder_pos++]);
  /* For one partition the boundary cost should be 0 unless we added final
     symbols here (these are not accounted) or we have an accounting bug.  */
  gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost || best_cost == -1);
  add_sorted_nodes (next_nodes, partition);

  if (dump_file)
    {
      fprintf (dump_file, "\nPartition sizes:\n");
      unsigned partitions = ltrans_partitions.length ();

      for (unsigned i = 0; i < partitions ; i++)
        {
          ltrans_partition p = ltrans_partitions[i];
          fprintf (dump_file, "partition %d contains %d (%2.2f%%)"
                   " symbols and %d (%2.2f%%) insns\n", i, p->symbols,
                   100.0 * p->symbols / order.length (), p->insns,
                   100.0 * p->insns / original_total_size);
        }

      fprintf (dump_file, "\n");
    }
}

/* Return true if we must not change the name of the NODE.  The name as
   extracted from the corresponding decl should be passed in NAME.  */

static bool
must_not_rename (symtab_node *node, const char *name)
{
  /* Our renaming machinery does not handle more than one change of assembler
     name.  We should not need more than one anyway.  */
  if (node->lto_file_data
      && lto_get_decl_name_mapping (node->lto_file_data, name) != name)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Not privatizing symbol name: %s.  It was privatized already.\n",
                 name);
      return true;
    }
  /* Avoid mangling of already mangled clones.
     ??? We should have a flag telling whether a symbol already has a
     'private' name, since we produce some symbols like that, e.g. for global
     constructors, that are not really clones.
     ??? That is what unique_name means.  We only need to set it when doing
     private symbols.  */
  if (node->unique_name)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Not privatizing symbol name: %s.  Has unique name.\n",
                 name);
      return true;
    }
  return false;
}

/* If we are an offload compiler, we may have to rewrite symbols to be
   valid on this target.  Return either PTR or a modified version of it.  */

static const char *
maybe_rewrite_identifier (const char *ptr)
{
#if defined ACCEL_COMPILER && (defined NO_DOT_IN_LABEL || defined NO_DOLLAR_IN_LABEL)
#ifndef NO_DOT_IN_LABEL
  char valid = '.';
  const char reject[] = "$";
#elif !defined NO_DOLLAR_IN_LABEL
  char valid = '$';
  const char reject[] = ".";
#else
  char valid = '_';
  const char reject[] = ".$";
#endif

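  /* Replace every rejected character with the valid one; for example, on a
     target that rejects '$' but accepts '.', a name like "foo$bar" would
     become "foo.bar" (illustrative name only).  */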
  char *copy = NULL;
  const char *match = ptr;
  for (;;)
    {
      size_t off = strcspn (match, reject);
      if (match[off] == '\0')
        break;
      if (copy == NULL)
        {
          copy = xstrdup (ptr);
          match = copy;
        }
      copy[off] = valid;
    }
  if (copy)
    {
      match = IDENTIFIER_POINTER (get_identifier (copy));
      free (copy);
    }
  return match;
#else
  return ptr;
#endif
}

/* Ensure that the symbol in NODE is valid for the target, and if not,
   rewrite it.  */

static void
validize_symbol_for_target (symtab_node *node)
{
  tree decl = node->decl;
  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  if (must_not_rename (node, name))
    return;

  const char *name2 = maybe_rewrite_identifier (name);
  if (name2 != name)
    {
      symtab->change_decl_assembler_name (decl, get_identifier (name2));
      if (node->lto_file_data)
        lto_record_renamed_decl (node->lto_file_data, name, name2);
    }
}

/* Maps symbol names to unique lto clone counters.  */
static hash_map<const char *, unsigned> *lto_clone_numbers;

/* Helper for privatize_symbol_name.  Mangle NODE symbol name
   represented by DECL.  */

static bool
privatize_symbol_name_1 (symtab_node *node, tree decl)
{
  const char *name0 = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  if (must_not_rename (node, name0))
    return false;

  const char *name = maybe_rewrite_identifier (name0);
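  /* Append a partition-local clone suffix; the resulting assembler name is
     typically of the form <name>.lto_priv.<N> (the separator character is
     target dependent).  */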
  unsigned &clone_number = lto_clone_numbers->get_or_insert (name);
  symtab->change_decl_assembler_name (decl,
                                      clone_function_name (
                                          name, "lto_priv", clone_number));
  clone_number++;

  if (node->lto_file_data)
    lto_record_renamed_decl (node->lto_file_data, name0,
                             IDENTIFIER_POINTER
                             (DECL_ASSEMBLER_NAME (decl)));

  if (dump_file)
    fprintf (dump_file,
             "Privatizing symbol name: %s -> %s\n",
             name, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));

  return true;
}

/* Mangle NODE symbol name into a local name.
   This is necessary to do
   1) if two or more static vars of the same assembler name
      are merged into a single ltrans unit.
   2) if a previously static var was promoted hidden to avoid a possible
      conflict with symbols defined outside of the LTO world.  */

static bool
privatize_symbol_name (symtab_node *node)
{
  if (!privatize_symbol_name_1 (node, node->decl))
    return false;

  return true;
}

/* Promote symbol NODE to be globally visible (with hidden visibility).  */

static void
promote_symbol (symtab_node *node)
{
  /* We already promoted ... */
  if (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
      && DECL_VISIBILITY_SPECIFIED (node->decl)
      && TREE_PUBLIC (node->decl))
    {
      validize_symbol_for_target (node);
      return;
    }

  gcc_checking_assert (!TREE_PUBLIC (node->decl)
                       && !DECL_EXTERNAL (node->decl));
  /* Be sure that the newly public symbol does not conflict with anything
     already defined by the non-LTO part.  */
  privatize_symbol_name (node);
  TREE_PUBLIC (node->decl) = 1;
  /* After privatization the node should not conflict with any other symbol,
     so it is prevailing.  This is important to keep binds_to_current_def_p
     working across partitions.  */
  node->resolution = LDPR_PREVAILING_DEF_IRONLY;
  node->semantic_interposition = false;
  DECL_VISIBILITY (node->decl) = VISIBILITY_HIDDEN;
  DECL_VISIBILITY_SPECIFIED (node->decl) = true;
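  /* At this point a formerly file-local symbol is visible across partitions
     as a hidden global, e.g. under a name like "foo.lto_priv.0"
     (illustrative name only).  */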
  if (dump_file)
    fprintf (dump_file,
             "Promoting as hidden: %s (%s)\n", node->dump_name (),
             IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));

  /* Promoting a symbol also promotes all transparent aliases with the
     exception of weakrefs, where the visibility flags are always wrong and
     set to !PUBLIC.  */
  ipa_ref *ref;
  for (unsigned i = 0; node->iterate_direct_aliases (i, ref); i++)
    {
      struct symtab_node *alias = ref->referring;
      if (alias->transparent_alias && !alias->weakref)
        {
          TREE_PUBLIC (alias->decl) = 1;
          DECL_VISIBILITY (alias->decl) = VISIBILITY_HIDDEN;
          DECL_VISIBILITY_SPECIFIED (alias->decl) = true;
          if (dump_file)
            fprintf (dump_file,
                     "Promoting alias as hidden: %s\n",
                     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
        }
      gcc_assert (!alias->weakref || TREE_PUBLIC (alias->decl));
    }
}

/* Return true if NODE needs named section even if it won't land in
   the partition symbol table.

   FIXME: we should really not use named sections for master clones.  */

static bool
may_need_named_section_p (lto_symtab_encoder_t encoder, symtab_node *node)
{
  struct cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
  /* We do not need to handle variables since we never clone them.  */
  if (!cnode)
    return false;
  /* Only master clones will have bodies streamed.  */
  if (cnode->clone_of)
    return false;
  if (node->real_symbol_p ())
    return false;
  return (!encoder
          || (lto_symtab_encoder_lookup (encoder, node) != LCC_NOT_FOUND
              && lto_symtab_encoder_encode_body_p (encoder,
                                                   cnode)));
}

/* If NODE represents a static variable, see if there are other variables
   of the same name in partition ENCODER (or in the whole compilation unit if
   ENCODER is NULL) and if so, mangle the statics.  Always mangle all
   conflicting statics, so we reduce chances of silently miscompiling
   asm statements referring to them by symbol name.  */

static void
rename_statics (lto_symtab_encoder_t encoder, symtab_node *node)
{
  tree decl = node->decl;
  symtab_node *s;
  tree name = DECL_ASSEMBLER_NAME (decl);

  /* See if this is a static symbol.  */
  if (((node->externally_visible && !node->weakref)
       /* FIXME: externally_visible is somewhat illogically not set for
          external symbols (i.e. those not defined).  Remove this test
          once this is fixed.  */
       || DECL_EXTERNAL (node->decl)
       || !node->real_symbol_p ())
      && !may_need_named_section_p (encoder, node))
    return;

  /* Now walk symbols sharing the same name and see if there are any
     conflicts.  (All types of symbols count here, since we cannot have a
     static with the same name as an external or public symbol.)  */
  for (s = symtab_node::get_for_asmname (name);
       s; s = s->next_sharing_asm_name)
    if ((s->real_symbol_p () || may_need_named_section_p (encoder, s))
        && s->decl != node->decl
        && (!encoder
            || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
      break;

  /* OK, no conflict, so we have nothing to do.  */
  if (!s)
    return;

  if (dump_file)
    fprintf (dump_file,
             "Renaming statics with asm name: %s\n", node->dump_name ());

  /* Assign every symbol in the set that shares the same ASM name a unique
     mangled name.  */
  for (s = symtab_node::get_for_asmname (name); s;)
    if ((!s->externally_visible || s->weakref)
        /* Transparent aliases having the same name as their target are
           renamed when their target gets a new name.  Transparent aliases
           that use a separate assembler name require the name to be
           unique.  */
        && (!s->transparent_alias || !s->definition || s->weakref
            || !symbol_table::assembler_names_equal_p
                 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (s->decl)),
                  IDENTIFIER_POINTER
                    (DECL_ASSEMBLER_NAME (s->get_alias_target()->decl))))
        && ((s->real_symbol_p ()
             && !DECL_EXTERNAL (s->decl)
             && !TREE_PUBLIC (s->decl))
            || may_need_named_section_p (encoder, s))
        && (!encoder
            || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
      {
        if (privatize_symbol_name (s))
          /* Re-start from the beginning since we do not know how many
             symbols changed a name.  */
          s = symtab_node::get_for_asmname (name);
        else
          s = s->next_sharing_asm_name;
      }
    else
      s = s->next_sharing_asm_name;
}

/* Find out all static decls that need to be promoted to global because
   of cross file sharing.  This function must be run in the WPA mode after
   all inlinees are added.  */

void
lto_promote_cross_file_statics (void)
{
  unsigned i, n_sets;

  gcc_assert (flag_wpa);

  lto_stream_offload_p = false;
  select_what_to_stream ();

  /* First compute boundaries.  */
  n_sets = ltrans_partitions.length ();
  for (i = 0; i < n_sets; i++)
    {
      ltrans_partition part
        = ltrans_partitions[i];
      part->encoder = compute_ltrans_boundary (part->encoder);
    }

  lto_clone_numbers = new hash_map<const char *, unsigned>;

  /* Look at boundaries and promote symbols as needed.  */
  for (i = 0; i < n_sets; i++)
    {
      lto_symtab_encoder_iterator lsei;
      lto_symtab_encoder_t encoder = ltrans_partitions[i]->encoder;

      for (lsei = lsei_start (encoder); !lsei_end_p (lsei);
           lsei_next (&lsei))
        {
          symtab_node *node = lsei_node (lsei);

          /* If the symbol is static, rename it if its assembler name
             clashes with anything else in this unit.  */
          rename_statics (encoder, node);

          /* No need to promote if the symbol is already externally
             visible ...  */
          if (node->externally_visible
              /* ... or if it is part of the current partition ...  */
              || lto_symtab_encoder_in_partition_p (encoder, node)
              /* ... or if we do not partition it.  This means that it will
                 appear in every partition referencing it.  */
              || node->get_partitioning_class () != SYMBOL_PARTITION)
            {
              validize_symbol_for_target (node);
              continue;
            }

          promote_symbol (node);
        }
    }
  delete lto_clone_numbers;
}

/* Rename statics in the whole unit in the case that
   we do -flto-partition=none.  */

void
lto_promote_statics_nonwpa (void)
{
  symtab_node *node;

  lto_clone_numbers = new hash_map<const char *, unsigned>;
  FOR_EACH_SYMBOL (node)
    {
      rename_statics (NULL, node);
      validize_symbol_for_target (node);
    }
  delete lto_clone_numbers;
}
