1 | /* Routines to implement minimum-cost maximal flow algorithm used to smooth |
2 | basic block and edge frequency counts. |
3 | Copyright (C) 2008-2024 Free Software Foundation, Inc. |
4 | Contributed by Paul Yuan (yingbo.com@gmail.com) and |
5 | Vinodha Ramasamy (vinodha@google.com). |
6 | |
7 | This file is part of GCC. |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | /* References: |
23 | [1] "Feedback-directed Optimizations in GCC with Estimated Edge Profiles |
24 | from Hardware Event Sampling", Vinodha Ramasamy, Paul Yuan, Dehao Chen, |
25 | and Robert Hundt; GCC Summit 2008. |
26 | [2] "Complementing Missing and Inaccurate Profiling Using a Minimum Cost |
27 | Circulation Algorithm", Roy Levin, Ilan Newman and Gadi Haber; |
28 | HiPEAC '08. |
29 | |
30 | Algorithm to smooth basic block and edge counts: |
31 | 1. create_fixup_graph: Create fixup graph by translating function CFG into |
32 | a graph that satisfies MCF algorithm requirements. |
33 | 2. find_max_flow: Find maximal flow. |
34 | 3. compute_residual_flow: Form residual network. |
35 | 4. Repeat: |
36 | cancel_negative_cycle: While G contains a negative cost cycle C, reverse |
37 | the flow on the found cycle by the minimum residual capacity in that |
38 | cycle. |
39 | 5. Form the minimal cost flow |
40 | f(u,v) = rf(v, u). |
41 | 6. adjust_cfg_counts: Update initial edge weights with corrected weights. |
42 | delta(u.v) = f(u,v) -f(v,u). |
43 | w*(u,v) = w(u,v) + delta(u,v). */ |
44 | |
45 | #include "config.h" |
46 | #include "system.h" |
47 | #include "coretypes.h" |
48 | #include "backend.h" |
49 | #include "profile.h" |
50 | #include "dumpfile.h" |
51 | |
52 | /* CAP_INFINITY: Constant to represent infinite capacity. */ |
53 | #define CAP_INFINITY INTTYPE_MAXIMUM (int64_t) |
54 | |
55 | /* COST FUNCTION. */ |
56 | #define K_POS(b) ((b)) |
57 | #define K_NEG(b) (50 * (b)) |
58 | #define COST(k, w) ((k) / mcf_ln ((w) + 2)) |
59 | /* Limit the number of iterations for cancel_negative_cycles() to ensure |
60 | reasonable compile time. */ |
61 | #define MAX_ITER(n, e) 10 + (1000000 / ((n) * (e))) |
62 | enum edge_type |
63 | { |
64 | INVALID_EDGE, |
65 | VERTEX_SPLIT_EDGE, /* Edge to represent vertex with w(e) = w(v). */ |
66 | REDIRECT_EDGE, /* Edge after vertex transformation. */ |
67 | REVERSE_EDGE, |
68 | SOURCE_CONNECT_EDGE, /* Single edge connecting to single source. */ |
69 | SINK_CONNECT_EDGE, /* Single edge connecting to single sink. */ |
70 | BALANCE_EDGE, /* Edge connecting with source/sink: cp(e) = 0. */ |
71 | REDIRECT_NORMALIZED_EDGE, /* Normalized edge for a redirect edge. */ |
72 | REVERSE_NORMALIZED_EDGE /* Normalized edge for a reverse edge. */ |
73 | }; |
74 | |
75 | /* Structure to represent an edge in the fixup graph. */ |
76 | struct fixup_edge_type |
77 | { |
78 | int src; |
79 | int dest; |
80 | /* Flag denoting type of edge and attributes for the flow field. */ |
81 | edge_type type; |
82 | bool is_rflow_valid; |
83 | /* Index to the normalization vertex added for this edge. */ |
84 | int norm_vertex_index; |
85 | /* Flow for this edge. */ |
86 | gcov_type flow; |
87 | /* Residual flow for this edge - used during negative cycle canceling. */ |
88 | gcov_type rflow; |
89 | gcov_type weight; |
90 | gcov_type cost; |
91 | gcov_type max_capacity; |
92 | }; |
93 | |
94 | typedef fixup_edge_type *fixup_edge_p; |
95 | |
96 | |
97 | /* Structure to represent a vertex in the fixup graph. */ |
98 | struct fixup_vertex_type |
99 | { |
100 | vec<fixup_edge_p> succ_edges; |
101 | }; |
102 | |
103 | typedef fixup_vertex_type *fixup_vertex_p; |
104 | |
105 | /* Fixup graph used in the MCF algorithm. */ |
106 | struct fixup_graph_type |
107 | { |
108 | /* Current number of vertices for the graph. */ |
109 | int num_vertices; |
110 | /* Current number of edges for the graph. */ |
111 | int num_edges; |
112 | /* Index of new entry vertex. */ |
113 | int new_entry_index; |
114 | /* Index of new exit vertex. */ |
115 | int new_exit_index; |
116 | /* Fixup vertex list. Adjacency list for fixup graph. */ |
117 | fixup_vertex_p vertex_list; |
118 | /* Fixup edge list. */ |
119 | fixup_edge_p edge_list; |
120 | }; |
121 | |
122 | struct queue_type |
123 | { |
124 | int *queue; |
125 | int head; |
126 | int tail; |
127 | int size; |
128 | }; |
129 | |
130 | /* Structure used in the maximal flow routines to find augmenting path. */ |
131 | struct augmenting_path_type |
132 | { |
133 | /* Queue used to hold vertex indices. */ |
134 | queue_type queue_list; |
135 | /* Vector to hold chain of pred vertex indices in augmenting path. */ |
136 | int *bb_pred; |
137 | /* Vector that indicates if basic block i has been visited. */ |
138 | int *is_visited; |
139 | }; |
140 | |
141 | |
142 | /* Function definitions. */ |
143 | |
144 | /* Dump routines to aid debugging. */ |
145 | |
146 | /* Print basic block with index N for FIXUP_GRAPH in n' and n'' format. */ |
147 | |
148 | static void |
149 | print_basic_block (FILE *file, fixup_graph_type *fixup_graph, int n) |
150 | { |
151 | if (n == ENTRY_BLOCK) |
152 | fputs (s: "ENTRY" , stream: file); |
153 | else if (n == ENTRY_BLOCK + 1) |
154 | fputs (s: "ENTRY''" , stream: file); |
155 | else if (n == 2 * EXIT_BLOCK) |
156 | fputs (s: "EXIT" , stream: file); |
157 | else if (n == 2 * EXIT_BLOCK + 1) |
158 | fputs (s: "EXIT''" , stream: file); |
159 | else if (n == fixup_graph->new_exit_index) |
160 | fputs (s: "NEW_EXIT" , stream: file); |
161 | else if (n == fixup_graph->new_entry_index) |
162 | fputs (s: "NEW_ENTRY" , stream: file); |
163 | else |
164 | { |
165 | fprintf (stream: file, format: "%d" , n / 2); |
166 | if (n % 2) |
167 | fputs (s: "''" , stream: file); |
168 | else |
169 | fputs (s: "'" , stream: file); |
170 | } |
171 | } |
172 | |
173 | |
174 | /* Print edge S->D for given fixup_graph with n' and n'' format. |
175 | PARAMETERS: |
176 | S is the index of the source vertex of the edge (input) and |
177 | D is the index of the destination vertex of the edge (input) for the given |
178 | fixup_graph (input). */ |
179 | |
180 | static void |
181 | print_edge (FILE *file, fixup_graph_type *fixup_graph, int s, int d) |
182 | { |
183 | print_basic_block (file, fixup_graph, n: s); |
184 | fputs (s: "->" , stream: file); |
185 | print_basic_block (file, fixup_graph, n: d); |
186 | } |
187 | |
188 | |
189 | /* Dump out the attributes of a given edge FEDGE in the fixup_graph to a |
190 | file. */ |
191 | static void |
192 | dump_fixup_edge (FILE *file, fixup_graph_type *fixup_graph, fixup_edge_p fedge) |
193 | { |
194 | if (!fedge) |
195 | { |
196 | fputs (s: "NULL fixup graph edge.\n" , stream: file); |
197 | return; |
198 | } |
199 | |
200 | print_edge (file, fixup_graph, s: fedge->src, d: fedge->dest); |
201 | fputs (s: ": " , stream: file); |
202 | |
203 | if (fedge->type) |
204 | { |
205 | fprintf (stream: file, format: "flow/capacity=%" PRId64 "/" , |
206 | fedge->flow); |
207 | if (fedge->max_capacity == CAP_INFINITY) |
208 | fputs (s: "+oo," , stream: file); |
209 | else |
210 | fprintf (stream: file, format: "%" PRId64 "," , fedge->max_capacity); |
211 | } |
212 | |
213 | if (fedge->is_rflow_valid) |
214 | { |
215 | if (fedge->rflow == CAP_INFINITY) |
216 | fputs (s: " rflow=+oo." , stream: file); |
217 | else |
218 | fprintf (stream: file, format: " rflow=%" PRId64 "," , fedge->rflow); |
219 | } |
220 | |
221 | fprintf (stream: file, format: " cost=%" PRId64 "." , fedge->cost); |
222 | |
223 | fprintf (stream: file, format: "\t(%d->%d)" , fedge->src, fedge->dest); |
224 | |
225 | if (fedge->type) |
226 | { |
227 | switch (fedge->type) |
228 | { |
229 | case VERTEX_SPLIT_EDGE: |
230 | fputs (s: " @VERTEX_SPLIT_EDGE" , stream: file); |
231 | break; |
232 | |
233 | case REDIRECT_EDGE: |
234 | fputs (s: " @REDIRECT_EDGE" , stream: file); |
235 | break; |
236 | |
237 | case SOURCE_CONNECT_EDGE: |
238 | fputs (s: " @SOURCE_CONNECT_EDGE" , stream: file); |
239 | break; |
240 | |
241 | case SINK_CONNECT_EDGE: |
242 | fputs (s: " @SINK_CONNECT_EDGE" , stream: file); |
243 | break; |
244 | |
245 | case REVERSE_EDGE: |
246 | fputs (s: " @REVERSE_EDGE" , stream: file); |
247 | break; |
248 | |
249 | case BALANCE_EDGE: |
250 | fputs (s: " @BALANCE_EDGE" , stream: file); |
251 | break; |
252 | |
253 | case REDIRECT_NORMALIZED_EDGE: |
254 | case REVERSE_NORMALIZED_EDGE: |
255 | fputs (s: " @NORMALIZED_EDGE" , stream: file); |
256 | break; |
257 | |
258 | default: |
259 | fputs (s: " @INVALID_EDGE" , stream: file); |
260 | break; |
261 | } |
262 | } |
263 | fputs (s: "\n" , stream: file); |
264 | } |
265 | |
266 | |
267 | /* Print out the edges and vertices of the given FIXUP_GRAPH, into the dump |
268 | file. The input string MSG is printed out as a heading. */ |
269 | |
270 | static void |
271 | dump_fixup_graph (FILE *file, fixup_graph_type *fixup_graph, const char *msg) |
272 | { |
273 | int i, j; |
274 | int fnum_vertices, fnum_edges; |
275 | |
276 | fixup_vertex_p fvertex_list, pfvertex; |
277 | fixup_edge_p pfedge; |
278 | |
279 | gcc_assert (fixup_graph); |
280 | fvertex_list = fixup_graph->vertex_list; |
281 | fnum_vertices = fixup_graph->num_vertices; |
282 | fnum_edges = fixup_graph->num_edges; |
283 | |
284 | fprintf (stream: file, format: "\nDump fixup graph for %s(): %s.\n" , |
285 | current_function_name (), msg); |
286 | fprintf (stream: file, |
287 | format: "There are %d vertices and %d edges. new_exit_index is %d.\n\n" , |
288 | fnum_vertices, fnum_edges, fixup_graph->new_exit_index); |
289 | |
290 | for (i = 0; i < fnum_vertices; i++) |
291 | { |
292 | pfvertex = fvertex_list + i; |
293 | fprintf (stream: file, format: "vertex_list[%d]: %d succ fixup edges.\n" , |
294 | i, pfvertex->succ_edges.length ()); |
295 | |
296 | for (j = 0; pfvertex->succ_edges.iterate (ix: j, ptr: &pfedge); |
297 | j++) |
298 | { |
299 | /* Distinguish forward edges and backward edges in the residual flow |
300 | network. */ |
301 | if (pfedge->type) |
302 | fputs (s: "(f) " , stream: file); |
303 | else if (pfedge->is_rflow_valid) |
304 | fputs (s: "(b) " , stream: file); |
305 | dump_fixup_edge (file, fixup_graph, fedge: pfedge); |
306 | } |
307 | } |
308 | |
309 | fputs (s: "\n" , stream: file); |
310 | } |
311 | |
312 | |
313 | /* Utility routines. */ |
314 | /* ln() implementation: approximate calculation. Returns ln of X. */ |
315 | |
316 | static double |
317 | mcf_ln (double x) |
318 | { |
319 | #define E 2.71828 |
320 | int l = 1; |
321 | double m = E; |
322 | |
323 | gcc_assert (x >= 0); |
324 | |
325 | while (m < x) |
326 | { |
327 | m *= E; |
328 | l++; |
329 | } |
330 | |
331 | return l; |
332 | } |
333 | |
334 | |
335 | /* sqrt() implementation: based on open source QUAKE3 code (magic sqrt |
336 | implementation) by John Carmack. Returns sqrt of X. */ |
337 | |
338 | static double |
339 | mcf_sqrt (double x) |
340 | { |
341 | #define MAGIC_CONST1 0x1fbcf800 |
342 | #define MAGIC_CONST2 0x5f3759df |
343 | union { |
344 | int intPart; |
345 | float floatPart; |
346 | } convertor, convertor2; |
347 | |
348 | gcc_assert (x >= 0); |
349 | |
350 | convertor.floatPart = x; |
351 | convertor2.floatPart = x; |
352 | convertor.intPart = MAGIC_CONST1 + (convertor.intPart >> 1); |
353 | convertor2.intPart = MAGIC_CONST2 - (convertor2.intPart >> 1); |
354 | |
355 | return 0.5f * (convertor.floatPart + (x * convertor2.floatPart)); |
356 | } |
357 | |
358 | |
359 | /* Common code shared between add_fixup_edge and add_rfixup_edge. Adds an edge |
360 | (SRC->DEST) to the edge_list maintained in FIXUP_GRAPH with cost of the edge |
361 | added set to COST. */ |
362 | |
363 | static fixup_edge_p |
364 | add_edge (fixup_graph_type *fixup_graph, int src, int dest, gcov_type cost) |
365 | { |
366 | fixup_vertex_p curr_vertex = fixup_graph->vertex_list + src; |
367 | fixup_edge_p curr_edge = fixup_graph->edge_list + fixup_graph->num_edges; |
368 | curr_edge->src = src; |
369 | curr_edge->dest = dest; |
370 | curr_edge->cost = cost; |
371 | fixup_graph->num_edges++; |
372 | if (dump_file) |
373 | dump_fixup_edge (file: dump_file, fixup_graph, fedge: curr_edge); |
374 | curr_vertex->succ_edges.safe_push (obj: curr_edge); |
375 | return curr_edge; |
376 | } |
377 | |
378 | |
379 | /* Add a fixup edge (src->dest) with attributes TYPE, WEIGHT, COST and |
380 | MAX_CAPACITY to the edge_list in the fixup graph. */ |
381 | |
382 | static void |
383 | add_fixup_edge (fixup_graph_type *fixup_graph, int src, int dest, |
384 | edge_type type, gcov_type weight, gcov_type cost, |
385 | gcov_type max_capacity) |
386 | { |
387 | fixup_edge_p curr_edge = add_edge (fixup_graph, src, dest, cost); |
388 | curr_edge->type = type; |
389 | curr_edge->weight = weight; |
390 | curr_edge->max_capacity = max_capacity; |
391 | } |
392 | |
393 | |
394 | /* Add a residual edge (SRC->DEST) with attributes RFLOW and COST |
395 | to the fixup graph. */ |
396 | |
397 | static void |
398 | add_rfixup_edge (fixup_graph_type *fixup_graph, int src, int dest, |
399 | gcov_type rflow, gcov_type cost) |
400 | { |
401 | fixup_edge_p curr_edge = add_edge (fixup_graph, src, dest, cost); |
402 | curr_edge->rflow = rflow; |
403 | curr_edge->is_rflow_valid = true; |
404 | /* This edge is not a valid edge - merely used to hold residual flow. */ |
405 | curr_edge->type = INVALID_EDGE; |
406 | } |
407 | |
408 | |
409 | /* Return the pointer to fixup edge SRC->DEST or NULL if edge does not |
410 | exist in the FIXUP_GRAPH. */ |
411 | |
412 | static fixup_edge_p |
413 | find_fixup_edge (fixup_graph_type *fixup_graph, int src, int dest) |
414 | { |
415 | int j; |
416 | fixup_edge_p pfedge; |
417 | fixup_vertex_p pfvertex; |
418 | |
419 | gcc_assert (src < fixup_graph->num_vertices); |
420 | |
421 | pfvertex = fixup_graph->vertex_list + src; |
422 | |
423 | for (j = 0; pfvertex->succ_edges.iterate (ix: j, ptr: &pfedge); |
424 | j++) |
425 | if (pfedge->dest == dest) |
426 | return pfedge; |
427 | |
428 | return NULL; |
429 | } |
430 | |
431 | |
432 | /* Cleanup routine to free structures in FIXUP_GRAPH. */ |
433 | |
434 | static void |
435 | delete_fixup_graph (fixup_graph_type *fixup_graph) |
436 | { |
437 | int i; |
438 | int fnum_vertices = fixup_graph->num_vertices; |
439 | fixup_vertex_p pfvertex = fixup_graph->vertex_list; |
440 | |
441 | for (i = 0; i < fnum_vertices; i++, pfvertex++) |
442 | pfvertex->succ_edges.release (); |
443 | |
444 | free (ptr: fixup_graph->vertex_list); |
445 | free (ptr: fixup_graph->edge_list); |
446 | } |
447 | |
448 | |
449 | /* Creates a fixup graph FIXUP_GRAPH from the function CFG. */ |
450 | |
451 | static void |
452 | create_fixup_graph (fixup_graph_type *fixup_graph) |
453 | { |
454 | double sqrt_avg_vertex_weight = 0; |
455 | double total_vertex_weight = 0; |
456 | double k_pos = 0; |
457 | double k_neg = 0; |
458 | /* Vector to hold D(v) = sum_out_edges(v) - sum_in_edges(v). */ |
459 | gcov_type *diff_out_in = NULL; |
460 | gcov_type supply_value = 1, demand_value = 0; |
461 | gcov_type fcost = 0; |
462 | int new_entry_index = 0, new_exit_index = 0; |
463 | int i = 0, j = 0; |
464 | int new_index = 0; |
465 | basic_block bb; |
466 | edge e; |
467 | edge_iterator ei; |
468 | fixup_edge_p pfedge, r_pfedge; |
469 | fixup_edge_p fedge_list; |
470 | int fnum_edges; |
471 | |
472 | /* Each basic_block will be split into 2 during vertex transformation. */ |
473 | int fnum_vertices_after_transform = 2 * n_basic_blocks_for_fn (cfun); |
474 | int fnum_edges_after_transform = |
475 | n_edges_for_fn (cfun) + n_basic_blocks_for_fn (cfun); |
476 | |
477 | /* Count the new SOURCE and EXIT vertices to be added. */ |
478 | int fmax_num_vertices = |
479 | (fnum_vertices_after_transform + n_edges_for_fn (cfun) |
480 | + n_basic_blocks_for_fn (cfun) + 2); |
481 | |
482 | /* In create_fixup_graph: Each basic block and edge can be split into 3 |
483 | edges. Number of balance edges = n_basic_blocks. So after |
484 | create_fixup_graph: |
485 | max_edges = 4 * n_basic_blocks + 3 * n_edges |
486 | Accounting for residual flow edges |
487 | max_edges = 2 * (4 * n_basic_blocks + 3 * n_edges) |
488 | = 8 * n_basic_blocks + 6 * n_edges |
489 | < 8 * n_basic_blocks + 8 * n_edges. */ |
490 | int fmax_num_edges = 8 * (n_basic_blocks_for_fn (cfun) + |
491 | n_edges_for_fn (cfun)); |
492 | |
493 | /* Initial num of vertices in the fixup graph. */ |
494 | fixup_graph->num_vertices = n_basic_blocks_for_fn (cfun); |
495 | |
496 | /* Fixup graph vertex list. */ |
497 | fixup_graph->vertex_list = |
498 | (fixup_vertex_p) xcalloc (fmax_num_vertices, sizeof (fixup_vertex_type)); |
499 | |
500 | /* Fixup graph edge list. */ |
501 | fixup_graph->edge_list = |
502 | (fixup_edge_p) xcalloc (fmax_num_edges, sizeof (fixup_edge_type)); |
503 | |
504 | diff_out_in = |
505 | (gcov_type *) xcalloc (1 + fnum_vertices_after_transform, |
506 | sizeof (gcov_type)); |
507 | |
508 | /* Compute constants b, k_pos, k_neg used in the cost function calculation. |
509 | b = sqrt(avg_vertex_weight(cfg)); k_pos = b; k_neg = 50b. */ |
510 | FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
511 | total_vertex_weight += bb_gcov_count (bb); |
512 | |
513 | sqrt_avg_vertex_weight = mcf_sqrt (x: total_vertex_weight / |
514 | n_basic_blocks_for_fn (cfun)); |
515 | |
516 | k_pos = K_POS (sqrt_avg_vertex_weight); |
517 | k_neg = K_NEG (sqrt_avg_vertex_weight); |
518 | |
519 | /* 1. Vertex Transformation: Split each vertex v into two vertices v' and v'', |
520 | connected by an edge e from v' to v''. w(e) = w(v). */ |
521 | |
522 | if (dump_file) |
523 | fprintf (stream: dump_file, format: "\nVertex transformation:\n" ); |
524 | |
525 | FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) |
526 | { |
527 | /* v'->v'': index1->(index1+1). */ |
528 | i = 2 * bb->index; |
529 | fcost = (gcov_type) COST (k_pos, bb_gcov_count (bb)); |
530 | add_fixup_edge (fixup_graph, src: i, dest: i + 1, type: VERTEX_SPLIT_EDGE, weight: bb_gcov_count (bb), |
531 | cost: fcost, CAP_INFINITY); |
532 | fixup_graph->num_vertices++; |
533 | |
534 | FOR_EACH_EDGE (e, ei, bb->succs) |
535 | { |
536 | /* Edges with ignore attribute set should be treated like they don't |
537 | exist. */ |
538 | if (EDGE_INFO (e) && EDGE_INFO (e)->ignore) |
539 | continue; |
540 | j = 2 * e->dest->index; |
541 | fcost = (gcov_type) COST (k_pos, edge_gcov_count (e)); |
542 | add_fixup_edge (fixup_graph, src: i + 1, dest: j, type: REDIRECT_EDGE, weight: edge_gcov_count (e), |
543 | cost: fcost, CAP_INFINITY); |
544 | } |
545 | } |
546 | |
547 | /* After vertex transformation. */ |
548 | gcc_assert (fixup_graph->num_vertices == fnum_vertices_after_transform); |
549 | /* Redirect edges are not added for edges with ignore attribute. */ |
550 | gcc_assert (fixup_graph->num_edges <= fnum_edges_after_transform); |
551 | |
552 | fnum_edges_after_transform = fixup_graph->num_edges; |
553 | |
554 | /* 2. Initialize D(v). */ |
555 | for (i = 0; i < fnum_edges_after_transform; i++) |
556 | { |
557 | pfedge = fixup_graph->edge_list + i; |
558 | diff_out_in[pfedge->src] += pfedge->weight; |
559 | diff_out_in[pfedge->dest] -= pfedge->weight; |
560 | } |
561 | |
562 | /* Entry block - vertex indices 0, 1; EXIT block - vertex indices 2, 3. */ |
563 | for (i = 0; i <= 3; i++) |
564 | diff_out_in[i] = 0; |
565 | |
566 | /* 3. Add reverse edges: needed to decrease counts during smoothing. */ |
567 | if (dump_file) |
568 | fprintf (stream: dump_file, format: "\nReverse edges:\n" ); |
569 | for (i = 0; i < fnum_edges_after_transform; i++) |
570 | { |
571 | pfedge = fixup_graph->edge_list + i; |
572 | if ((pfedge->src == 0) || (pfedge->src == 2)) |
573 | continue; |
574 | r_pfedge = find_fixup_edge (fixup_graph, src: pfedge->dest, dest: pfedge->src); |
575 | if (!r_pfedge && pfedge->weight) |
576 | { |
577 | /* Skip adding reverse edges for edges with w(e) = 0, as its maximum |
578 | capacity is 0. */ |
579 | fcost = (gcov_type) COST (k_neg, pfedge->weight); |
580 | add_fixup_edge (fixup_graph, src: pfedge->dest, dest: pfedge->src, |
581 | type: REVERSE_EDGE, weight: 0, cost: fcost, max_capacity: pfedge->weight); |
582 | } |
583 | } |
584 | |
585 | /* 4. Create single source and sink. Connect new source vertex s' to function |
586 | entry block. Connect sink vertex t' to function exit. */ |
587 | if (dump_file) |
588 | fprintf (stream: dump_file, format: "\ns'->S, T->t':\n" ); |
589 | |
590 | new_entry_index = fixup_graph->new_entry_index = fixup_graph->num_vertices; |
591 | fixup_graph->num_vertices++; |
592 | /* Set supply_value to 1 to avoid zero count function ENTRY. */ |
593 | add_fixup_edge (fixup_graph, src: new_entry_index, ENTRY_BLOCK, type: SOURCE_CONNECT_EDGE, |
594 | weight: 1 /* supply_value */, cost: 0, max_capacity: 1 /* supply_value */); |
595 | |
596 | /* Create new exit with EXIT_BLOCK as single pred. */ |
597 | new_exit_index = fixup_graph->new_exit_index = fixup_graph->num_vertices; |
598 | fixup_graph->num_vertices++; |
599 | add_fixup_edge (fixup_graph, src: 2 * EXIT_BLOCK + 1, dest: new_exit_index, |
600 | type: SINK_CONNECT_EDGE, |
601 | weight: 0 /* demand_value */, cost: 0, max_capacity: 0 /* demand_value */); |
602 | |
603 | /* Connect vertices with unbalanced D(v) to source/sink. */ |
604 | if (dump_file) |
605 | fprintf (stream: dump_file, format: "\nD(v) balance:\n" ); |
606 | /* Skip vertices for ENTRY (0, 1) and EXIT (2,3) blocks, so start with i = 4. |
607 | diff_out_in[v''] will be 0, so skip v'' vertices, hence i += 2. */ |
608 | for (i = 4; i < new_entry_index; i += 2) |
609 | { |
610 | if (diff_out_in[i] > 0) |
611 | { |
612 | add_fixup_edge (fixup_graph, src: i, dest: new_exit_index, type: BALANCE_EDGE, weight: 0, cost: 0, |
613 | max_capacity: diff_out_in[i]); |
614 | demand_value += diff_out_in[i]; |
615 | } |
616 | else if (diff_out_in[i] < 0) |
617 | { |
618 | add_fixup_edge (fixup_graph, src: new_entry_index, dest: i, type: BALANCE_EDGE, weight: 0, cost: 0, |
619 | max_capacity: -diff_out_in[i]); |
620 | supply_value -= diff_out_in[i]; |
621 | } |
622 | } |
623 | |
624 | /* Set supply = demand. */ |
625 | if (dump_file) |
626 | { |
627 | fprintf (stream: dump_file, format: "\nAdjust supply and demand:\n" ); |
628 | fprintf (stream: dump_file, format: "supply_value=%" PRId64 "\n" , |
629 | supply_value); |
630 | fprintf (stream: dump_file, format: "demand_value=%" PRId64 "\n" , |
631 | demand_value); |
632 | } |
633 | |
634 | if (demand_value > supply_value) |
635 | { |
636 | pfedge = find_fixup_edge (fixup_graph, src: new_entry_index, ENTRY_BLOCK); |
637 | pfedge->max_capacity += (demand_value - supply_value); |
638 | } |
639 | else |
640 | { |
641 | pfedge = find_fixup_edge (fixup_graph, src: 2 * EXIT_BLOCK + 1, dest: new_exit_index); |
642 | pfedge->max_capacity += (supply_value - demand_value); |
643 | } |
644 | |
645 | /* 6. Normalize edges: remove anti-parallel edges. Anti-parallel edges are |
646 | created by the vertex transformation step from self-edges in the original |
647 | CFG and by the reverse edges added earlier. */ |
648 | if (dump_file) |
649 | fprintf (stream: dump_file, format: "\nNormalize edges:\n" ); |
650 | |
651 | fnum_edges = fixup_graph->num_edges; |
652 | fedge_list = fixup_graph->edge_list; |
653 | |
654 | for (i = 0; i < fnum_edges; i++) |
655 | { |
656 | pfedge = fedge_list + i; |
657 | r_pfedge = find_fixup_edge (fixup_graph, src: pfedge->dest, dest: pfedge->src); |
658 | if (((pfedge->type == VERTEX_SPLIT_EDGE) |
659 | || (pfedge->type == REDIRECT_EDGE)) && r_pfedge) |
660 | { |
661 | new_index = fixup_graph->num_vertices; |
662 | fixup_graph->num_vertices++; |
663 | |
664 | if (dump_file) |
665 | { |
666 | fprintf (stream: dump_file, format: "\nAnti-parallel edge:\n" ); |
667 | dump_fixup_edge (file: dump_file, fixup_graph, fedge: pfedge); |
668 | dump_fixup_edge (file: dump_file, fixup_graph, fedge: r_pfedge); |
669 | fprintf (stream: dump_file, format: "New vertex is %d.\n" , new_index); |
670 | fprintf (stream: dump_file, format: "------------------\n" ); |
671 | } |
672 | |
673 | pfedge->cost /= 2; |
674 | pfedge->norm_vertex_index = new_index; |
675 | if (dump_file) |
676 | { |
677 | fprintf (stream: dump_file, format: "After normalization:\n" ); |
678 | dump_fixup_edge (file: dump_file, fixup_graph, fedge: pfedge); |
679 | } |
680 | |
681 | /* Add a new fixup edge: new_index->src. */ |
682 | add_fixup_edge (fixup_graph, src: new_index, dest: pfedge->src, |
683 | type: REVERSE_NORMALIZED_EDGE, weight: 0, cost: r_pfedge->cost, |
684 | max_capacity: r_pfedge->max_capacity); |
685 | gcc_assert (fixup_graph->num_vertices <= fmax_num_vertices); |
686 | |
687 | /* Edge: r_pfedge->src -> r_pfedge->dest |
688 | ==> r_pfedge->src -> new_index. */ |
689 | r_pfedge->dest = new_index; |
690 | r_pfedge->type = REVERSE_NORMALIZED_EDGE; |
691 | r_pfedge->cost = pfedge->cost; |
692 | r_pfedge->max_capacity = pfedge->max_capacity; |
693 | if (dump_file) |
694 | dump_fixup_edge (file: dump_file, fixup_graph, fedge: r_pfedge); |
695 | } |
696 | } |
697 | |
698 | if (dump_file) |
699 | dump_fixup_graph (file: dump_file, fixup_graph, msg: "After create_fixup_graph()" ); |
700 | |
701 | /* Cleanup. */ |
702 | free (ptr: diff_out_in); |
703 | } |
704 | |
705 | |
706 | /* Allocates space for the structures in AUGMENTING_PATH. The space needed is |
707 | proportional to the number of nodes in the graph, which is given by |
708 | GRAPH_SIZE. */ |
709 | |
710 | static void |
711 | init_augmenting_path (augmenting_path_type *augmenting_path, int graph_size) |
712 | { |
713 | augmenting_path->queue_list.queue = (int *) |
714 | xcalloc (graph_size + 2, sizeof (int)); |
715 | augmenting_path->queue_list.size = graph_size + 2; |
716 | augmenting_path->bb_pred = (int *) xcalloc (graph_size, sizeof (int)); |
717 | augmenting_path->is_visited = (int *) xcalloc (graph_size, sizeof (int)); |
718 | } |
719 | |
720 | /* Free the structures in AUGMENTING_PATH. */ |
721 | static void |
722 | free_augmenting_path (augmenting_path_type *augmenting_path) |
723 | { |
724 | free (ptr: augmenting_path->queue_list.queue); |
725 | free (ptr: augmenting_path->bb_pred); |
726 | free (ptr: augmenting_path->is_visited); |
727 | } |
728 | |
729 | |
730 | /* Queue routines. Assumes queue will never overflow. */ |
731 | |
732 | static void |
733 | init_queue (queue_type *queue_list) |
734 | { |
735 | gcc_assert (queue_list); |
736 | queue_list->head = 0; |
737 | queue_list->tail = 0; |
738 | } |
739 | |
740 | /* Return true if QUEUE_LIST is empty. */ |
741 | static bool |
742 | is_empty (queue_type *queue_list) |
743 | { |
744 | return (queue_list->head == queue_list->tail); |
745 | } |
746 | |
747 | /* Insert element X into QUEUE_LIST. */ |
748 | static void |
749 | enqueue (queue_type *queue_list, int x) |
750 | { |
751 | gcc_assert (queue_list->tail < queue_list->size); |
752 | queue_list->queue[queue_list->tail] = x; |
753 | (queue_list->tail)++; |
754 | } |
755 | |
756 | /* Return the first element in QUEUE_LIST. */ |
757 | static int |
758 | dequeue (queue_type *queue_list) |
759 | { |
760 | int x; |
761 | gcc_assert (queue_list->head >= 0); |
762 | x = queue_list->queue[queue_list->head]; |
763 | (queue_list->head)++; |
764 | return x; |
765 | } |
766 | |
767 | |
768 | /* Finds a negative cycle in the residual network using |
769 | the Bellman-Ford algorithm. The flow on the found cycle is reversed by the |
770 | minimum residual capacity of that cycle. ENTRY and EXIT vertices are not |
771 | considered. |
772 | |
773 | Parameters: |
774 | FIXUP_GRAPH - Residual graph (input/output) |
775 | The following are allocated/freed by the caller: |
776 | PI - Vector to hold predecessors in path (pi = pred index) |
777 | D - D[I] holds minimum cost of path from i to sink |
778 | CYCLE - Vector to hold the minimum cost cycle |
779 | |
780 | Return: |
781 | true if a negative cycle was found, false otherwise. */ |
782 | |
783 | static bool |
784 | cancel_negative_cycle (fixup_graph_type *fixup_graph, |
785 | int *pi, gcov_type *d, int *cycle) |
786 | { |
787 | int i, j, k; |
788 | int fnum_vertices, fnum_edges; |
789 | fixup_edge_p fedge_list, pfedge, r_pfedge; |
790 | bool found_cycle = false; |
791 | int cycle_start = 0, cycle_end = 0; |
792 | gcov_type sum_cost = 0, cycle_flow = 0; |
793 | int new_entry_index; |
794 | bool propagated = false; |
795 | |
796 | gcc_assert (fixup_graph); |
797 | fnum_vertices = fixup_graph->num_vertices; |
798 | fnum_edges = fixup_graph->num_edges; |
799 | fedge_list = fixup_graph->edge_list; |
800 | new_entry_index = fixup_graph->new_entry_index; |
801 | |
802 | /* Initialize. */ |
803 | /* Skip ENTRY. */ |
804 | for (i = 1; i < fnum_vertices; i++) |
805 | { |
806 | d[i] = CAP_INFINITY; |
807 | pi[i] = -1; |
808 | cycle[i] = -1; |
809 | } |
810 | d[ENTRY_BLOCK] = 0; |
811 | |
812 | /* Relax. */ |
813 | for (k = 1; k < fnum_vertices; k++) |
814 | { |
815 | propagated = false; |
816 | for (i = 0; i < fnum_edges; i++) |
817 | { |
818 | pfedge = fedge_list + i; |
819 | if (pfedge->src == new_entry_index) |
820 | continue; |
821 | if (pfedge->is_rflow_valid && pfedge->rflow |
822 | && d[pfedge->src] != CAP_INFINITY |
823 | && (d[pfedge->dest] > d[pfedge->src] + pfedge->cost)) |
824 | { |
825 | d[pfedge->dest] = d[pfedge->src] + pfedge->cost; |
826 | pi[pfedge->dest] = pfedge->src; |
827 | propagated = true; |
828 | } |
829 | } |
830 | if (!propagated) |
831 | break; |
832 | } |
833 | |
834 | if (!propagated) |
835 | /* No negative cycles exist. */ |
836 | return 0; |
837 | |
838 | /* Detect. */ |
839 | for (i = 0; i < fnum_edges; i++) |
840 | { |
841 | pfedge = fedge_list + i; |
842 | if (pfedge->src == new_entry_index) |
843 | continue; |
844 | if (pfedge->is_rflow_valid && pfedge->rflow |
845 | && d[pfedge->src] != CAP_INFINITY |
846 | && (d[pfedge->dest] > d[pfedge->src] + pfedge->cost)) |
847 | { |
848 | found_cycle = true; |
849 | break; |
850 | } |
851 | } |
852 | |
853 | if (!found_cycle) |
854 | return 0; |
855 | |
856 | /* Augment the cycle with the cycle's minimum residual capacity. */ |
857 | found_cycle = false; |
858 | cycle[0] = pfedge->dest; |
859 | j = pfedge->dest; |
860 | |
861 | for (i = 1; i < fnum_vertices; i++) |
862 | { |
863 | j = pi[j]; |
864 | cycle[i] = j; |
865 | for (k = 0; k < i; k++) |
866 | { |
867 | if (cycle[k] == j) |
868 | { |
869 | /* cycle[k] -> ... -> cycle[i]. */ |
870 | cycle_start = k; |
871 | cycle_end = i; |
872 | found_cycle = true; |
873 | break; |
874 | } |
875 | } |
876 | if (found_cycle) |
877 | break; |
878 | } |
879 | |
880 | gcc_assert (cycle[cycle_start] == cycle[cycle_end]); |
881 | if (dump_file) |
882 | fprintf (stream: dump_file, format: "\nNegative cycle length is %d:\n" , |
883 | cycle_end - cycle_start); |
884 | |
885 | sum_cost = 0; |
886 | cycle_flow = CAP_INFINITY; |
887 | for (k = cycle_start; k < cycle_end; k++) |
888 | { |
889 | pfedge = find_fixup_edge (fixup_graph, src: cycle[k + 1], dest: cycle[k]); |
890 | cycle_flow = MIN (cycle_flow, pfedge->rflow); |
891 | sum_cost += pfedge->cost; |
892 | if (dump_file) |
893 | fprintf (stream: dump_file, format: "%d " , cycle[k]); |
894 | } |
895 | |
896 | if (dump_file) |
897 | { |
898 | fprintf (stream: dump_file, format: "%d" , cycle[k]); |
899 | fprintf (stream: dump_file, |
900 | format: ": (%" PRId64 ", %" PRId64 |
901 | ")\n" , sum_cost, cycle_flow); |
902 | fprintf (stream: dump_file, |
903 | format: "Augment cycle with %" PRId64 "\n" , |
904 | cycle_flow); |
905 | } |
906 | |
907 | for (k = cycle_start; k < cycle_end; k++) |
908 | { |
909 | pfedge = find_fixup_edge (fixup_graph, src: cycle[k + 1], dest: cycle[k]); |
910 | r_pfedge = find_fixup_edge (fixup_graph, src: cycle[k], dest: cycle[k + 1]); |
911 | pfedge->rflow -= cycle_flow; |
912 | if (pfedge->type) |
913 | pfedge->flow += cycle_flow; |
914 | r_pfedge->rflow += cycle_flow; |
915 | if (r_pfedge->type) |
916 | r_pfedge->flow -= cycle_flow; |
917 | } |
918 | |
919 | return true; |
920 | } |
921 | |
922 | |
923 | /* Computes the residual flow for FIXUP_GRAPH by setting the rflow field of |
924 | the edges. ENTRY and EXIT vertices should not be considered. */ |
925 | |
926 | static void |
927 | compute_residual_flow (fixup_graph_type *fixup_graph) |
928 | { |
929 | int i; |
930 | int fnum_edges; |
931 | fixup_edge_p fedge_list, pfedge; |
932 | |
933 | gcc_assert (fixup_graph); |
934 | |
935 | if (dump_file) |
936 | fputs (s: "\ncompute_residual_flow():\n" , stream: dump_file); |
937 | |
938 | fnum_edges = fixup_graph->num_edges; |
939 | fedge_list = fixup_graph->edge_list; |
940 | |
941 | for (i = 0; i < fnum_edges; i++) |
942 | { |
943 | pfedge = fedge_list + i; |
944 | pfedge->rflow = pfedge->max_capacity - pfedge->flow; |
945 | pfedge->is_rflow_valid = true; |
946 | add_rfixup_edge (fixup_graph, src: pfedge->dest, dest: pfedge->src, rflow: pfedge->flow, |
947 | cost: -pfedge->cost); |
948 | } |
949 | } |
950 | |
951 | |
952 | /* Uses Edmonds-Karp algorithm - BFS to find augmenting path from SOURCE to |
953 | SINK. The fields in the edge vector in the FIXUP_GRAPH are not modified by |
954 | this routine. The vector bb_pred in the AUGMENTING_PATH structure is updated |
955 | to reflect the path found. |
956 | Returns: 0 if no augmenting path is found, 1 otherwise. */ |
957 | |
958 | static int |
959 | find_augmenting_path (fixup_graph_type *fixup_graph, |
960 | augmenting_path_type *augmenting_path, int source, |
961 | int sink) |
962 | { |
963 | int u = 0; |
964 | int i; |
965 | fixup_vertex_p fvertex_list, pfvertex; |
966 | fixup_edge_p pfedge; |
967 | int *bb_pred, *is_visited; |
968 | queue_type *queue_list; |
969 | |
970 | gcc_assert (augmenting_path); |
971 | bb_pred = augmenting_path->bb_pred; |
972 | gcc_assert (bb_pred); |
973 | is_visited = augmenting_path->is_visited; |
974 | gcc_assert (is_visited); |
975 | queue_list = &(augmenting_path->queue_list); |
976 | |
977 | gcc_assert (fixup_graph); |
978 | |
979 | fvertex_list = fixup_graph->vertex_list; |
980 | |
981 | for (u = 0; u < fixup_graph->num_vertices; u++) |
982 | is_visited[u] = 0; |
983 | |
984 | init_queue (queue_list); |
985 | enqueue (queue_list, x: source); |
986 | bb_pred[source] = -1; |
987 | |
988 | while (!is_empty (queue_list)) |
989 | { |
990 | u = dequeue (queue_list); |
991 | is_visited[u] = 1; |
992 | pfvertex = fvertex_list + u; |
993 | for (i = 0; pfvertex->succ_edges.iterate (ix: i, ptr: &pfedge); |
994 | i++) |
995 | { |
996 | int dest = pfedge->dest; |
997 | if ((pfedge->rflow > 0) && (is_visited[dest] == 0)) |
998 | { |
999 | enqueue (queue_list, x: dest); |
1000 | bb_pred[dest] = u; |
1001 | is_visited[dest] = 1; |
1002 | if (dest == sink) |
1003 | return 1; |
1004 | } |
1005 | } |
1006 | } |
1007 | |
1008 | return 0; |
1009 | } |
1010 | |
1011 | |
1012 | /* Routine to find the maximal flow: |
1013 | Algorithm: |
1014 | 1. Initialize flow to 0 |
1015 | 2. Find an augmenting path form source to sink. |
1016 | 3. Send flow equal to the path's residual capacity along the edges of this path. |
1017 | 4. Repeat steps 2 and 3 until no new augmenting path is found. |
1018 | |
1019 | Parameters: |
1020 | SOURCE: index of source vertex (input) |
1021 | SINK: index of sink vertex (input) |
1022 | FIXUP_GRAPH: adjacency matrix representing the graph. The flow of the edges will be |
1023 | set to have a valid maximal flow by this routine. (input) |
1024 | Return: Maximum flow possible. */ |
1025 | |
1026 | static gcov_type |
1027 | find_max_flow (fixup_graph_type *fixup_graph, int source, int sink) |
1028 | { |
1029 | int fnum_edges; |
1030 | augmenting_path_type augmenting_path; |
1031 | int *bb_pred; |
1032 | gcov_type max_flow = 0; |
1033 | int i, u; |
1034 | fixup_edge_p fedge_list, pfedge, r_pfedge; |
1035 | |
1036 | gcc_assert (fixup_graph); |
1037 | |
1038 | fnum_edges = fixup_graph->num_edges; |
1039 | fedge_list = fixup_graph->edge_list; |
1040 | |
1041 | /* Initialize flow to 0. */ |
1042 | for (i = 0; i < fnum_edges; i++) |
1043 | { |
1044 | pfedge = fedge_list + i; |
1045 | pfedge->flow = 0; |
1046 | } |
1047 | |
1048 | compute_residual_flow (fixup_graph); |
1049 | |
1050 | init_augmenting_path (augmenting_path: &augmenting_path, graph_size: fixup_graph->num_vertices); |
1051 | |
1052 | bb_pred = augmenting_path.bb_pred; |
1053 | while (find_augmenting_path (fixup_graph, augmenting_path: &augmenting_path, source, sink)) |
1054 | { |
1055 | /* Determine the amount by which we can increment the flow. */ |
1056 | gcov_type increment = CAP_INFINITY; |
1057 | for (u = sink; u != source; u = bb_pred[u]) |
1058 | { |
1059 | pfedge = find_fixup_edge (fixup_graph, src: bb_pred[u], dest: u); |
1060 | increment = MIN (increment, pfedge->rflow); |
1061 | } |
1062 | max_flow += increment; |
1063 | |
1064 | /* Now increment the flow. EXIT vertex index is 1. */ |
1065 | for (u = sink; u != source; u = bb_pred[u]) |
1066 | { |
1067 | pfedge = find_fixup_edge (fixup_graph, src: bb_pred[u], dest: u); |
1068 | r_pfedge = find_fixup_edge (fixup_graph, src: u, dest: bb_pred[u]); |
1069 | if (pfedge->type) |
1070 | { |
1071 | /* forward edge. */ |
1072 | pfedge->flow += increment; |
1073 | pfedge->rflow -= increment; |
1074 | r_pfedge->rflow += increment; |
1075 | } |
1076 | else |
1077 | { |
1078 | /* backward edge. */ |
1079 | gcc_assert (r_pfedge->type); |
1080 | r_pfedge->rflow += increment; |
1081 | r_pfedge->flow -= increment; |
1082 | pfedge->rflow -= increment; |
1083 | } |
1084 | } |
1085 | |
1086 | if (dump_file) |
1087 | { |
1088 | fprintf (stream: dump_file, format: "\nDump augmenting path:\n" ); |
1089 | for (u = sink; u != source; u = bb_pred[u]) |
1090 | { |
1091 | print_basic_block (file: dump_file, fixup_graph, n: u); |
1092 | fprintf (stream: dump_file, format: "<-" ); |
1093 | } |
1094 | fprintf (stream: dump_file, |
1095 | format: "ENTRY (path_capacity=%" PRId64 ")\n" , |
1096 | increment); |
1097 | fprintf (stream: dump_file, |
1098 | format: "Network flow is %" PRId64 ".\n" , |
1099 | max_flow); |
1100 | } |
1101 | } |
1102 | |
1103 | free_augmenting_path (augmenting_path: &augmenting_path); |
1104 | if (dump_file) |
1105 | dump_fixup_graph (file: dump_file, fixup_graph, msg: "After find_max_flow()" ); |
1106 | return max_flow; |
1107 | } |
1108 | |
1109 | |
1110 | /* Computes the corrected edge and basic block weights using FIXUP_GRAPH |
1111 | after applying the find_minimum_cost_flow() routine. */ |
1112 | |
1113 | static void |
1114 | adjust_cfg_counts (fixup_graph_type *fixup_graph) |
1115 | { |
1116 | basic_block bb; |
1117 | edge e; |
1118 | edge_iterator ei; |
1119 | int i, j; |
1120 | fixup_edge_p pfedge, pfedge_n; |
1121 | |
1122 | gcc_assert (fixup_graph); |
1123 | |
1124 | if (dump_file) |
1125 | fprintf (stream: dump_file, format: "\nadjust_cfg_counts():\n" ); |
1126 | |
1127 | FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), |
1128 | EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb) |
1129 | { |
1130 | i = 2 * bb->index; |
1131 | |
1132 | /* Fixup BB. */ |
1133 | if (dump_file) |
1134 | fprintf (stream: dump_file, |
1135 | format: "BB%d: %" PRId64 "" , bb->index, bb_gcov_count (bb)); |
1136 | |
1137 | pfedge = find_fixup_edge (fixup_graph, src: i, dest: i + 1); |
1138 | if (pfedge->flow) |
1139 | { |
1140 | bb_gcov_count (bb) += pfedge->flow; |
1141 | if (dump_file) |
1142 | { |
1143 | fprintf (stream: dump_file, format: " + %" PRId64 "(" , |
1144 | pfedge->flow); |
1145 | print_edge (file: dump_file, fixup_graph, s: i, d: i + 1); |
1146 | fprintf (stream: dump_file, format: ")" ); |
1147 | } |
1148 | } |
1149 | |
1150 | pfedge_n = |
1151 | find_fixup_edge (fixup_graph, src: i + 1, dest: pfedge->norm_vertex_index); |
1152 | /* Deduct flow from normalized reverse edge. */ |
1153 | if (pfedge->norm_vertex_index && pfedge_n->flow) |
1154 | { |
1155 | bb_gcov_count (bb) -= pfedge_n->flow; |
1156 | if (dump_file) |
1157 | { |
1158 | fprintf (stream: dump_file, format: " - %" PRId64 "(" , |
1159 | pfedge_n->flow); |
1160 | print_edge (file: dump_file, fixup_graph, s: i + 1, |
1161 | d: pfedge->norm_vertex_index); |
1162 | fprintf (stream: dump_file, format: ")" ); |
1163 | } |
1164 | } |
1165 | if (dump_file) |
1166 | fprintf (stream: dump_file, format: " = %" PRId64 "\n" , bb_gcov_count (bb)); |
1167 | |
1168 | /* Fixup edge. */ |
1169 | FOR_EACH_EDGE (e, ei, bb->succs) |
1170 | { |
1171 | /* Treat edges with ignore attribute set as if they don't exist. */ |
1172 | if (EDGE_INFO (e) && EDGE_INFO (e)->ignore) |
1173 | continue; |
1174 | |
1175 | j = 2 * e->dest->index; |
1176 | if (dump_file) |
1177 | fprintf (stream: dump_file, format: "%d->%d: %" PRId64 "" , |
1178 | bb->index, e->dest->index, edge_gcov_count (e)); |
1179 | |
1180 | pfedge = find_fixup_edge (fixup_graph, src: i + 1, dest: j); |
1181 | |
1182 | if (bb->index != e->dest->index) |
1183 | { |
1184 | /* Non-self edge. */ |
1185 | if (pfedge->flow) |
1186 | { |
1187 | edge_gcov_count (e) += pfedge->flow; |
1188 | if (dump_file) |
1189 | { |
1190 | fprintf (stream: dump_file, format: " + %" PRId64 "(" , |
1191 | pfedge->flow); |
1192 | print_edge (file: dump_file, fixup_graph, s: i + 1, d: j); |
1193 | fprintf (stream: dump_file, format: ")" ); |
1194 | } |
1195 | } |
1196 | |
1197 | pfedge_n = |
1198 | find_fixup_edge (fixup_graph, src: j, dest: pfedge->norm_vertex_index); |
1199 | /* Deduct flow from normalized reverse edge. */ |
1200 | if (pfedge->norm_vertex_index && pfedge_n->flow) |
1201 | { |
1202 | edge_gcov_count (e) -= pfedge_n->flow; |
1203 | if (dump_file) |
1204 | { |
1205 | fprintf (stream: dump_file, format: " - %" PRId64 "(" , |
1206 | pfedge_n->flow); |
1207 | print_edge (file: dump_file, fixup_graph, s: j, |
1208 | d: pfedge->norm_vertex_index); |
1209 | fprintf (stream: dump_file, format: ")" ); |
1210 | } |
1211 | } |
1212 | } |
1213 | else |
1214 | { |
1215 | /* Handle self edges. Self edge is split with a normalization |
1216 | vertex. Here i=j. */ |
1217 | pfedge = find_fixup_edge (fixup_graph, src: j, dest: i + 1); |
1218 | pfedge_n = |
1219 | find_fixup_edge (fixup_graph, src: i + 1, dest: pfedge->norm_vertex_index); |
1220 | edge_gcov_count (e) += pfedge_n->flow; |
1221 | bb_gcov_count (bb) += pfedge_n->flow; |
1222 | if (dump_file) |
1223 | { |
1224 | fprintf (stream: dump_file, format: "(self edge)" ); |
1225 | fprintf (stream: dump_file, format: " + %" PRId64 "(" , |
1226 | pfedge_n->flow); |
1227 | print_edge (file: dump_file, fixup_graph, s: i + 1, |
1228 | d: pfedge->norm_vertex_index); |
1229 | fprintf (stream: dump_file, format: ")" ); |
1230 | } |
1231 | } |
1232 | |
1233 | if (bb_gcov_count (bb)) |
1234 | e->probability = profile_probability::probability_in_gcov_type |
1235 | (val1: edge_gcov_count (e), val2: bb_gcov_count (bb)); |
1236 | if (dump_file) |
1237 | { |
1238 | fprintf (stream: dump_file, format: " = %" PRId64 "\t" , |
1239 | edge_gcov_count (e)); |
1240 | e->probability.dump (f: dump_file); |
1241 | fprintf (stream: dump_file, format: "\n" ); |
1242 | } |
1243 | } |
1244 | } |
1245 | |
1246 | bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun)) = |
1247 | sum_edge_counts (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs); |
1248 | bb_gcov_count (EXIT_BLOCK_PTR_FOR_FN (cfun)) = |
1249 | sum_edge_counts (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); |
1250 | |
1251 | /* Compute edge probabilities. */ |
1252 | FOR_ALL_BB_FN (bb, cfun) |
1253 | { |
1254 | if (bb_gcov_count (bb)) |
1255 | { |
1256 | FOR_EACH_EDGE (e, ei, bb->succs) |
1257 | e->probability = profile_probability::probability_in_gcov_type |
1258 | (val1: edge_gcov_count (e), val2: bb_gcov_count (bb)); |
1259 | } |
1260 | } |
1261 | |
1262 | if (dump_file) |
1263 | { |
1264 | fprintf (stream: dump_file, format: "\nCheck %s() CFG flow conservation:\n" , |
1265 | current_function_name ()); |
1266 | FOR_EACH_BB_FN (bb, cfun) |
1267 | { |
1268 | if ((bb_gcov_count (bb) != sum_edge_counts (edges: bb->preds)) |
1269 | || (bb_gcov_count (bb) != sum_edge_counts (edges: bb->succs))) |
1270 | { |
1271 | fprintf (stream: dump_file, |
1272 | format: "BB%d(%" PRId64 ") **INVALID**: " , |
1273 | bb->index, bb_gcov_count (bb)); |
1274 | fprintf (stderr, |
1275 | format: "******** BB%d(%" PRId64 |
1276 | ") **INVALID**: \n" , bb->index, bb_gcov_count (bb)); |
1277 | fprintf (stream: dump_file, format: "in_edges=%" PRId64 " " , |
1278 | sum_edge_counts (edges: bb->preds)); |
1279 | fprintf (stream: dump_file, format: "out_edges=%" PRId64 "\n" , |
1280 | sum_edge_counts (edges: bb->succs)); |
1281 | } |
1282 | } |
1283 | } |
1284 | } |
1285 | |
1286 | |
1287 | /* Implements the negative cycle canceling algorithm to compute a minimum cost |
1288 | flow. |
1289 | Algorithm: |
1290 | 1. Find maximal flow. |
1291 | 2. Form residual network |
1292 | 3. Repeat: |
1293 | While G contains a negative cost cycle C, reverse the flow on the found cycle |
1294 | by the minimum residual capacity in that cycle. |
1295 | 4. Form the minimal cost flow |
1296 | f(u,v) = rf(v, u) |
1297 | Input: |
1298 | FIXUP_GRAPH - Initial fixup graph. |
1299 | The flow field is modified to represent the minimum cost flow. */ |
1300 | |
1301 | static void |
1302 | find_minimum_cost_flow (fixup_graph_type *fixup_graph) |
1303 | { |
1304 | /* Holds the index of predecessor in path. */ |
1305 | int *pred; |
1306 | /* Used to hold the minimum cost cycle. */ |
1307 | int *cycle; |
1308 | /* Used to record the number of iterations of cancel_negative_cycle. */ |
1309 | int iteration; |
1310 | /* Vector d[i] holds the minimum cost of path from i to sink. */ |
1311 | gcov_type *d; |
1312 | int fnum_vertices; |
1313 | int new_exit_index; |
1314 | int new_entry_index; |
1315 | |
1316 | gcc_assert (fixup_graph); |
1317 | fnum_vertices = fixup_graph->num_vertices; |
1318 | new_exit_index = fixup_graph->new_exit_index; |
1319 | new_entry_index = fixup_graph->new_entry_index; |
1320 | |
1321 | find_max_flow (fixup_graph, source: new_entry_index, sink: new_exit_index); |
1322 | |
1323 | /* Initialize the structures for find_negative_cycle(). */ |
1324 | pred = (int *) xcalloc (fnum_vertices, sizeof (int)); |
1325 | d = (gcov_type *) xcalloc (fnum_vertices, sizeof (gcov_type)); |
1326 | cycle = (int *) xcalloc (fnum_vertices, sizeof (int)); |
1327 | |
1328 | /* Repeatedly find and cancel negative cost cycles, until |
1329 | no more negative cycles exist. This also updates the flow field |
1330 | to represent the minimum cost flow so far. */ |
1331 | iteration = 0; |
1332 | while (cancel_negative_cycle (fixup_graph, pi: pred, d, cycle)) |
1333 | { |
1334 | iteration++; |
1335 | if (iteration > MAX_ITER (fixup_graph->num_vertices, |
1336 | fixup_graph->num_edges)) |
1337 | break; |
1338 | } |
1339 | |
1340 | if (dump_file) |
1341 | dump_fixup_graph (file: dump_file, fixup_graph, |
1342 | msg: "After find_minimum_cost_flow()" ); |
1343 | |
1344 | /* Cleanup structures. */ |
1345 | free (ptr: pred); |
1346 | free (ptr: d); |
1347 | free (ptr: cycle); |
1348 | } |
1349 | |
1350 | |
1351 | /* Compute the sum of the edge counts in TO_EDGES. */ |
1352 | |
1353 | gcov_type |
1354 | sum_edge_counts (vec<edge, va_gc> *to_edges) |
1355 | { |
1356 | gcov_type sum = 0; |
1357 | edge e; |
1358 | edge_iterator ei; |
1359 | |
1360 | FOR_EACH_EDGE (e, ei, to_edges) |
1361 | { |
1362 | if (EDGE_INFO (e) && EDGE_INFO (e)->ignore) |
1363 | continue; |
1364 | sum += edge_gcov_count (e); |
1365 | } |
1366 | return sum; |
1367 | } |
1368 | |
1369 | |
1370 | /* Main routine. Smoothes the initial assigned basic block and edge counts using |
1371 | a minimum cost flow algorithm, to ensure that the flow consistency rule is |
1372 | obeyed: sum of outgoing edges = sum of incoming edges for each basic |
1373 | block. */ |
1374 | |
1375 | void |
1376 | mcf_smooth_cfg (void) |
1377 | { |
1378 | fixup_graph_type fixup_graph; |
1379 | memset (s: &fixup_graph, c: 0, n: sizeof (fixup_graph)); |
1380 | create_fixup_graph (fixup_graph: &fixup_graph); |
1381 | find_minimum_cost_flow (fixup_graph: &fixup_graph); |
1382 | adjust_cfg_counts (fixup_graph: &fixup_graph); |
1383 | delete_fixup_graph (fixup_graph: &fixup_graph); |
1384 | } |
1385 | |