1 | /* Reassociation for trees. |
2 | Copyright (C) 2005-2024 Free Software Foundation, Inc. |
3 | Contributed by Daniel Berlin <dan@dberlin.org> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) |
10 | any later version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | GNU General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "target.h" |
26 | #include "rtl.h" |
27 | #include "tree.h" |
28 | #include "gimple.h" |
29 | #include "cfghooks.h" |
30 | #include "alloc-pool.h" |
31 | #include "tree-pass.h" |
32 | #include "memmodel.h" |
33 | #include "tm_p.h" |
34 | #include "ssa.h" |
35 | #include "optabs-tree.h" |
36 | #include "gimple-pretty-print.h" |
37 | #include "diagnostic-core.h" |
38 | #include "fold-const.h" |
39 | #include "stor-layout.h" |
40 | #include "cfganal.h" |
41 | #include "gimple-iterator.h" |
42 | #include "gimple-fold.h" |
43 | #include "tree-eh.h" |
44 | #include "gimplify-me.h" |
45 | #include "tree-cfg.h" |
46 | #include "tree-ssa-loop.h" |
47 | #include "flags.h" |
48 | #include "tree-ssa.h" |
49 | #include "langhooks.h" |
50 | #include "cfgloop.h" |
51 | #include "builtins.h" |
52 | #include "gimplify.h" |
53 | #include "case-cfn-macros.h" |
54 | #include "tree-ssa-reassoc.h" |
55 | #include "tree-ssa-math-opts.h" |
56 | #include "gimple-range.h" |
57 | #include "internal-fn.h" |
58 | |
59 | /* This is a simple global reassociation pass. It is, in part, based |
60 | on the LLVM pass of the same name (They do some things more/less |
61 | than we do, in different orders, etc). |
62 | |
63 | It consists of five steps: |
64 | |
65 | 1. Breaking up subtract operations into addition + negate, where |
66 | it would promote the reassociation of adds. |
67 | |
68 | 2. Left linearization of the expression trees, so that (A+B)+(C+D) |
69 | becomes (((A+B)+C)+D), which is easier for us to rewrite later. |
70 | During linearization, we place the operands of the binary |
71 | expressions into a vector of operand_entry_* |
72 | |
73 | 3. Optimization of the operand lists, eliminating things like a + |
74 | -a, a & a, etc. |
75 | |
76 | 3a. Combine repeated factors with the same occurrence counts |
77 | into a __builtin_powi call that will later be optimized into |
78 | an optimal number of multiplies. |
79 | |
80 | 4. Rewrite the expression trees we linearized and optimized so |
81 | they are in proper rank order. |
82 | |
83 | 5. Repropagate negates, as nothing else will clean them up ATM. |
84 | |
85 | A bit of theory on #4, since nobody seems to write anything down |
86 | about why it makes sense to do it the way they do it: |
87 | |
88 | We could do this much nicer theoretically, but don't (for reasons |
89 | explained after how to do it theoretically nice :P). |
90 | |
91 | In order to promote the most redundancy elimination, you want |
92 | binary expressions whose operands are the same rank (or |
93 | preferably, the same value) exposed to the redundancy eliminator, |
94 | for possible elimination. |
95 | |
96 | So the way to do this, if we really cared, is to build the new op |
97 | tree from the leaves to the roots, merging as you go, and putting the |
98 | new op on the end of the worklist, until you are left with one |
99 | thing on the worklist. |
100 | |
101 | I.e., if you have to rewrite the following set of operands (listed with |
102 | rank in parentheses), with opcode PLUS_EXPR: |
103 | |
104 | a (1), b (1), c (1), d (2), e (2) |
105 | |
106 | |
107 | We start with our merge worklist empty, and the ops list with all of |
108 | those on it. |
109 | |
110 | You want to first merge all leaves of the same rank, as much as |
111 | possible. |
112 | |
113 | So first build a binary op of |
114 | |
115 | mergetmp = a + b, and put "mergetmp" on the merge worklist. |
116 | |
117 | Because there is no three operand form of PLUS_EXPR, c is not going to |
118 | be exposed to redundancy elimination as a rank 1 operand. |
119 | |
120 | So you might as well throw it on the merge worklist (you could also |
121 | consider it to now be a rank two operand, and merge it with d and e, |
122 | but in this case, you then have evicted e from a binary op. So at |
123 | least in this situation, you can't win.) |
124 | |
125 | Then build a binary op of d + e |
126 | mergetmp2 = d + e |
127 | |
128 | and put mergetmp2 on the merge worklist. |
129 | |
130 | so merge worklist = {mergetmp, c, mergetmp2} |
131 | |
132 | Continue building binary ops of these operations until you have only |
133 | one operation left on the worklist. |
134 | |
135 | So we have |
136 | |
137 | build binary op |
138 | mergetmp3 = mergetmp + c |
139 | |
140 | worklist = {mergetmp2, mergetmp3} |
141 | |
142 | mergetmp4 = mergetmp2 + mergetmp3 |
143 | |
144 | worklist = {mergetmp4} |
145 | |
146 | Because we have one operation left, we can now just set the original |
147 | statement equal to the result of that operation. |
148 | |
149 | This will at least expose a + b and d + e to redundancy elimination |
150 | as binary operations. |
151 | |
152 | For extra points, you can reuse the old statements to build the |
153 | mergetmps, since you shouldn't run out. |
154 | |
155 | So why don't we do this? |
156 | |
157 | Because it's expensive, and rarely will help. Most trees we are |
158 | reassociating have 3 or fewer ops. If they have 2 ops, they already |
159 | will be written into a nice single binary op. If you have 3 ops, a |
160 | single simple check suffices to tell you whether the first two are of the |
161 | same rank. If so, you know to order it |
162 | |
163 | mergetmp = op1 + op2 |
164 | newstmt = mergetmp + op3 |
165 | |
166 | instead of |
167 | mergetmp = op2 + op3 |
168 | newstmt = mergetmp + op1 |
169 | |
170 | If all three are of the same rank, you can't expose them all in a |
171 | single binary operator anyway, so the above is *still* the best you |
172 | can do. |
173 | |
174 | Thus, this is what we do. When we have three ops left, we check to see |
175 | what order to put them in, and call it a day. As a nod to vector sum |
176 | reduction, we check if any of the ops are really a phi node that is a |
177 | destructive update for the associating op, and keep the destructive |
178 | update together for vector sum reduction recognition. */ |
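
/* A minimal, self-contained sketch of the merge-by-rank strategy described
   above, with operands reduced to (rank, value) pairs and PLUS_EXPR reduced
   to integer addition.  All names here (rank_op, merge_by_rank) are
   illustrative only and not part of the pass.  */
#if 0
#include <vector>
#include <algorithm>

struct rank_op { long rank; long value; };

static rank_op
merge_by_rank (std::vector<rank_op> ops)
{
  /* Sort so that operands of equal rank are adjacent.  */
  std::sort (ops.begin (), ops.end (),
             [] (const rank_op &a, const rank_op &b)
             { return a.rank < b.rank; });

  std::vector<rank_op> worklist;
  /* Merge equal-rank leaves pairwise; odd ones out go on the worklist
     unmerged (like c in the example above).  */
  for (size_t i = 0; i < ops.size (); )
    if (i + 1 < ops.size () && ops[i].rank == ops[i + 1].rank)
      {
        worklist.push_back ({ops[i].rank + 1,
                             ops[i].value + ops[i + 1].value});
        i += 2;
      }
    else
      worklist.push_back (ops[i++]);

  /* Keep combining the two oldest entries until one remains; that result
     stands in for the original statement's RHS.  */
  while (worklist.size () > 1)
    {
      rank_op a = worklist[0], b = worklist[1];
      worklist.erase (worklist.begin (), worklist.begin () + 2);
      worklist.push_back ({std::max (a.rank, b.rank) + 1,
                           a.value + b.value});
    }
  return worklist[0];
}
#endif
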
179 | |
180 | /* Enable insertion of __builtin_powi calls during execute_reassoc. See |
181 | point 3a in the pass header comment. */ |
182 | static bool reassoc_insert_powi_p; |
183 | |
184 | /* Enable biasing ranks of loop accumulators. We don't want this before |
185 | vectorization, since it interferes with reduction chains. */ |
186 | static bool reassoc_bias_loop_carried_phi_ranks_p; |
187 | |
188 | /* Statistics */ |
189 | static struct |
190 | { |
191 | int linearized; |
192 | int constants_eliminated; |
193 | int ops_eliminated; |
194 | int rewritten; |
195 | int pows_encountered; |
196 | int pows_created; |
197 | } reassociate_stats; |
198 | |
199 | |
200 | static object_allocator<operand_entry> operand_entry_pool |
201 | ("operand entry pool" ); |
202 | |
203 | /* This is used to assign a unique ID to each struct operand_entry |
204 | so that qsort results are identical on different hosts. */ |
205 | static unsigned int next_operand_entry_id; |
206 | |
207 | /* Starting rank number for a given basic block, so that we can rank |
208 | operations using unmovable instructions in that BB based on the bb |
209 | depth. */ |
210 | static int64_t *bb_rank; |
211 | |
212 | /* Operand->rank hashtable. */ |
213 | static hash_map<tree, int64_t> *operand_rank; |
214 | |
215 | /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be |
216 | biased. */ |
217 | static auto_bitmap biased_names; |
218 | |
219 | /* Vector of SSA_NAMEs on which after reassociate_bb is done with |
220 | all basic blocks the CFG should be adjusted - basic blocks |
221 | split right after that SSA_NAME's definition statement and before |
222 | the only use, which must be a bit ior. */ |
223 | static vec<tree> reassoc_branch_fixups; |
224 | |
225 | /* Forward decls. */ |
226 | static int64_t get_rank (tree); |
227 | static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *); |
228 | |
229 | /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts |
230 | possibly added by gsi_remove. */ |
231 | |
232 | static bool |
233 | reassoc_remove_stmt (gimple_stmt_iterator *gsi) |
234 | { |
235 | gimple *stmt = gsi_stmt (*gsi); |
236 | |
237 | if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI) |
238 | return gsi_remove (gsi, true); |
239 | |
240 | gimple_stmt_iterator prev = *gsi; |
241 | gsi_prev (&prev); |
242 | unsigned uid = gimple_uid (stmt); |
243 | basic_block bb = gimple_bb (stmt); |
244 | bool ret = gsi_remove (gsi, true); |
245 | if (!gsi_end_p (prev)) |
246 | gsi_next (&prev); |
247 | else |
248 | prev = gsi_start_bb (bb); |
249 | gimple *end_stmt = gsi_stmt (*gsi); |
250 | while ((stmt = gsi_stmt (prev)) != end_stmt) |
251 | { |
252 | gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0); |
253 | gimple_set_uid (stmt, uid); |
254 | gsi_next (&prev); |
255 | } |
256 | return ret; |
257 | } |
258 | |
259 | /* Bias amount for loop-carried phis. We want this to be larger than |
260 | the depth of any reassociation tree we can see, but not larger than |
261 | the rank difference between two blocks. */ |
262 | #define PHI_LOOP_BIAS (1 << 15) |
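
/* Illustrative check, not part of the pass: block ranks are spaced
   1 << 16 apart (sort_by_operand_rank compares them via ">> 16"), so a
   bias of 1 << 15 can lift an operand past everything in its block
   without making it look like it comes from a different block.  */
#if 0
static_assert ((PHI_LOOP_BIAS >> 16) == 0,
               "the bias must not change the block part of a rank");
#endif
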
263 | |
264 | /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's |
265 | operands to the STMT's left-hand side. The goal is to preserve bias in code |
266 | like this: |
267 | |
268 | x_1 = phi(x_0, x_2) |
269 | a = x_1 | 1 |
270 | b = a ^ 2 |
271 | .MEM = b |
272 | c = b + d |
273 | x_2 = c + e |
274 | |
275 | That is, we need to preserve bias along single-use chains originating from |
276 | loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be |
277 | uses, because only they participate in rank propagation. */ |
278 | static bool |
279 | propagate_bias_p (gimple *stmt) |
280 | { |
281 | use_operand_p use; |
282 | imm_use_iterator use_iter; |
283 | gimple *single_use_stmt = NULL; |
284 | |
285 | if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference) |
286 | return false; |
287 | |
288 | FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt)) |
289 | { |
290 | gimple *current_use_stmt = USE_STMT (use); |
291 | |
292 | if (is_gimple_assign (current_use_stmt) |
293 | && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME) |
294 | { |
295 | if (single_use_stmt != NULL && single_use_stmt != current_use_stmt) |
296 | return false; |
297 | single_use_stmt = current_use_stmt; |
298 | } |
299 | } |
300 | |
301 | if (single_use_stmt == NULL) |
302 | return false; |
303 | |
304 | if (gimple_bb (stmt)->loop_father |
305 | != gimple_bb (single_use_stmt)->loop_father) |
306 | return false; |
307 | |
308 | return true; |
309 | } |
310 | |
311 | /* Rank assigned to a phi statement. If STMT is a loop-carried phi of |
312 | an innermost loop, and the phi has only a single use which is inside |
313 | the loop, then the rank is the block rank of the loop latch plus an |
314 | extra bias for the loop-carried dependence. This causes expressions |
315 | calculated into an accumulator variable to be independent for each |
316 | iteration of the loop. If STMT is some other phi, the rank is the |
317 | block rank of its containing block. */ |
318 | static int64_t |
319 | phi_rank (gimple *stmt) |
320 | { |
321 | basic_block bb = gimple_bb (stmt); |
322 | class loop *father = bb->loop_father; |
323 | tree res; |
324 | unsigned i; |
325 | use_operand_p use; |
326 | gimple *use_stmt; |
327 | |
328 | if (!reassoc_bias_loop_carried_phi_ranks_p) |
329 | return bb_rank[bb->index]; |
330 | |
331 | /* We only care about real loops (those with a latch). */ |
332 | if (!father->latch) |
333 | return bb_rank[bb->index]; |
334 | |
335 | /* Interesting phis must be in headers of innermost loops. */ |
336 | if (bb != father->header |
337 | || father->inner) |
338 | return bb_rank[bb->index]; |
339 | |
340 | /* Ignore virtual SSA_NAMEs. */ |
341 | res = gimple_phi_result (stmt); |
342 | if (virtual_operand_p (res)) |
343 | return bb_rank[bb->index]; |
344 | |
345 | /* The phi definition must have a single use, and that use must be |
346 | within the loop. Otherwise this isn't an accumulator pattern. */ |
347 | if (!single_imm_use (res, &use, &use_stmt) |
348 | || gimple_bb (use_stmt)->loop_father != father) |
349 | return bb_rank[bb->index]; |
350 | |
351 | /* Look for phi arguments from within the loop. If found, bias this phi. */ |
352 | for (i = 0; i < gimple_phi_num_args (stmt); i++) |
353 | { |
354 | tree arg = gimple_phi_arg_def (stmt, i); |
355 | if (TREE_CODE (arg) == SSA_NAME |
356 | && !SSA_NAME_IS_DEFAULT_DEF (arg)) |
357 | { |
358 | gimple *def_stmt = SSA_NAME_DEF_STMT (arg); |
359 | if (gimple_bb (def_stmt)->loop_father == father) |
360 | return bb_rank[father->latch->index] + PHI_LOOP_BIAS; |
361 | } |
362 | } |
363 | |
364 | /* Must be an uninteresting phi. */ |
365 | return bb_rank[bb->index]; |
366 | } |
367 | |
368 | /* Return the maximum of RANK and the rank that should be propagated |
369 | from expression OP. For most operands, this is just the rank of OP. |
370 | For loop-carried phis, the value is zero to avoid undoing the bias |
371 | in favor of the phi. */ |
372 | static int64_t |
373 | propagate_rank (int64_t rank, tree op, bool *maybe_biased_p) |
374 | { |
375 | int64_t op_rank; |
376 | |
377 | op_rank = get_rank (op); |
378 | |
379 | /* Check whether op is biased after the get_rank () call, since it might have |
380 | updated biased_names. */ |
381 | if (TREE_CODE (op) == SSA_NAME |
382 | && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op))) |
383 | { |
384 | if (maybe_biased_p == NULL) |
385 | return rank; |
386 | *maybe_biased_p = true; |
387 | } |
388 | |
389 | return MAX (rank, op_rank); |
390 | } |
391 | |
392 | /* Look up the operand rank structure for expression E. */ |
393 | |
394 | static inline int64_t |
395 | find_operand_rank (tree e) |
396 | { |
397 | int64_t *slot = operand_rank->get (e); |
398 | return slot ? *slot : -1; |
399 | } |
400 | |
401 | /* Insert {E,RANK} into the operand rank hashtable. */ |
402 | |
403 | static inline void |
404 | insert_operand_rank (tree e, int64_t rank) |
405 | { |
406 | gcc_assert (rank > 0); |
407 | gcc_assert (!operand_rank->put (e, rank)); |
408 | } |
409 | |
410 | /* Given an expression E, return the rank of the expression. */ |
411 | |
412 | static int64_t |
413 | get_rank (tree e) |
414 | { |
415 | /* SSA_NAME's have the rank of the expression they are the result |
416 | of. |
417 | For globals and uninitialized values, the rank is 0. |
418 | For function arguments, use the pre-setup rank. |
419 | For PHI nodes, stores, asm statements, etc, we use the rank of |
420 | the BB. |
421 | For simple operations, the rank is the maximum rank of any of |
422 | its operands, or the bb_rank, whichever is less. |
423 | I make no claims that this is optimal; however, it gives good |
424 | results. */ |
425 | |
426 | /* We make an exception to the normal ranking system to break |
427 | dependences of accumulator variables in loops. Suppose we |
428 | have a simple one-block loop containing: |
429 | |
430 | x_1 = phi(x_0, x_2) |
431 | b = a + x_1 |
432 | c = b + d |
433 | x_2 = c + e |
434 | |
435 | As shown, each iteration of the calculation into x is fully |
436 | dependent upon the iteration before it. We would prefer to |
437 | see this in the form: |
438 | |
439 | x_1 = phi(x_0, x_2) |
440 | b = a + d |
441 | c = b + e |
442 | x_2 = c + x_1 |
443 | |
444 | If the loop is unrolled, the calculations of b and c from |
445 | different iterations can be interleaved. |
446 | |
447 | To obtain this result during reassociation, we bias the rank |
448 | of the phi definition x_1 upward, when it is recognized as an |
449 | accumulator pattern. The artificial rank causes it to be |
450 | added last, providing the desired independence. */ |
451 | |
452 | if (TREE_CODE (e) == SSA_NAME) |
453 | { |
454 | ssa_op_iter iter; |
455 | gimple *stmt; |
456 | int64_t rank; |
457 | tree op; |
458 | |
459 | /* If we already have a rank for this expression, use that. */ |
460 | rank = find_operand_rank (e); |
461 | if (rank != -1) |
462 | return rank; |
463 | |
464 | stmt = SSA_NAME_DEF_STMT (e); |
465 | if (gimple_code (stmt) == GIMPLE_PHI) |
466 | { |
467 | rank = phi_rank (stmt); |
468 | if (rank != bb_rank[gimple_bb (stmt)->index]) |
469 | bitmap_set_bit (biased_names, SSA_NAME_VERSION (e)); |
470 | } |
471 | |
472 | else if (!is_gimple_assign (stmt)) |
473 | rank = bb_rank[gimple_bb (stmt)->index]; |
474 | |
475 | else |
476 | { |
477 | bool biased_p = false; |
478 | bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL; |
479 | |
480 | /* Otherwise, find the maximum rank for the operands. As an |
481 | exception, remove the bias from loop-carried phis when propagating |
482 | the rank so that dependent operations are not also biased. */ |
483 | /* Simply walk over all SSA uses - this takes advantage of the |
484 | fact that non-SSA operands are is_gimple_min_invariant and |
485 | thus have rank 0. */ |
486 | rank = 0; |
487 | FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) |
488 | rank = propagate_rank (rank, op, maybe_biased_p); |
489 | |
490 | rank += 1; |
491 | if (biased_p) |
492 | bitmap_set_bit (biased_names, SSA_NAME_VERSION (e)); |
493 | } |
494 | |
495 | if (dump_file && (dump_flags & TDF_DETAILS)) |
496 | { |
497 | fprintf (dump_file, "Rank for "); |
498 | print_generic_expr (dump_file, e); |
499 | fprintf (dump_file, " is %" PRId64 "\n", rank); |
500 | } |
501 | |
502 | /* Note the rank in the hashtable so we don't recompute it. */ |
503 | insert_operand_rank (e, rank); |
504 | return rank; |
505 | } |
506 | |
507 | /* Constants, globals, etc., are rank 0 */ |
508 | return 0; |
509 | } |
510 | |
511 | |
512 | /* We want integer ones to end up last no matter what, since they are |
513 | the ones we can do the most with. */ |
514 | #define INTEGER_CONST_TYPE (1 << 4) |
515 | #define FLOAT_ONE_CONST_TYPE (1 << 3) |
516 | #define FLOAT_CONST_TYPE (1 << 2) |
517 | #define OTHER_CONST_TYPE (1 << 1) |
518 | |
519 | /* Classify an invariant tree into integer, float, or other, so that |
520 | we can sort them to be near other constants of the same type. */ |
521 | static inline int |
522 | constant_type (tree t) |
523 | { |
524 | if (INTEGRAL_TYPE_P (TREE_TYPE (t))) |
525 | return INTEGER_CONST_TYPE; |
526 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) |
527 | { |
528 | /* Sort -1.0 and 1.0 constants last: even in cases where const_binop |
529 | can't optimize some inexact operations, multiplication by -1.0 or |
530 | 1.0 can always be merged with other operands. */ |
531 | if (real_onep (t) || real_minus_onep (t)) |
532 | return FLOAT_ONE_CONST_TYPE; |
533 | return FLOAT_CONST_TYPE; |
534 | } |
535 | else |
536 | return OTHER_CONST_TYPE; |
537 | } |
538 | |
539 | /* qsort comparison function to sort operand entries PA and PB by rank |
540 | so that the sorted array is ordered by rank in decreasing order. */ |
541 | static int |
542 | sort_by_operand_rank (const void *pa, const void *pb) |
543 | { |
544 | const operand_entry *oea = *(const operand_entry *const *)pa; |
545 | const operand_entry *oeb = *(const operand_entry *const *)pb; |
546 | |
547 | if (oeb->rank != oea->rank) |
548 | return oeb->rank > oea->rank ? 1 : -1; |
549 | |
550 | /* It's nicer for optimize_expression if constants that are likely |
551 | to fold when added/multiplied/whatever are put next to each |
552 | other. Since all constants have rank 0, order them by type. */ |
553 | if (oea->rank == 0) |
554 | { |
555 | if (constant_type (oeb->op) != constant_type (oea->op)) |
556 | return constant_type (oea->op) - constant_type (oeb->op); |
557 | else |
558 | /* To make sorting result stable, we use unique IDs to determine |
559 | order. */ |
560 | return oeb->id > oea->id ? 1 : -1; |
561 | } |
562 | |
563 | if (TREE_CODE (oea->op) != SSA_NAME) |
564 | { |
565 | if (TREE_CODE (oeb->op) != SSA_NAME) |
566 | return oeb->id > oea->id ? 1 : -1; |
567 | else |
568 | return 1; |
569 | } |
570 | else if (TREE_CODE (oeb->op) != SSA_NAME) |
571 | return -1; |
572 | |
573 | /* Lastly, make sure the versions that are the same go next to each |
574 | other. */ |
575 | if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) |
576 | { |
577 | /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse |
578 | versions of removed SSA_NAMEs, so if possible, prefer to sort |
579 | based on basic block and gimple_uid of the SSA_NAME_DEF_STMT. |
580 | See PR60418. */ |
581 | gimple *stmta = SSA_NAME_DEF_STMT (oea->op); |
582 | gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op); |
583 | basic_block bba = gimple_bb (stmta); |
584 | basic_block bbb = gimple_bb (stmtb); |
585 | if (bbb != bba) |
586 | { |
587 | /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert |
588 | but the other might not be. */ |
589 | if (!bba) |
590 | return 1; |
591 | if (!bbb) |
592 | return -1; |
593 | /* If neither is, compare bb_rank. */ |
594 | if (bb_rank[bbb->index] != bb_rank[bba->index]) |
595 | return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16); |
596 | } |
597 | |
598 | bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb); |
599 | bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta); |
600 | if (da != db) |
601 | return da ? 1 : -1; |
602 | |
603 | return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1; |
604 | } |
605 | |
606 | return oeb->id > oea->id ? 1 : -1; |
607 | } |
608 | |
609 | /* Add an operand entry to *OPS for the tree operand OP. */ |
610 | |
611 | static void |
612 | add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL) |
613 | { |
614 | operand_entry *oe = operand_entry_pool.allocate (); |
615 | |
616 | oe->op = op; |
617 | oe->rank = get_rank (op); |
618 | oe->id = next_operand_entry_id++; |
619 | oe->count = 1; |
620 | oe->stmt_to_insert = stmt_to_insert; |
621 | ops->safe_push (oe); |
622 | } |
623 | |
624 | /* Add an operand entry to *OPS for the tree operand OP with repeat |
625 | count REPEAT. */ |
626 | |
627 | static void |
628 | add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op, |
629 | HOST_WIDE_INT repeat) |
630 | { |
631 | operand_entry *oe = operand_entry_pool.allocate (); |
632 | |
633 | oe->op = op; |
634 | oe->rank = get_rank (op); |
635 | oe->id = next_operand_entry_id++; |
636 | oe->count = repeat; |
637 | oe->stmt_to_insert = NULL; |
638 | ops->safe_push (oe); |
639 | |
640 | reassociate_stats.pows_encountered++; |
641 | } |
642 | |
643 | /* Returns true if we can associate the SSA def OP. */ |
644 | |
645 | static bool |
646 | can_reassociate_op_p (tree op) |
647 | { |
648 | if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) |
649 | return false; |
650 | /* Uninitialized variables can't participate in reassociation. */ |
651 | if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op)) |
652 | return false; |
653 | /* Make sure asm goto outputs do not participate in reassociation since |
654 | we have no way to find an insertion place after asm goto. */ |
655 | if (TREE_CODE (op) == SSA_NAME |
656 | && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM |
657 | && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0) |
658 | return false; |
659 | return true; |
660 | } |
661 | |
662 | /* Returns true if we can reassociate operations of TYPE. |
663 | That is for integral or non-saturating fixed-point types, and for |
664 | floating point type when associative-math is enabled. */ |
665 | |
666 | static bool |
667 | can_reassociate_type_p (tree type) |
668 | { |
669 | if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) |
670 | || NON_SAT_FIXED_POINT_TYPE_P (type) |
671 | || (flag_associative_math && FLOAT_TYPE_P (type))) |
672 | return true; |
673 | return false; |
674 | } |
675 | |
676 | /* Return true if STMT is a reassociable operation containing a binary |
677 | operation with tree code CODE, and is inside LOOP. */ |
678 | |
679 | static bool |
680 | is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop) |
681 | { |
682 | basic_block bb = gimple_bb (stmt); |
683 | |
684 | if (gimple_bb (stmt) == NULL) |
685 | return false; |
686 | |
687 | if (!flow_bb_inside_loop_p (loop, bb)) |
688 | return false; |
689 | |
690 | if (is_gimple_assign (stmt) |
691 | && gimple_assign_rhs_code (stmt) == code |
692 | && has_single_use (gimple_assign_lhs (stmt))) |
693 | { |
694 | tree rhs1 = gimple_assign_rhs1 (stmt); |
695 | tree rhs2 = gimple_assign_rhs2 (stmt); |
696 | if (!can_reassociate_op_p (rhs1) |
697 | || (rhs2 && !can_reassociate_op_p (rhs2))) |
698 | return false; |
699 | return true; |
700 | } |
701 | |
702 | return false; |
703 | } |
704 | |
705 | |
706 | /* Return true if STMT is a nop-conversion. */ |
707 | |
708 | static bool |
709 | gimple_nop_conversion_p (gimple *stmt) |
710 | { |
711 | if (gassign *ass = dyn_cast <gassign *> (stmt)) |
712 | { |
713 | if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) |
714 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)), |
715 | TREE_TYPE (gimple_assign_rhs1 (ass)))) |
716 | return true; |
717 | } |
718 | return false; |
719 | } |
720 | |
721 | /* Given NAME, if NAME is defined by a unary operation OPCODE, return the |
722 | operand of that operation. Otherwise, return NULL_TREE. */ |
723 | |
724 | static tree |
725 | get_unary_op (tree name, enum tree_code opcode) |
726 | { |
727 | gimple *stmt = SSA_NAME_DEF_STMT (name); |
728 | |
729 | /* Look through nop conversions (sign changes). */ |
730 | if (gimple_nop_conversion_p (stmt) |
731 | && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) |
732 | stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
733 | |
734 | if (!is_gimple_assign (stmt)) |
735 | return NULL_TREE; |
736 | |
737 | if (gimple_assign_rhs_code (stmt) == opcode) |
738 | return gimple_assign_rhs1 (stmt); |
739 | return NULL_TREE; |
740 | } |
741 | |
742 | /* Return true if OP1 and OP2 have the same value if casted to either type. */ |
743 | |
744 | static bool |
745 | ops_equal_values_p (tree op1, tree op2) |
746 | { |
747 | if (op1 == op2) |
748 | return true; |
749 | |
750 | tree orig_op1 = op1; |
751 | if (TREE_CODE (op1) == SSA_NAME) |
752 | { |
753 | gimple *stmt = SSA_NAME_DEF_STMT (op1); |
754 | if (gimple_nop_conversion_p (stmt)) |
755 | { |
756 | op1 = gimple_assign_rhs1 (stmt); |
757 | if (op1 == op2) |
758 | return true; |
759 | } |
760 | } |
761 | |
762 | if (TREE_CODE (op2) == SSA_NAME) |
763 | { |
764 | gimple *stmt = SSA_NAME_DEF_STMT (op2); |
765 | if (gimple_nop_conversion_p (stmt)) |
766 | { |
767 | op2 = gimple_assign_rhs1 (stmt); |
768 | if (op1 == op2 |
769 | || orig_op1 == op2) |
770 | return true; |
771 | } |
772 | } |
773 | |
774 | return false; |
775 | } |
776 | |
777 | |
778 | /* If CURR and LAST are a pair of ops that OPCODE allows us to |
779 | eliminate through equivalences, do so, remove them from OPS, and |
780 | return true. Otherwise, return false. */ |
781 | |
782 | static bool |
783 | eliminate_duplicate_pair (enum tree_code opcode, |
784 | vec<operand_entry *> *ops, |
785 | bool *all_done, |
786 | unsigned int i, |
787 | operand_entry *curr, |
788 | operand_entry *last) |
789 | { |
790 | |
791 | /* If we have two of the same op, and the opcode is & |, min, or max, |
792 | we can eliminate one of them. |
793 | If we have two of the same op, and the opcode is ^, we can |
794 | eliminate both of them. */ |
795 | |
796 | if (last && last->op == curr->op) |
797 | { |
798 | switch (opcode) |
799 | { |
800 | case MAX_EXPR: |
801 | case MIN_EXPR: |
802 | case BIT_IOR_EXPR: |
803 | case BIT_AND_EXPR: |
804 | if (dump_file && (dump_flags & TDF_DETAILS)) |
805 | { |
806 | fprintf (dump_file, "Equivalence: "); |
807 | print_generic_expr (dump_file, curr->op); |
808 | fprintf (dump_file, " [&|minmax] "); |
809 | print_generic_expr (dump_file, last->op); |
810 | fprintf (dump_file, " -> "); |
811 | print_generic_stmt (dump_file, last->op); |
812 | } |
813 | |
814 | ops->ordered_remove (i); |
815 | reassociate_stats.ops_eliminated ++; |
816 | |
817 | return true; |
818 | |
819 | case BIT_XOR_EXPR: |
820 | if (dump_file && (dump_flags & TDF_DETAILS)) |
821 | { |
822 | fprintf (dump_file, "Equivalence: "); |
823 | print_generic_expr (dump_file, curr->op); |
824 | fprintf (dump_file, " ^ "); |
825 | print_generic_expr (dump_file, last->op); |
826 | fprintf (dump_file, " -> nothing\n"); |
827 | } |
828 | |
829 | reassociate_stats.ops_eliminated += 2; |
830 | |
831 | if (ops->length () == 2) |
832 | { |
833 | ops->truncate (0); |
834 | add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); |
835 | *all_done = true; |
836 | } |
837 | else |
838 | { |
839 | ops->ordered_remove (i-1); |
840 | ops->ordered_remove (i-1); |
841 | } |
842 | |
843 | return true; |
844 | |
845 | default: |
846 | break; |
847 | } |
848 | } |
849 | return false; |
850 | } |
851 | |
852 | static vec<tree> plus_negates; |
853 | |
854 | /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not |
855 | expression, look in OPS for a corresponding positive operation to cancel |
856 | it out. If we find one, remove the other from OPS, replace |
857 | OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, |
858 | return false. */ |
859 | |
860 | static bool |
861 | eliminate_plus_minus_pair (enum tree_code opcode, |
862 | vec<operand_entry *> *ops, |
863 | unsigned int currindex, |
864 | operand_entry *curr) |
865 | { |
866 | tree negateop; |
867 | tree notop; |
868 | unsigned int i; |
869 | operand_entry *oe; |
870 | |
871 | if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) |
872 | return false; |
873 | |
874 | negateop = get_unary_op (curr->op, NEGATE_EXPR); |
875 | notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
876 | if (negateop == NULL_TREE && notop == NULL_TREE) |
877 | return false; |
878 | |
879 | /* Any non-negated version will have a rank that is one less than |
880 | the current rank. So once we hit those ranks, if we don't find |
881 | one, we can stop. */ |
882 | |
883 | for (i = currindex + 1; |
884 | ops->iterate (i, &oe) |
885 | && oe->rank >= curr->rank - 1; |
886 | i++) |
887 | { |
888 | if (negateop |
889 | && ops_equal_values_p (oe->op, negateop)) |
890 | { |
891 | if (dump_file && (dump_flags & TDF_DETAILS)) |
892 | { |
893 | fprintf (dump_file, "Equivalence: "); |
894 | print_generic_expr (dump_file, negateop); |
895 | fprintf (dump_file, " + -"); |
896 | print_generic_expr (dump_file, oe->op); |
897 | fprintf (dump_file, " -> 0\n"); |
898 | } |
899 | |
900 | ops->ordered_remove (i); |
901 | add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); |
902 | ops->ordered_remove (currindex); |
903 | reassociate_stats.ops_eliminated ++; |
904 | |
905 | return true; |
906 | } |
907 | else if (notop |
908 | && ops_equal_values_p (oe->op, notop)) |
909 | { |
910 | tree op_type = TREE_TYPE (oe->op); |
911 | |
912 | if (dump_file && (dump_flags & TDF_DETAILS)) |
913 | { |
914 | fprintf (dump_file, "Equivalence: "); |
915 | print_generic_expr (dump_file, notop); |
916 | fprintf (dump_file, " + ~"); |
917 | print_generic_expr (dump_file, oe->op); |
918 | fprintf (dump_file, " -> -1\n"); |
919 | } |
920 | |
921 | ops->ordered_remove (i); |
922 | add_to_ops_vec (ops, build_all_ones_cst (op_type)); |
923 | ops->ordered_remove (currindex); |
924 | reassociate_stats.ops_eliminated ++; |
925 | |
926 | return true; |
927 | } |
928 | } |
929 | |
930 | /* If CURR->OP is a negate expr without nop conversion in a plus expr: |
931 | save it for later inspection in repropagate_negates(). */ |
932 | if (negateop != NULL_TREE |
933 | && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR) |
934 | plus_negates.safe_push (curr->op); |
935 | |
936 | return false; |
937 | } |
938 | |
939 | /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a |
940 | bitwise not expression, look in OPS for a corresponding operand to |
941 | cancel it out. If we find one, remove the other from OPS, replace |
942 | OPS[CURRINDEX] with -1 or 0 respectively, and return true. Otherwise, |
943 | return false. */ |
944 | |
945 | static bool |
946 | eliminate_not_pairs (enum tree_code opcode, |
947 | vec<operand_entry *> *ops, |
948 | unsigned int currindex, |
949 | operand_entry *curr) |
950 | { |
951 | tree notop; |
952 | unsigned int i; |
953 | operand_entry *oe; |
954 | |
955 | if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) |
956 | || TREE_CODE (curr->op) != SSA_NAME) |
957 | return false; |
958 | |
959 | notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
960 | if (notop == NULL_TREE) |
961 | return false; |
962 | |
963 | /* Any non-not version will have a rank that is one less than |
964 | the current rank. So once we hit those ranks, if we don't find |
965 | one, we can stop. */ |
966 | |
967 | for (i = currindex + 1; |
968 | ops->iterate (i, &oe) |
969 | && oe->rank >= curr->rank - 1; |
970 | i++) |
971 | { |
972 | if (oe->op == notop) |
973 | { |
974 | if (dump_file && (dump_flags & TDF_DETAILS)) |
975 | { |
976 | fprintf (dump_file, "Equivalence: "); |
977 | print_generic_expr (dump_file, notop); |
978 | if (opcode == BIT_AND_EXPR) |
979 | fprintf (dump_file, " & ~"); |
980 | else if (opcode == BIT_IOR_EXPR) |
981 | fprintf (dump_file, " | ~"); |
982 | print_generic_expr (dump_file, oe->op); |
983 | if (opcode == BIT_AND_EXPR) |
984 | fprintf (dump_file, " -> 0\n"); |
985 | else if (opcode == BIT_IOR_EXPR) |
986 | fprintf (dump_file, " -> -1\n"); |
987 | } |
988 | |
989 | if (opcode == BIT_AND_EXPR) |
990 | oe->op = build_zero_cst (TREE_TYPE (oe->op)); |
991 | else if (opcode == BIT_IOR_EXPR) |
992 | oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); |
993 | |
994 | reassociate_stats.ops_eliminated += ops->length () - 1; |
995 | ops->truncate (0); |
996 | ops->quick_push (oe); |
997 | return true; |
998 | } |
999 | } |
1000 | |
1001 | return false; |
1002 | } |
1003 | |
1004 | /* Use constant value that may be present in OPS to try to eliminate |
1005 | operands. Note that this function is only really used when we've |
1006 | eliminated ops for other reasons, or merged constants. Across |
1007 | single statements, fold already does all of this, plus more. There |
1008 | is little point in duplicating logic, so I've only included the |
1009 | identities that I could ever construct testcases to trigger. */ |
1010 | |
1011 | static void |
1012 | eliminate_using_constants (enum tree_code opcode, |
1013 | vec<operand_entry *> *ops) |
1014 | { |
1015 | operand_entry *oelast = ops->last (); |
1016 | tree type = TREE_TYPE (oelast->op); |
1017 | |
1018 | if (oelast->rank == 0 |
1019 | && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) |
1020 | { |
1021 | switch (opcode) |
1022 | { |
1023 | case BIT_AND_EXPR: |
1024 | if (integer_zerop (oelast->op)) |
1025 | { |
1026 | if (ops->length () != 1) |
1027 | { |
1028 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1029 | fprintf (dump_file, "Found & 0, removing all other ops\n"); |
1030 | |
1031 | reassociate_stats.ops_eliminated += ops->length () - 1; |
1032 | |
1033 | ops->truncate (0); |
1034 | ops->quick_push (oelast); |
1035 | return; |
1036 | } |
1037 | } |
1038 | else if (integer_all_onesp (oelast->op)) |
1039 | { |
1040 | if (ops->length () != 1) |
1041 | { |
1042 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1043 | fprintf (dump_file, "Found & -1, removing\n"); |
1044 | ops->pop (); |
1045 | reassociate_stats.ops_eliminated++; |
1046 | } |
1047 | } |
1048 | break; |
1049 | case BIT_IOR_EXPR: |
1050 | if (integer_all_onesp (oelast->op)) |
1051 | { |
1052 | if (ops->length () != 1) |
1053 | { |
1054 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1055 | fprintf (dump_file, "Found | -1, removing all other ops\n"); |
1056 | |
1057 | reassociate_stats.ops_eliminated += ops->length () - 1; |
1058 | |
1059 | ops->truncate (0); |
1060 | ops->quick_push (oelast); |
1061 | return; |
1062 | } |
1063 | } |
1064 | else if (integer_zerop (oelast->op)) |
1065 | { |
1066 | if (ops->length () != 1) |
1067 | { |
1068 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1069 | fprintf (dump_file, "Found | 0, removing\n"); |
1070 | ops->pop (); |
1071 | reassociate_stats.ops_eliminated++; |
1072 | } |
1073 | } |
1074 | break; |
1075 | case MULT_EXPR: |
1076 | if (integer_zerop (oelast->op) |
1077 | || (FLOAT_TYPE_P (type) |
1078 | && !HONOR_NANS (type) |
1079 | && !HONOR_SIGNED_ZEROS (type) |
1080 | && real_zerop (oelast->op))) |
1081 | { |
1082 | if (ops->length () != 1) |
1083 | { |
1084 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1085 | fprintf (dump_file, "Found * 0, removing all other ops\n"); |
1086 | |
1087 | reassociate_stats.ops_eliminated += ops->length () - 1; |
1088 | ops->truncate (0); |
1089 | ops->quick_push (oelast); |
1090 | return; |
1091 | } |
1092 | } |
1093 | else if (integer_onep (oelast->op) |
1094 | || (FLOAT_TYPE_P (type) |
1095 | && !HONOR_SNANS (type) |
1096 | && real_onep (oelast->op))) |
1097 | { |
1098 | if (ops->length () != 1) |
1099 | { |
1100 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1101 | fprintf (dump_file, "Found * 1, removing\n"); |
1102 | ops->pop (); |
1103 | reassociate_stats.ops_eliminated++; |
1104 | return; |
1105 | } |
1106 | } |
1107 | break; |
1108 | case BIT_XOR_EXPR: |
1109 | case PLUS_EXPR: |
1110 | case MINUS_EXPR: |
1111 | if (integer_zerop (oelast->op) |
1112 | || (FLOAT_TYPE_P (type) |
1113 | && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) |
1114 | && fold_real_zero_addition_p (type, 0, oelast->op, |
1115 | opcode == MINUS_EXPR))) |
1116 | { |
1117 | if (ops->length () != 1) |
1118 | { |
1119 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1120 | fprintf (dump_file, "Found [|^+] 0, removing\n"); |
1121 | ops->pop (); |
1122 | reassociate_stats.ops_eliminated++; |
1123 | return; |
1124 | } |
1125 | } |
1126 | break; |
1127 | default: |
1128 | break; |
1129 | } |
1130 | } |
1131 | } |
1132 | |
1133 | |
1134 | static void linearize_expr_tree (vec<operand_entry *> *, gimple *, |
1135 | bool, bool); |
1136 | |
1137 | /* Structure for tracking and counting operands. */ |
1138 | struct oecount { |
1139 | unsigned int cnt; |
1140 | unsigned int id; |
1141 | enum tree_code oecode; |
1142 | tree op; |
1143 | }; |
1144 | |
1145 | |
1146 | /* The heap for the oecount hashtable and the sorted list of operands. */ |
1147 | static vec<oecount> cvec; |
1148 | |
1149 | |
1150 | /* Oecount hashtable helpers. */ |
1151 | |
1152 | struct oecount_hasher : int_hash <int, 0, 1> |
1153 | { |
1154 | static inline hashval_t hash (int); |
1155 | static inline bool equal (int, int); |
1156 | }; |
1157 | |
1158 | /* Hash function for oecount. */ |
1159 | |
1160 | inline hashval_t |
1161 | oecount_hasher::hash (int p) |
1162 | { |
1163 | const oecount *c = &cvec[p - 42]; |
1164 | return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; |
1165 | } |
1166 | |
1167 | /* Comparison function for oecount. */ |
1168 | |
1169 | inline bool |
1170 | oecount_hasher::equal (int p1, int p2) |
1171 | { |
1172 | const oecount *c1 = &cvec[p1 - 42]; |
1173 | const oecount *c2 = &cvec[p2 - 42]; |
1174 | return c1->oecode == c2->oecode && c1->op == c2->op; |
1175 | } |
1176 | |
1177 | /* Comparison function for qsort sorting oecount elements by count. */ |
1178 | |
1179 | static int |
1180 | oecount_cmp (const void *p1, const void *p2) |
1181 | { |
1182 | const oecount *c1 = (const oecount *)p1; |
1183 | const oecount *c2 = (const oecount *)p2; |
1184 | if (c1->cnt != c2->cnt) |
1185 | return c1->cnt > c2->cnt ? 1 : -1; |
1186 | else |
1187 | /* If counts are identical, use unique IDs to stabilize qsort. */ |
1188 | return c1->id > c2->id ? 1 : -1; |
1189 | } |
1190 | |
1191 | /* Return TRUE iff STMT represents a builtin call that raises OP |
1192 | to some exponent. */ |
1193 | |
1194 | static bool |
1195 | stmt_is_power_of_op (gimple *stmt, tree op) |
1196 | { |
1197 | if (!is_gimple_call (stmt)) |
1198 | return false; |
1199 | |
1200 | switch (gimple_call_combined_fn (stmt)) |
1201 | { |
1202 | CASE_CFN_POW: |
1203 | CASE_CFN_POWI: |
1204 | return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); |
1205 | |
1206 | default: |
1207 | return false; |
1208 | } |
1209 | } |
1210 | |
1211 | /* Given STMT which is a __builtin_pow* call, decrement its exponent |
1212 | in place and return the result. Assumes that stmt_is_power_of_op |
1213 | was previously called for STMT and returned TRUE. */ |
1214 | |
1215 | static HOST_WIDE_INT |
1216 | decrement_power (gimple *stmt) |
1217 | { |
1218 | REAL_VALUE_TYPE c, cint; |
1219 | HOST_WIDE_INT power; |
1220 | tree arg1; |
1221 | |
1222 | switch (gimple_call_combined_fn (stmt)) |
1223 | { |
1224 | CASE_CFN_POW: |
1225 | arg1 = gimple_call_arg (stmt, 1); |
1226 | c = TREE_REAL_CST (arg1); |
1227 | power = real_to_integer (&c) - 1; |
1228 | real_from_integer (&cint, VOIDmode, power, SIGNED); |
1229 | gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); |
1230 | return power; |
1231 | |
1232 | CASE_CFN_POWI: |
1233 | arg1 = gimple_call_arg (stmt, 1); |
1234 | power = TREE_INT_CST_LOW (arg1) - 1; |
1235 | gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); |
1236 | return power; |
1237 | |
1238 | default: |
1239 | gcc_unreachable (); |
1240 | } |
1241 | } |
1242 | |
1243 | /* Make a new SSA name for the LHS of STMT, replace all uses of the |
1244 | old LHS with it, and return the new SSA name. */ |
1245 | |
1246 | static tree |
1247 | make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op) |
1248 | { |
1249 | gimple *use_stmt; |
1250 | use_operand_p use; |
1251 | imm_use_iterator iter; |
1252 | tree new_lhs, new_debug_lhs = NULL_TREE; |
1253 | tree lhs = gimple_get_lhs (stmt); |
1254 | |
1255 | new_lhs = make_ssa_name (TREE_TYPE (lhs)); |
1256 | gimple_set_lhs (stmt, new_lhs); |
1257 | |
1258 | /* Also need to update GIMPLE_DEBUGs. */ |
1259 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) |
1260 | { |
1261 | tree repl = new_lhs; |
1262 | if (is_gimple_debug (use_stmt)) |
1263 | { |
1264 | if (new_debug_lhs == NULL_TREE) |
1265 | { |
1266 | new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs)); |
1267 | gdebug *def_temp |
1268 | = gimple_build_debug_bind (new_debug_lhs, |
1269 | build2 (opcode, TREE_TYPE (lhs), |
1270 | new_lhs, op), |
1271 | stmt); |
1272 | gimple_set_uid (def_temp, gimple_uid (stmt)); |
1273 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1274 | gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT); |
1275 | } |
1276 | repl = new_debug_lhs; |
1277 | } |
1278 | FOR_EACH_IMM_USE_ON_STMT (use, iter) |
1279 | SET_USE (use, repl); |
1280 | update_stmt (use_stmt); |
1281 | } |
1282 | return new_lhs; |
1283 | } |
1284 | |
1285 | /* Replace all SSAs defined in STMTS_TO_FIX and replace their |
1286 | uses with new SSAs. Also do this for the stmt that defines DEF |
1287 | if *DEF is not OP. */ |
1288 | |
1289 | static void |
1290 | make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op, |
1291 | vec<gimple *> &stmts_to_fix) |
1292 | { |
1293 | unsigned i; |
1294 | gimple *stmt; |
1295 | |
1296 | if (*def != op |
1297 | && TREE_CODE (*def) == SSA_NAME |
1298 | && (stmt = SSA_NAME_DEF_STMT (*def)) |
1299 | && gimple_code (stmt) != GIMPLE_NOP) |
1300 | *def = make_new_ssa_for_def (stmt, opcode, op); |
1301 | |
1302 | FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt) |
1303 | make_new_ssa_for_def (stmt, opcode, op); |
1304 | } |
1305 | |
1306 | /* Find the single immediate use of STMT's LHS, and replace it |
1307 | with OP. Remove STMT. If STMT's LHS is the same as *DEF, |
1308 | replace *DEF with OP as well. */ |
1309 | |
1310 | static void |
1311 | propagate_op_to_single_use (tree op, gimple *stmt, tree *def) |
1312 | { |
1313 | tree lhs; |
1314 | gimple *use_stmt; |
1315 | use_operand_p use; |
1316 | gimple_stmt_iterator gsi; |
1317 | |
1318 | if (is_gimple_call (stmt)) |
1319 | lhs = gimple_call_lhs (stmt); |
1320 | else |
1321 | lhs = gimple_assign_lhs (stmt); |
1322 | |
1323 | gcc_assert (has_single_use (lhs)); |
1324 | single_imm_use (lhs, &use, &use_stmt); |
1325 | if (lhs == *def) |
1326 | *def = op; |
1327 | SET_USE (use, op); |
1328 | if (TREE_CODE (op) != SSA_NAME) |
1329 | update_stmt (use_stmt); |
1330 | gsi = gsi_for_stmt (stmt); |
1331 | unlink_stmt_vdef (stmt); |
1332 | reassoc_remove_stmt (&gsi); |
1333 | release_defs (stmt); |
1334 | } |
1335 | |
1336 | /* Walks the linear chain with result *DEF searching for an operation |
1337 | with operand OP and code OPCODE, removing that from the chain. *DEF |
1338 | is updated if there is only one operand but no operation left. */ |
1339 | |
1340 | static void |
1341 | zero_one_operation (tree *def, enum tree_code opcode, tree op) |
1342 | { |
1343 | tree orig_def = *def; |
1344 | gimple *stmt = SSA_NAME_DEF_STMT (*def); |
1345 | /* PR72835 - Record the stmt chain that has to be updated such that |
1346 | we don't use the same LHS when the values computed are different. */ |
1347 | auto_vec<gimple *, 64> stmts_to_fix; |
1348 | |
1349 | do |
1350 | { |
1351 | tree name; |
1352 | |
1353 | if (opcode == MULT_EXPR) |
1354 | { |
1355 | if (stmt_is_power_of_op (stmt, op)) |
1356 | { |
1357 | if (decrement_power (stmt) == 1) |
1358 | { |
1359 | if (stmts_to_fix.length () > 0) |
1360 | stmts_to_fix.pop (); |
1361 | propagate_op_to_single_use (op, stmt, def); |
1362 | } |
1363 | break; |
1364 | } |
1365 | else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR) |
1366 | { |
1367 | if (gimple_assign_rhs1 (stmt) == op) |
1368 | { |
1369 | tree cst = build_minus_one_cst (TREE_TYPE (op)); |
1370 | if (stmts_to_fix.length () > 0) |
1371 | stmts_to_fix.pop (); |
1372 | propagate_op_to_single_use (cst, stmt, def); |
1373 | break; |
1374 | } |
1375 | else if (integer_minus_onep (op) |
1376 | || real_minus_onep (op)) |
1377 | { |
1378 | gimple_assign_set_rhs_code |
1379 | (stmt, TREE_CODE (gimple_assign_rhs1 (stmt))); |
1380 | break; |
1381 | } |
1382 | } |
1383 | } |
1384 | |
1385 | name = gimple_assign_rhs1 (stmt); |
1386 | |
1387 | /* If this is the operation we look for and one of the operands |
1388 | is ours, simply propagate the other operand into the stmt's |
1389 | single use. */ |
1390 | if (gimple_assign_rhs_code (stmt) == opcode |
1391 | && (name == op |
1392 | || gimple_assign_rhs2 (stmt) == op)) |
1393 | { |
1394 | if (name == op) |
1395 | name = gimple_assign_rhs2 (stmt); |
1396 | if (stmts_to_fix.length () > 0) |
1397 | stmts_to_fix.pop (); |
1398 | propagate_op_to_single_use (name, stmt, def); |
1399 | break; |
1400 | } |
1401 | |
1402 | /* We might have a multiply of two __builtin_pow* calls, and |
1403 | the operand might be hiding in the rightmost one. Likewise |
1404 | this can happen for a negate. */ |
1405 | if (opcode == MULT_EXPR |
1406 | && gimple_assign_rhs_code (stmt) == opcode |
1407 | && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME |
1408 | && has_single_use (gimple_assign_rhs2 (stmt))) |
1409 | { |
1410 | gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
1411 | if (stmt_is_power_of_op (stmt2, op)) |
1412 | { |
1413 | if (decrement_power (stmt2) == 1) |
1414 | propagate_op_to_single_use (op, stmt2, def); |
1415 | else |
1416 | stmts_to_fix.safe_push (stmt2); |
1417 | break; |
1418 | } |
1419 | else if (is_gimple_assign (stmt2) |
1420 | && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR) |
1421 | { |
1422 | if (gimple_assign_rhs1 (stmt2) == op) |
1423 | { |
1424 | tree cst = build_minus_one_cst (TREE_TYPE (op)); |
1425 | propagate_op_to_single_use (cst, stmt2, def); |
1426 | break; |
1427 | } |
1428 | else if (integer_minus_onep (op) |
1429 | || real_minus_onep (op)) |
1430 | { |
1431 | stmts_to_fix.safe_push (stmt2); |
1432 | gimple_assign_set_rhs_code |
1433 | (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2))); |
1434 | break; |
1435 | } |
1436 | } |
1437 | } |
1438 | |
1439 | /* Continue walking the chain. */ |
1440 | gcc_assert (name != op |
1441 | && TREE_CODE (name) == SSA_NAME); |
1442 | stmt = SSA_NAME_DEF_STMT (name); |
1443 | stmts_to_fix.safe_push (stmt); |
1444 | } |
1445 | while (1); |
1446 | |
1447 | if (stmts_to_fix.length () > 0 || *def == orig_def) |
1448 | make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix); |
1449 | } |
1450 | |
1451 | /* Returns true if statement S1 dominates statement S2. Like |
1452 | stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ |
1453 | |
1454 | static bool |
1455 | reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2) |
1456 | { |
1457 | basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); |
1458 | |
1459 | /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) |
1460 | SSA_NAME. Assume it lives at the beginning of function and |
1461 | thus dominates everything. */ |
1462 | if (!bb1 || s1 == s2) |
1463 | return true; |
1464 | |
1465 | /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */ |
1466 | if (!bb2) |
1467 | return false; |
1468 | |
1469 | if (bb1 == bb2) |
1470 | { |
1471 | /* PHIs in the same basic block are assumed to be |
1472 | executed all in parallel, if only one stmt is a PHI, |
1473 | it dominates the other stmt in the same basic block. */ |
1474 | if (gimple_code (s1) == GIMPLE_PHI) |
1475 | return true; |
1476 | |
1477 | if (gimple_code (s2) == GIMPLE_PHI) |
1478 | return false; |
1479 | |
1480 | gcc_assert (gimple_uid (s1) && gimple_uid (s2)); |
1481 | |
1482 | if (gimple_uid (s1) < gimple_uid (s2)) |
1483 | return true; |
1484 | |
1485 | if (gimple_uid (s1) > gimple_uid (s2)) |
1486 | return false; |
1487 | |
1488 | gimple_stmt_iterator gsi = gsi_for_stmt (s1); |
1489 | unsigned int uid = gimple_uid (s1); |
1490 | for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) |
1491 | { |
1492 | gimple *s = gsi_stmt (gsi); |
1493 | if (gimple_uid (s) != uid) |
1494 | break; |
1495 | if (s == s2) |
1496 | return true; |
1497 | } |
1498 | |
1499 | return false; |
1500 | } |
1501 | |
1502 | return dominated_by_p (CDI_DOMINATORS, bb2, bb1); |
1503 | } |
1504 | |
1505 | /* Insert STMT after INSERT_POINT. */ |
1506 | |
1507 | static void |
1508 | insert_stmt_after (gimple *stmt, gimple *insert_point) |
1509 | { |
1510 | gimple_stmt_iterator gsi; |
1511 | basic_block bb; |
1512 | |
1513 | if (gimple_code (insert_point) == GIMPLE_PHI) |
1514 | bb = gimple_bb (insert_point); |
1515 | else if (!stmt_ends_bb_p (insert_point)) |
1516 | { |
1517 | gsi = gsi_for_stmt (insert_point); |
1518 | gimple_set_uid (stmt, gimple_uid (insert_point)); |
1519 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
1520 | return; |
1521 | } |
1522 | else if (gimple_code (insert_point) == GIMPLE_ASM |
1523 | && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0) |
1524 | /* We have no idea where to insert - it depends on where the |
1525 | uses will be placed. */ |
1526 | gcc_unreachable (); |
1527 | else |
1528 | /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, |
1529 | thus if it must end a basic block, it should be a call that can |
1530 | throw, or some assignment that can throw. If it throws, the LHS |
1531 | of it will not be initialized though, so only valid places using |
1532 | the SSA_NAME should be dominated by the fallthru edge. */ |
1533 | bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; |
1534 | gsi = gsi_after_labels (bb); |
1535 | if (gsi_end_p (gsi)) |
1536 | { |
1537 | gimple_stmt_iterator gsi2 = gsi_last_bb (bb); |
1538 | gimple_set_uid (stmt, |
1539 | gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); |
1540 | } |
1541 | else |
1542 | gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); |
1543 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
1544 | } |
1545 | |
1546 | /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for |
1547 | the result. Places the statement after the definition of either |
1548 | OP1 or OP2. Returns the new statement. */ |
1549 | |
1550 | static gimple * |
1551 | build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) |
1552 | { |
1553 | gimple *op1def = NULL, *op2def = NULL; |
1554 | gimple_stmt_iterator gsi; |
1555 | tree op; |
1556 | gassign *sum; |
1557 | |
1558 | /* Create the addition statement. */ |
1559 | op = make_ssa_name (type); |
1560 | sum = gimple_build_assign (op, opcode, op1, op2); |
1561 | |
1562 | /* Find an insertion place and insert. */ |
1563 | if (TREE_CODE (op1) == SSA_NAME) |
1564 | op1def = SSA_NAME_DEF_STMT (op1); |
1565 | if (TREE_CODE (op2) == SSA_NAME) |
1566 | op2def = SSA_NAME_DEF_STMT (op2); |
1567 | if ((!op1def || gimple_nop_p (op1def)) |
1568 | && (!op2def || gimple_nop_p (op2def))) |
1569 | { |
1570 | gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1571 | if (!gsi_end_p (gsi) |
1572 | && is_gimple_call (gsi_stmt (gsi)) |
1573 | && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE)) |
1574 | { |
1575 | /* Don't add statements before a returns_twice call at the start |
1576 | of a function. */ |
1577 | split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1578 | gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1579 | } |
1580 | if (gsi_end_p (gsi)) |
1581 | { |
1582 | gimple_stmt_iterator gsi2 |
1583 | = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1584 | gimple_set_uid (sum, |
1585 | gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); |
1586 | } |
1587 | else |
1588 | gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); |
1589 | gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
1590 | } |
1591 | else |
1592 | { |
1593 | gimple *insert_point; |
1594 | if ((!op1def || gimple_nop_p (op1def)) |
1595 | || (op2def && !gimple_nop_p (op2def) |
1596 | && reassoc_stmt_dominates_stmt_p (op1def, op2def))) |
1597 | insert_point = op2def; |
1598 | else |
1599 | insert_point = op1def; |
1600 | insert_stmt_after (sum, insert_point); |
1601 | } |
1602 | update_stmt (sum); |
1603 | |
1604 | return sum; |
1605 | } |
1606 | |
1607 | /* Perform un-distribution of divisions and multiplications. |
1608 | A * X + B * X is transformed into (A + B) * X and A / X + B / X |
1609 | to (A + B) / X for real X. |
1610 | |
1611 | The algorithm is organized as follows. |
1612 | |
1613 | - First we walk the addition chain *OPS looking for summands that |
1614 | are defined by a multiplication or a real division. This results |
1615 | in the candidates bitmap with relevant indices into *OPS. |
1616 | |
1617 | - Second we build the chains of multiplications or divisions for |
1618 | these candidates, counting the number of occurrences of (operand, code) |
1619 | pairs in all of the candidates chains. |
1620 | |
1621 | - Third we sort the (operand, code) pairs by number of occurrence and |
1622 | process them starting with the pair with the most uses. |
1623 | |
1624 | * For each such pair we walk the candidates again to build a |
1625 | second candidate bitmap noting all multiplication/division chains |
1626 | that have at least one occurrence of (operand, code). |
1627 | |
1628 | * We build an alternate addition chain only covering these |
1629 | candidates with one (operand, code) operation removed from their |
1630 | multiplication/division chain. |
1631 | |
1632 | * The first candidate gets replaced by the alternate addition chain |
1633 | multiplied/divided by the operand. |
1634 | |
1635 | * All candidate chains get disabled for further processing and |
1636 | processing of (operand, code) pairs continues. |
1637 | |
1638 | The alternate addition chains built are re-processed by the main |
1639 | reassociation algorithm which allows optimizing a * x * y + b * y * x |
   to (a + b) * x * y in one invocation of the reassociation pass.  */
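
/* A worked example (an illustrative sketch, not from the original
   sources): for the chain  t1 + t2 + c  with  t1 = a * x  and
   t2 = b * x, the first step marks t1 and t2 as candidates, the second
   counts the pair (x, MULT_EXPR) twice, and the third strips one
   multiplication by x from each candidate and rewrites the first one,
   yielding

     t3 = a + b;
     t4 = t3 * x;

   so the addition chain becomes  t4 + 0 + c, where the zero is later
   removed when the operand list is re-optimized.  */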
1641 | |
1642 | static bool |
1643 | undistribute_ops_list (enum tree_code opcode, |
1644 | vec<operand_entry *> *ops, class loop *loop) |
1645 | { |
1646 | unsigned int length = ops->length (); |
1647 | operand_entry *oe1; |
1648 | unsigned i, j; |
1649 | unsigned nr_candidates, nr_candidates2; |
1650 | sbitmap_iterator sbi0; |
1651 | vec<operand_entry *> *subops; |
1652 | bool changed = false; |
1653 | unsigned int next_oecount_id = 0; |
1654 | |
1655 | if (length <= 1 |
1656 | || opcode != PLUS_EXPR) |
1657 | return false; |
1658 | |
1659 | /* Build a list of candidates to process. */ |
1660 | auto_sbitmap candidates (length); |
1661 | bitmap_clear (candidates); |
1662 | nr_candidates = 0; |
1663 | FOR_EACH_VEC_ELT (*ops, i, oe1) |
1664 | { |
1665 | enum tree_code dcode; |
1666 | gimple *oe1def; |
1667 | |
1668 | if (TREE_CODE (oe1->op) != SSA_NAME) |
1669 | continue; |
1670 | oe1def = SSA_NAME_DEF_STMT (oe1->op); |
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
1680 | nr_candidates++; |
1681 | } |
1682 | |
1683 | if (nr_candidates < 2) |
1684 | return false; |
1685 | |
1686 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1687 | { |
1688 | fprintf (stream: dump_file, format: "searching for un-distribute opportunities " ); |
1689 | print_generic_expr (dump_file, |
1690 | (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE); |
1691 | fprintf (stream: dump_file, format: " %d\n" , nr_candidates); |
1692 | } |
1693 | |
1694 | /* Build linearized sub-operand lists and the counting table. */ |
  cvec.create (0);
1696 | |
1697 | hash_table<oecount_hasher> ctable (15); |
1698 | |
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
1701 | typedef vec<operand_entry *> vec_operand_entry_t_heap; |
1702 | subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); |
1703 | EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
1704 | { |
1705 | gimple *oedef; |
1706 | enum tree_code oecode; |
1707 | unsigned j; |
1708 | |
1709 | oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); |
      oecode = gimple_assign_rhs_code (oedef);
1711 | linearize_expr_tree (&subops[i], oedef, |
1712 | associative_tree_code (oecode), false); |
1713 | |
1714 | FOR_EACH_VEC_ELT (subops[i], j, oe1) |
1715 | { |
1716 | oecount c; |
1717 | int *slot; |
1718 | int idx; |
1719 | c.oecode = oecode; |
1720 | c.cnt = 1; |
1721 | c.id = next_oecount_id++; |
1722 | c.op = oe1->op; |
	  cvec.safe_push (c);
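	  /* The hash table reserves the slot values 0 and 1 as its
	     "empty" and "deleted" markers (oecount_hasher is an int_hash
	     with those sentinels), so bias the vector index away from
	     them: the entry just pushed lives at cvec.length () - 1,
	     hence the +41 here and the matching -42 below.  */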
1724 | idx = cvec.length () + 41; |
	  slot = ctable.find_slot (idx, INSERT);
1726 | if (!*slot) |
1727 | { |
1728 | *slot = idx; |
1729 | } |
1730 | else |
1731 | { |
1732 | cvec.pop (); |
1733 | cvec[*slot - 42].cnt++; |
1734 | } |
1735 | } |
1736 | } |
1737 | |
1738 | /* Sort the counting table. */ |
1739 | cvec.qsort (oecount_cmp); |
1740 | |
1741 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1742 | { |
1743 | oecount *c; |
1744 | fprintf (stream: dump_file, format: "Candidates:\n" ); |
1745 | FOR_EACH_VEC_ELT (cvec, j, c) |
1746 | { |
1747 | fprintf (stream: dump_file, format: " %u %s: " , c->cnt, |
1748 | c->oecode == MULT_EXPR |
1749 | ? "*" : c->oecode == RDIV_EXPR ? "/" : "?" ); |
1750 | print_generic_expr (dump_file, c->op); |
1751 | fprintf (stream: dump_file, format: "\n" ); |
1752 | } |
1753 | } |
1754 | |
1755 | /* Process the (operand, code) pairs in order of most occurrence. */ |
1756 | auto_sbitmap candidates2 (length); |
1757 | while (!cvec.is_empty ()) |
1758 | { |
1759 | oecount *c = &cvec.last (); |
1760 | if (c->cnt < 2) |
1761 | break; |
1762 | |
1763 | /* Now collect the operands in the outer chain that contain |
1764 | the common operand in their inner chain. */ |
1765 | bitmap_clear (candidates2); |
1766 | nr_candidates2 = 0; |
1767 | EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
1768 | { |
1769 | gimple *oedef; |
1770 | enum tree_code oecode; |
1771 | unsigned j; |
1772 | tree op = (*ops)[i]->op; |
1773 | |
1774 | /* If we undistributed in this chain already this may be |
1775 | a constant. */ |
1776 | if (TREE_CODE (op) != SSA_NAME) |
1777 | continue; |
1778 | |
1779 | oedef = SSA_NAME_DEF_STMT (op); |
	  oecode = gimple_assign_rhs_code (oedef);
1781 | if (oecode != c->oecode) |
1782 | continue; |
1783 | |
1784 | FOR_EACH_VEC_ELT (subops[i], j, oe1) |
1785 | { |
1786 | if (oe1->op == c->op) |
1787 | { |
		  bitmap_set_bit (candidates2, i);
1789 | ++nr_candidates2; |
1790 | break; |
1791 | } |
1792 | } |
1793 | } |
1794 | |
1795 | if (nr_candidates2 >= 2) |
1796 | { |
1797 | operand_entry *oe1, *oe2; |
1798 | gimple *prod; |
1799 | int first = bitmap_first_set_bit (candidates2); |
1800 | |
1801 | /* Build the new addition chain. */ |
1802 | oe1 = (*ops)[first]; |
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
1809 | EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) |
1810 | { |
1811 | gimple *sum; |
1812 | oe2 = (*ops)[i]; |
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
1821 | oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); |
1822 | oe2->rank = 0; |
1823 | oe1->op = gimple_get_lhs (sum); |
1824 | } |
1825 | |
1826 | /* Apply the multiplication/division. */ |
1827 | prod = build_and_add_sum (TREE_TYPE (oe1->op), |
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }
1835 | |
1836 | /* Record it in the addition chain and disable further |
1837 | undistribution with this op. */ |
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
1840 | subops[first].release (); |
1841 | |
1842 | changed = true; |
1843 | } |
1844 | |
1845 | cvec.pop (); |
1846 | } |
1847 | |
1848 | for (i = 0; i < ops->length (); ++i) |
1849 | subops[i].release (); |
  free (subops);
1851 | cvec.release (); |
1852 | |
1853 | return changed; |
1854 | } |
1855 | |
/* Pair describing one BIT_FIELD_REF into a VECTOR_TYPE SSA_NAME:
   first: the element index the BIT_FIELD_REF accesses.
   second: the index into the *OPS vec of the corresponding operand entry.  */
1859 | typedef std::pair<unsigned, unsigned> v_info_elem; |
1860 | struct v_info { |
1861 | tree vec_type; |
1862 | auto_vec<v_info_elem, 32> vec; |
1863 | }; |
1864 | typedef v_info *v_info_ptr; |
1865 | |
1866 | /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */ |
1867 | static int |
1868 | sort_by_mach_mode (const void *p_i, const void *p_j) |
1869 | { |
1870 | const tree tr1 = *((const tree *) p_i); |
1871 | const tree tr2 = *((const tree *) p_j); |
1872 | unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1)); |
1873 | unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2)); |
1874 | if (mode1 > mode2) |
1875 | return 1; |
1876 | else if (mode1 < mode2) |
1877 | return -1; |
1878 | if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2)) |
1879 | return -1; |
1880 | else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2)) |
1881 | return 1; |
1882 | return 0; |
1883 | } |
1884 | |
1885 | /* Cleanup hash map for VECTOR information. */ |
1886 | static void |
1887 | cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map) |
1888 | { |
1889 | for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin (); |
1890 | it != info_map.end (); ++it) |
1891 | { |
1892 | v_info_ptr info = (*it).second; |
1893 | delete info; |
1894 | (*it).second = NULL; |
1895 | } |
1896 | } |
1897 | |
1898 | /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE. |
1899 | V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k] |
1900 | is transformed to |
1901 | Vs = (V1 + V2 + ... + Vn) |
1902 | Vs[0] + Vs[1] + ... + Vs[k] |
1903 | |
1904 | The basic steps are listed below: |
1905 | |
   1) Walk the addition chain *OPS looking for summands that are defined
   by a BIT_FIELD_REF on a VECTOR SSA_NAME.  Record each such summand in
   v_info_map, keyed by the VECTOR SSA_NAME.

   2) For each key (VECTOR SSA_NAME), verify that its BIT_FIELD_REFs are
   contiguous and cover the whole VECTOR without holes.  Collect the
   VECTORs that qualify into one candidate list.

   3) Sort the candidate list by the machine mode of the VECTOR type.
   For each group of candidates with the same mode, build the addition
   statements for them and generate the BIT_FIELD_REFs accordingly.

   TODO:
   The current implementation requires each VECTOR to be fully covered.
   It could be extended to support partial coverage by checking for
   adjacent (but not complete) BIT_FIELD_REFs; that would likely need a
   cost model to decide when the transformation pays off.
*/
1924 | static bool |
1925 | undistribute_bitref_for_vector (enum tree_code opcode, |
1926 | vec<operand_entry *> *ops, struct loop *loop) |
1927 | { |
1928 | if (ops->length () <= 1) |
1929 | return false; |
1930 | |
1931 | if (opcode != PLUS_EXPR |
1932 | && opcode != MULT_EXPR |
1933 | && opcode != BIT_XOR_EXPR |
1934 | && opcode != BIT_IOR_EXPR |
1935 | && opcode != BIT_AND_EXPR) |
1936 | return false; |
1937 | |
1938 | hash_map<tree, v_info_ptr> v_info_map; |
1939 | operand_entry *oe1; |
1940 | unsigned i; |
1941 | |
  /* Find the summands in the addition chain that come from a BIT_FIELD_REF
     on a VECTOR SSA_NAME, and record them in the map.  */
1944 | FOR_EACH_VEC_ELT (*ops, i, oe1) |
1945 | { |
1946 | enum tree_code dcode; |
1947 | gimple *oe1def; |
1948 | |
1949 | if (TREE_CODE (oe1->op) != SSA_NAME) |
1950 | continue; |
1951 | oe1def = SSA_NAME_DEF_STMT (oe1->op); |
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      tree rhs = gimple_assign_rhs1 (oe1def);
1959 | tree vec = TREE_OPERAND (rhs, 0); |
1960 | tree vec_type = TREE_TYPE (vec); |
1961 | |
1962 | if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type)) |
1963 | continue; |
1964 | |
      /* Ignore it if the target machine can't support this VECTOR type.  */
      if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
	continue;

      /* Check for a constant-size vector type; this constrains the
	 BIT_FIELD_REF offset and size.  */
      if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
	continue;
1972 | |
1973 | if (VECTOR_TYPE_P (TREE_TYPE (rhs)) |
1974 | || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs)))) |
1975 | continue; |
1976 | |
1977 | /* The type of BIT_FIELD_REF might not be equal to the element type of |
1978 | the vector. We want to use a vector type with element type the |
1979 | same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */ |
1980 | if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) |
1981 | { |
1982 | machine_mode simd_mode; |
1983 | unsigned HOST_WIDE_INT size, nunits; |
1984 | unsigned HOST_WIDE_INT elem_size |
1985 | = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); |
	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1987 | continue; |
1988 | if (size <= elem_size || (size % elem_size) != 0) |
1989 | continue; |
1990 | nunits = size / elem_size; |
1991 | if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), |
				nunits).exists (&simd_mode))
1993 | continue; |
1994 | vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); |
1995 | |
	  /* Ignore it if the target machine can't support this VECTOR
	     type.  */
	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
	    continue;

	  /* Check for a constant-size vector type; this constrains the
	     BIT_FIELD_REF offset and size.  */
	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2003 | continue; |
2004 | |
	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2007 | continue; |
2008 | } |
2009 | |
2010 | tree elem_type = TREE_TYPE (vec_type); |
2011 | unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); |
      if (maybe_ne (bit_field_size (rhs), elem_size))
2013 | continue; |
2014 | |
2015 | unsigned idx; |
      if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2017 | continue; |
2018 | |
      /* Ignore it if the target machine can't support this type of VECTOR
	 operation.  */
      optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
      if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2023 | continue; |
2024 | |
2025 | bool existed; |
      v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
      if (!existed)
	{
	  info = new v_info;
	  info->vec_type = vec_type;
	}
      else if (!types_compatible_p (vec_type, info->vec_type))
	continue;
      info->vec.safe_push (std::make_pair (idx, i));
2035 | } |
2036 | |
  /* We need at least two VECTORs to combine.  */
  if (v_info_map.elements () <= 1)
    {
      cleanup_vinfo_map (v_info_map);
      return false;
    }
2043 | |
  /* Verify each VECTOR candidate by checking two conditions:
       1) the sorted offsets are adjacent, with no holes;
       2) together they cover the whole VECTOR.
     Add the valid candidates to a vector for further handling.  */
2048 | auto_vec<tree> valid_vecs (v_info_map.elements ()); |
2049 | for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); |
2050 | it != v_info_map.end (); ++it) |
2051 | { |
2052 | tree cand_vec = (*it).first; |
2053 | v_info_ptr cand_info = (*it).second; |
2054 | unsigned int num_elems |
	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2056 | if (cand_info->vec.length () != num_elems) |
2057 | continue; |
2058 | sbitmap holes = sbitmap_alloc (num_elems); |
2059 | bitmap_ones (holes); |
2060 | bool valid = true; |
2061 | v_info_elem *curr; |
2062 | FOR_EACH_VEC_ELT (cand_info->vec, i, curr) |
2063 | { |
	  if (!bitmap_bit_p (holes, curr->first))
2065 | { |
2066 | valid = false; |
2067 | break; |
2068 | } |
2069 | else |
	    bitmap_clear_bit (holes, curr->first);
2071 | } |
2072 | if (valid && bitmap_empty_p (holes)) |
	valid_vecs.quick_push (cand_vec);
      sbitmap_free (holes);
2075 | } |
2076 | |
  /* We need at least two VECTORs to combine.  */
  if (valid_vecs.length () <= 1)
    {
      cleanup_vinfo_map (v_info_map);
      return false;
2082 | } |
2083 | |
2084 | valid_vecs.qsort (sort_by_mach_mode); |
  /* Go through all candidates in machine mode order; after the sort,
     candidates with the same mode are adjacent, and a mode represented
     by only a single candidate is skipped.  */
2087 | for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) |
2088 | { |
2089 | tree tvec = valid_vecs[i]; |
2090 | enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); |
2091 | |
2092 | /* Skip modes with only a single candidate. */ |
2093 | if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) |
2094 | continue; |
2095 | |
2096 | unsigned int idx, j; |
2097 | gimple *sum = NULL; |
2098 | tree sum_vec = tvec; |
      v_info_ptr info_ptr = *(v_info_map.get (tvec));
2100 | v_info_elem *elem; |
2101 | tree vec_type = info_ptr->vec_type; |
2102 | |
      /* Build the sum for all candidates with the same mode.  */
2104 | do |
2105 | { |
	  sum = build_and_add_sum (vec_type, sum_vec,
				   valid_vecs[i + 1], opcode);
2108 | /* Update the operands only after build_and_add_sum, |
2109 | so that we don't have to repeat the placement algorithm |
2110 | of build_and_add_sum. */ |
2111 | if (sum_vec == tvec |
2112 | && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec))) |
2113 | { |
2114 | gimple_stmt_iterator gsi = gsi_for_stmt (sum); |
2115 | tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); |
	      tree lhs = make_ssa_name (vec_type);
	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
	      gimple_set_uid (g, gimple_uid (sum));
	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
	      gimple_assign_set_rhs1 (sum, lhs);
	      update_stmt (sum);
2122 | } |
2123 | if (!useless_type_conversion_p (vec_type, |
2124 | TREE_TYPE (valid_vecs[i + 1]))) |
2125 | { |
2126 | gimple_stmt_iterator gsi = gsi_for_stmt (sum); |
2127 | tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, |
2128 | valid_vecs[i + 1]); |
	      tree lhs = make_ssa_name (vec_type);
	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
	      gimple_set_uid (g, gimple_uid (sum));
	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
	      gimple_assign_set_rhs2 (sum, lhs);
	      update_stmt (sum);
2135 | } |
2136 | sum_vec = gimple_get_lhs (sum); |
	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2138 | gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); |
	  /* Update the ops related to the current candidate VECTOR.  */
2140 | FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) |
2141 | { |
2142 | idx = elem->second; |
2143 | gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); |
	      /* Mark the def visited so it will get DCEd later.  */
	      gimple_set_visited (def, true);
2146 | if (opcode == PLUS_EXPR |
2147 | || opcode == BIT_XOR_EXPR |
2148 | || opcode == BIT_IOR_EXPR) |
2149 | (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); |
2150 | else if (opcode == MULT_EXPR) |
2151 | (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); |
2152 | else |
2153 | { |
2154 | gcc_assert (opcode == BIT_AND_EXPR); |
2155 | (*ops)[idx]->op |
2156 | = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); |
2157 | } |
2158 | (*ops)[idx]->rank = 0; |
2159 | } |
2160 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2161 | { |
2162 | fprintf (stream: dump_file, format: "Generating addition -> " ); |
2163 | print_gimple_stmt (dump_file, sum, 0); |
2164 | } |
2165 | i++; |
2166 | } |
2167 | while ((i < valid_vecs.length () - 1) |
2168 | && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); |
2169 | |
      /* Referring back to the first valid VECTOR with this mode, generate
	 the BIT_FIELD_REF statements accordingly.  */
      info_ptr = *(v_info_map.get (tvec));
2173 | gcc_assert (sum); |
2174 | tree elem_type = TREE_TYPE (vec_type); |
2175 | FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) |
2176 | { |
2177 | idx = elem->second; |
	  tree dst = make_ssa_name (elem_type);
2179 | tree pos = bitsize_int (elem->first |
2180 | * tree_to_uhwi (TYPE_SIZE (elem_type))); |
2181 | tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, |
2182 | TYPE_SIZE (elem_type), pos); |
2183 | gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); |
	  insert_stmt_after (gs, sum);
	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
	  /* Mark the def visited so it will get DCEd later.  */
	  gimple_set_visited (def, true);
	  (*ops)[idx]->op = gimple_assign_lhs (gs);
	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2190 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2191 | { |
2192 | fprintf (stream: dump_file, format: "Generating bit_field_ref -> " ); |
2193 | print_gimple_stmt (dump_file, gs, 0); |
2194 | } |
2195 | } |
2196 | } |
2197 | |
2198 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2199 | fprintf (stream: dump_file, format: "undistributiong bit_field_ref for vector done.\n" ); |
2200 | |
  cleanup_vinfo_map (v_info_map);
2202 | |
2203 | return true; |
2204 | } |
2205 | |
2206 | /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison |
2207 | expression, examine the other OPS to see if any of them are comparisons |
2208 | of the same values, which we may be able to combine or eliminate. |
2209 | For example, we can rewrite (a < b) | (a == b) as (a <= b). */ |
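
/* Another instance (an illustrative sketch, not from the original
   sources): for BIT_AND_EXPR,  (a < b) & (a != b)  simplifies to just
   (a < b), via maybe_fold_and_comparisons.  */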
2210 | |
2211 | static bool |
2212 | eliminate_redundant_comparison (enum tree_code opcode, |
2213 | vec<operand_entry *> *ops, |
2214 | unsigned int currindex, |
2215 | operand_entry *curr) |
2216 | { |
2217 | tree op1, op2; |
2218 | enum tree_code lcode, rcode; |
2219 | gimple *def1, *def2; |
2220 | int i; |
2221 | operand_entry *oe; |
2222 | |
2223 | if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) |
2224 | return false; |
2225 | |
2226 | /* Check that CURR is a comparison. */ |
2227 | if (TREE_CODE (curr->op) != SSA_NAME) |
2228 | return false; |
2229 | def1 = SSA_NAME_DEF_STMT (curr->op); |
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);
2237 | |
2238 | /* Now look for a similar comparison in the remaining OPS. */ |
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
2240 | { |
2241 | tree t; |
2242 | |
2243 | if (TREE_CODE (oe->op) != SSA_NAME) |
2244 | continue; |
2245 | def2 = SSA_NAME_DEF_STMT (oe->op); |
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
2249 | if (TREE_CODE_CLASS (rcode) != tcc_comparison) |
2250 | continue; |
2251 | |
2252 | /* If we got here, we have a match. See if we can combine the |
2253 | two comparisons. */ |
2254 | tree type = TREE_TYPE (gimple_assign_lhs (def1)); |
2255 | if (opcode == BIT_IOR_EXPR) |
	t = maybe_fold_or_comparisons (type,
				       lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (type,
					lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
2265 | if (!t) |
2266 | continue; |
2267 | |
2268 | /* maybe_fold_and_comparisons and maybe_fold_or_comparisons |
2269 | always give us a boolean_type_node value back. If the original |
2270 | BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, |
2271 | we need to convert. */ |
2272 | if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) |
2273 | { |
2274 | if (!fold_convertible_p (TREE_TYPE (curr->op), t)) |
2275 | continue; |
2276 | t = fold_convert (TREE_TYPE (curr->op), t); |
2277 | } |
2278 | |
2279 | if (TREE_CODE (t) != INTEGER_CST |
	  && !operand_equal_p (t, curr->op, 0))
2281 | { |
2282 | enum tree_code subcode; |
2283 | tree newop1, newop2; |
2284 | if (!COMPARISON_CLASS_P (t)) |
2285 | continue; |
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2287 | STRIP_USELESS_TYPE_CONVERSION (newop1); |
2288 | STRIP_USELESS_TYPE_CONVERSION (newop2); |
2289 | if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) |
2290 | continue; |
2291 | if (lcode == TREE_CODE (t) |
	      && operand_equal_p (op1, newop1, 0)
	      && operand_equal_p (op2, newop2, 0))
2294 | t = curr->op; |
2295 | else if ((TREE_CODE (newop1) == SSA_NAME |
2296 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1)) |
2297 | || (TREE_CODE (newop2) == SSA_NAME |
2298 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2))) |
2299 | continue; |
2300 | } |
2301 | |
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t);
	  fprintf (dump_file, "\n");
	}
2312 | |
2313 | /* Now we can delete oe, as it has been subsumed by the new combined |
2314 | expression t. */ |
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated++;
2317 | |
2318 | /* If t is the same as curr->op, we're done. Otherwise we must |
2319 | replace curr->op with t. Special case is if we got a constant |
2320 | back, in which case we add it to the end instead of in place of |
2321 | the current entry. */ |
2322 | if (TREE_CODE (t) == INTEGER_CST) |
2323 | { |
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
2326 | } |
      else if (!operand_equal_p (t, curr->op, 0))
2328 | { |
2329 | gimple *sum; |
2330 | enum tree_code subcode; |
2331 | tree newop1; |
2332 | tree newop2; |
2333 | gcc_assert (COMPARISON_CLASS_P (t)); |
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2335 | STRIP_USELESS_TYPE_CONVERSION (newop1); |
2336 | STRIP_USELESS_TYPE_CONVERSION (newop2); |
2337 | gcc_checking_assert (is_gimple_val (newop1) |
2338 | && is_gimple_val (newop2)); |
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
2341 | } |
2342 | return true; |
2343 | } |
2344 | |
2345 | return false; |
2346 | } |
2347 | |
2348 | |
/* Transform repeated addition of the same value into a multiplication
   by a constant.  */
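
/* For example (an illustrative sketch, not from the original sources),
   the chain  x + x + x + y + y + z  becomes  3*x + 2*y + z; for
   floating-point types this is only done under
   -funsafe-math-optimizations.  */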
2351 | static bool |
2352 | transform_add_to_multiply (vec<operand_entry *> *ops) |
2353 | { |
2354 | operand_entry *oe; |
2355 | tree op = NULL_TREE; |
2356 | int j; |
2357 | int i, start = -1, end = 0, count = 0; |
2358 | auto_vec<std::pair <int, int> > indxs; |
2359 | bool changed = false; |
2360 | |
2361 | if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) |
2362 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) |
2363 | || !flag_unsafe_math_optimizations)) |
2364 | return false; |
2365 | |
2366 | /* Look for repeated operands. */ |
2367 | FOR_EACH_VEC_ELT (*ops, i, oe) |
2368 | { |
2369 | if (start == -1) |
2370 | { |
2371 | count = 1; |
2372 | op = oe->op; |
2373 | start = i; |
2374 | } |
      else if (operand_equal_p (oe->op, op, 0))
2376 | { |
2377 | count++; |
2378 | end = i; |
2379 | } |
2380 | else |
2381 | { |
2382 | if (count > 1) |
	    indxs.safe_push (std::make_pair (start, end));
2384 | count = 1; |
2385 | op = oe->op; |
2386 | start = i; |
2387 | } |
2388 | } |
2389 | |
2390 | if (count > 1) |
    indxs.safe_push (std::make_pair (start, end));
2392 | |
2393 | for (j = indxs.length () - 1; j >= 0; --j) |
2394 | { |
2395 | /* Convert repeated operand addition to multiplication. */ |
2396 | start = indxs[j].first; |
2397 | end = indxs[j].second; |
2398 | op = (*ops)[start]->op; |
2399 | count = end - start + 1; |
2400 | for (i = end; i >= start; --i) |
	ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
	= gimple_build_assign (tmp, MULT_EXPR,
			       op, fold_convert (TREE_TYPE (op), cst));
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
2409 | changed = true; |
2410 | } |
2411 | |
2412 | return changed; |
2413 | } |
2414 | |
2415 | |
2416 | /* Perform various identities and other optimizations on the list of |
2417 | operand entries, stored in OPS. The tree code for the binary |
2418 | operation between all the operands is OPCODE. */ |
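
/* For instance (an illustrative sketch, not from the original sources),
   for OPCODE PLUS_EXPR and OPS ending in the constants 3 and 7, the two
   constants are folded into a single 10; a zero produced along the way
   would then be dropped entirely by eliminate_using_constants.  */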
2419 | |
2420 | static void |
2421 | optimize_ops_list (enum tree_code opcode, |
2422 | vec<operand_entry *> *ops) |
2423 | { |
2424 | unsigned int length = ops->length (); |
2425 | unsigned int i; |
2426 | operand_entry *oe; |
2427 | operand_entry *oelast = NULL; |
2428 | bool iterate = false; |
2429 | |
2430 | if (length == 1) |
2431 | return; |
2432 | |
2433 | oelast = ops->last (); |
2434 | |
2435 | /* If the last two are constants, pop the constants off, merge them |
2436 | and try the next two. */ |
2437 | if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) |
2438 | { |
2439 | operand_entry *oelm1 = (*ops)[length - 2]; |
2440 | |
2441 | if (oelm1->rank == 0 |
2442 | && is_gimple_min_invariant (oelm1->op) |
2443 | && useless_type_conversion_p (TREE_TYPE (oelm1->op), |
2444 | TREE_TYPE (oelast->op))) |
2445 | { |
2446 | tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), |
2447 | oelm1->op, oelast->op); |
2448 | |
2449 | if (folded && is_gimple_min_invariant (folded)) |
2450 | { |
2451 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2452 | fprintf (stream: dump_file, format: "Merging constants\n" ); |
2453 | |
2454 | ops->pop (); |
2455 | ops->pop (); |
2456 | |
	  add_to_ops_vec (ops, folded);
2458 | reassociate_stats.constants_eliminated++; |
2459 | |
2460 | optimize_ops_list (opcode, ops); |
2461 | return; |
2462 | } |
2463 | } |
2464 | } |
2465 | |
2466 | eliminate_using_constants (opcode, ops); |
2467 | oelast = NULL; |
2468 | |
  for (i = 0; ops->iterate (i, &oe);)
2470 | { |
2471 | bool done = false; |
2472 | |
      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2478 | { |
2479 | if (done) |
2480 | return; |
2481 | iterate = true; |
2482 | oelast = NULL; |
2483 | continue; |
2484 | } |
2485 | oelast = oe; |
2486 | i++; |
2487 | } |
2488 | |
2489 | if (iterate) |
2490 | optimize_ops_list (opcode, ops); |
2491 | } |
2492 | |
2493 | /* The following functions are subroutines to optimize_range_tests and allow |
2494 | it to try to change a logical combination of comparisons into a range |
2495 | test. |
2496 | |
2497 | For example, both |
2498 | X == 2 || X == 5 || X == 3 || X == 4 |
2499 | and |
2500 | X >= 2 && X <= 5 |
2501 | are converted to |
2502 | (unsigned) (X - 2) <= 3 |
2503 | |
   For more information see the comments above fold_range_test in
   fold-const.cc; this implementation is for GIMPLE.  */
2506 | |
2507 | |
2508 | |
2509 | /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */ |
2510 | |
2511 | void |
2512 | dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp) |
2513 | { |
2514 | if (!skip_exp) |
2515 | print_generic_expr (file, r->exp); |
2516 | fprintf (stream: file, format: " %c[" , r->in_p ? '+' : '-'); |
2517 | print_generic_expr (file, r->low); |
2518 | fputs (s: ", " , stream: file); |
2519 | print_generic_expr (file, r->high); |
2520 | fputc (c: ']', stream: file); |
2521 | } |
2522 | |
2523 | /* Dump the range entry R to STDERR. */ |
2524 | |
2525 | DEBUG_FUNCTION void |
2526 | debug_range_entry (struct range_entry *r) |
2527 | { |
  dump_range_entry (stderr, r, false);
  fputc ('\n', stderr);
2530 | } |
2531 | |
2532 | /* This is similar to make_range in fold-const.cc, but on top of |
2533 | GIMPLE instead of trees. If EXP is non-NULL, it should be |
2534 | an SSA_NAME and STMT argument is ignored, otherwise STMT |
2535 | argument should be a GIMPLE_COND. */ |
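
/* For example (an illustrative sketch, not from the original sources),
   for EXP = b_1 where  b_1 = x_2(D) <= 4,  the walk moves from b_1 to
   the comparison and records the equivalent range test on x_2(D):
   roughly r->exp = x_2(D), r->in_p = true, r->low = NULL_TREE (the
   type minimum) and r->high = 4.  */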
2536 | |
2537 | void |
2538 | init_range_entry (struct range_entry *r, tree exp, gimple *stmt) |
2539 | { |
2540 | int in_p; |
2541 | tree low, high; |
2542 | bool is_bool, strict_overflow_p; |
2543 | |
2544 | r->exp = NULL_TREE; |
2545 | r->in_p = false; |
2546 | r->strict_overflow_p = false; |
2547 | r->low = NULL_TREE; |
2548 | r->high = NULL_TREE; |
2549 | if (exp != NULL_TREE |
2550 | && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) |
2551 | return; |
2552 | |
2553 | /* Start with simply saying "EXP != 0" and then look at the code of EXP |
2554 | and see if we can refine the range. Some of the cases below may not |
2555 | happen, but it doesn't seem worth worrying about this. We "continue" |
2556 | the outer loop when we've changed something; otherwise we "break" |
2557 | the switch, which will "break" the while. */ |
2558 | low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; |
2559 | high = low; |
2560 | in_p = 0; |
2561 | strict_overflow_p = false; |
2562 | is_bool = false; |
2563 | if (exp == NULL_TREE) |
2564 | is_bool = true; |
2565 | else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) |
2566 | { |
2567 | if (TYPE_UNSIGNED (TREE_TYPE (exp))) |
2568 | is_bool = true; |
2569 | else |
2570 | return; |
2571 | } |
2572 | else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) |
2573 | is_bool = true; |
2574 | |
2575 | while (1) |
2576 | { |
2577 | enum tree_code code; |
2578 | tree arg0, arg1, exp_type; |
2579 | tree nexp; |
2580 | location_t loc; |
2581 | |
2582 | if (exp != NULL_TREE) |
2583 | { |
2584 | if (TREE_CODE (exp) != SSA_NAME |
2585 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) |
2586 | break; |
2587 | |
2588 | stmt = SSA_NAME_DEF_STMT (exp); |
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
2595 | exp_type = TREE_TYPE (exp); |
2596 | } |
2597 | else |
2598 | { |
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
2602 | exp_type = boolean_type_node; |
2603 | } |
2604 | |
2605 | if (TREE_CODE (arg0) != SSA_NAME |
2606 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0) |
	  || ssa_name_maybe_undef_p (arg0))
2608 | break; |
      loc = gimple_location (stmt);
2610 | switch (code) |
2611 | { |
2612 | case BIT_NOT_EXPR: |
2613 | if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE |
2614 | /* Ensure the range is either +[-,0], +[0,0], |
2615 | -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or |
2616 | -[1,1]. If it is e.g. +[-,-] or -[-,-] |
2617 | or similar expression of unconditional true or |
2618 | false, it should not be negated. */ |
2619 | && ((high && integer_zerop (high)) |
2620 | || (low && integer_onep (low)))) |
2621 | { |
2622 | in_p = !in_p; |
2623 | exp = arg0; |
2624 | continue; |
2625 | } |
2626 | break; |
2627 | case SSA_NAME: |
2628 | exp = arg0; |
2629 | continue; |
2630 | CASE_CONVERT: |
2631 | if (is_bool) |
2632 | { |
2633 | if ((TYPE_PRECISION (exp_type) == 1 |
2634 | || TREE_CODE (exp_type) == BOOLEAN_TYPE) |
2635 | && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) |
2636 | return; |
2637 | } |
2638 | else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) |
2639 | { |
2640 | if (TYPE_UNSIGNED (TREE_TYPE (arg0))) |
2641 | is_bool = true; |
2642 | else |
2643 | return; |
2644 | } |
2645 | else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) |
2646 | is_bool = true; |
2647 | goto do_default; |
2648 | case EQ_EXPR: |
2649 | case NE_EXPR: |
2650 | case LT_EXPR: |
2651 | case LE_EXPR: |
2652 | case GE_EXPR: |
2653 | case GT_EXPR: |
2654 | is_bool = true; |
2655 | /* FALLTHRU */ |
2656 | default: |
2657 | if (!is_bool) |
2658 | return; |
2659 | do_default: |
2660 | nexp = make_range_step (loc, code, arg0, arg1, exp_type, |
2661 | &low, &high, &in_p, |
2662 | &strict_overflow_p); |
2663 | if (nexp != NULL_TREE) |
2664 | { |
2665 | exp = nexp; |
2666 | gcc_assert (TREE_CODE (exp) == SSA_NAME); |
2667 | continue; |
2668 | } |
2669 | break; |
2670 | } |
2671 | break; |
2672 | } |
2673 | if (is_bool) |
2674 | { |
2675 | r->exp = exp; |
2676 | r->in_p = in_p; |
2677 | r->low = low; |
2678 | r->high = high; |
2679 | r->strict_overflow_p = strict_overflow_p; |
2680 | } |
2681 | } |
2682 | |
2683 | /* Comparison function for qsort. Sort entries |
2684 | without SSA_NAME exp first, then with SSA_NAMEs sorted |
2685 | by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs |
2686 | by increasing ->low and if ->low is the same, by increasing |
2687 | ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE |
2688 | maximum. */ |
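
/* For instance (an illustrative sketch, not from the original sources),
   entries for  x_1 in [0,5],  x_1 in [3,7]  and  y_2 in [1,1]  sort as
   { x_1 [0,5], x_1 [3,7], y_2 [1,1] }, assuming
   SSA_NAME_VERSION (x_1) < SSA_NAME_VERSION (y_2), so overlapping
   ranges of the same name end up adjacent for the merging code.  */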
2689 | |
2690 | static int |
2691 | range_entry_cmp (const void *a, const void *b) |
2692 | { |
2693 | const struct range_entry *p = (const struct range_entry *) a; |
2694 | const struct range_entry *q = (const struct range_entry *) b; |
2695 | |
2696 | if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) |
2697 | { |
2698 | if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) |
2699 | { |
2700 | /* Group range_entries for the same SSA_NAME together. */ |
2701 | if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) |
2702 | return -1; |
2703 | else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) |
2704 | return 1; |
2705 | /* If ->low is different, NULL low goes first, then by |
2706 | ascending low. */ |
2707 | if (p->low != NULL_TREE) |
2708 | { |
2709 | if (q->low != NULL_TREE) |
2710 | { |
2711 | tree tem = fold_binary (LT_EXPR, boolean_type_node, |
2712 | p->low, q->low); |
2713 | if (tem && integer_onep (tem)) |
2714 | return -1; |
2715 | tem = fold_binary (GT_EXPR, boolean_type_node, |
2716 | p->low, q->low); |
2717 | if (tem && integer_onep (tem)) |
2718 | return 1; |
2719 | } |
2720 | else |
2721 | return 1; |
2722 | } |
2723 | else if (q->low != NULL_TREE) |
2724 | return -1; |
2725 | /* If ->high is different, NULL high goes last, before that by |
2726 | ascending high. */ |
2727 | if (p->high != NULL_TREE) |
2728 | { |
2729 | if (q->high != NULL_TREE) |
2730 | { |
2731 | tree tem = fold_binary (LT_EXPR, boolean_type_node, |
2732 | p->high, q->high); |
2733 | if (tem && integer_onep (tem)) |
2734 | return -1; |
2735 | tem = fold_binary (GT_EXPR, boolean_type_node, |
2736 | p->high, q->high); |
2737 | if (tem && integer_onep (tem)) |
2738 | return 1; |
2739 | } |
2740 | else |
2741 | return -1; |
2742 | } |
2743 | else if (q->high != NULL_TREE) |
2744 | return 1; |
2745 | /* If both ranges are the same, sort below by ascending idx. */ |
2746 | } |
2747 | else |
2748 | return 1; |
2749 | } |
2750 | else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) |
2751 | return -1; |
2752 | |
2753 | if (p->idx < q->idx) |
2754 | return -1; |
2755 | else |
2756 | { |
2757 | gcc_checking_assert (p->idx > q->idx); |
2758 | return 1; |
2759 | } |
2760 | } |
2761 | |
/* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
   inserting any needed statements before or after GSI, as selected by
   BEFORE.  */
2764 | |
2765 | static tree |
2766 | force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) |
2767 | { |
2768 | enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; |
2769 | tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); |
2770 | if (TREE_CODE (ret) != SSA_NAME) |
2771 | { |
2772 | gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); |
2773 | if (before) |
2774 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
2775 | else |
2776 | gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); |
      ret = gimple_assign_lhs (g);
2778 | } |
2779 | return ret; |
2780 | } |
2781 | |
/* Helper routine of optimize_range_tests.
2783 | [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for |
2784 | RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, |
2785 | OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE |
2786 | is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to |
2787 | an array of COUNT pointers to other ranges. Return |
2788 | true if the range merge has been successful. |
2789 | If OPCODE is ERROR_MARK, this is called from within |
2790 | maybe_optimize_range_tests and is performing inter-bb range optimization. |
2791 | In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in |
2792 | oe->rank. */ |
2793 | |
2794 | static bool |
2795 | update_range_test (struct range_entry *range, struct range_entry *otherrange, |
2796 | struct range_entry **otherrangep, |
2797 | unsigned int count, enum tree_code opcode, |
2798 | vec<operand_entry *> *ops, tree exp, gimple_seq seq, |
2799 | bool in_p, tree low, tree high, bool strict_overflow_p) |
2800 | { |
2801 | unsigned int idx = range->idx; |
2802 | struct range_entry *swap_with = NULL; |
2803 | basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL; |
2804 | if (opcode == ERROR_MARK) |
2805 | { |
2806 | /* For inter-bb range test optimization, pick from the range tests |
2807 | the one which is tested in the earliest condition (one dominating |
2808 | the others), because otherwise there could be some UB (e.g. signed |
2809 | overflow) in following bbs that we'd expose which wasn't there in |
2810 | the original program. See PR104196. */ |
2811 | basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id); |
2812 | basic_block range_bb = orig_range_bb; |
2813 | for (unsigned int i = 0; i < count; i++) |
2814 | { |
2815 | struct range_entry *this_range; |
2816 | if (otherrange) |
2817 | this_range = otherrange + i; |
2818 | else |
2819 | this_range = otherrangep[i]; |
2820 | operand_entry *oe = (*ops)[this_range->idx]; |
2821 | basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id); |
2822 | if (range_bb != this_bb |
2823 | && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb)) |
2824 | { |
2825 | swap_with = this_range; |
2826 | range_bb = this_bb; |
2827 | idx = this_range->idx; |
2828 | } |
2829 | } |
2830 | /* If seq is non-NULL, it can contain statements that use SSA_NAMEs |
2831 | only defined in later blocks. In this case we can't move the |
2832 | merged comparison earlier, so instead check if there are any stmts |
2833 | that might trigger signed integer overflow in between and rewrite |
2834 | them. But only after we check if the optimization is possible. */ |
2835 | if (seq && swap_with) |
2836 | { |
2837 | rewrite_bb_first = range_bb; |
2838 | rewrite_bb_last = orig_range_bb; |
2839 | idx = range->idx; |
2840 | swap_with = NULL; |
2841 | } |
2842 | } |
2843 | operand_entry *oe = (*ops)[idx]; |
2844 | tree op = oe->op; |
2845 | gimple *stmt = op ? SSA_NAME_DEF_STMT (op) |
2846 | : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); |
  location_t loc = gimple_location (stmt);
2848 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; |
2849 | tree tem = build_range_check (loc, optype, unshare_expr (exp), |
2850 | in_p, low, high); |
2851 | enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; |
2852 | gimple_stmt_iterator gsi; |
2853 | unsigned int i, uid; |
2854 | |
2855 | if (tem == NULL_TREE) |
2856 | return false; |
2857 | |
2858 | /* If op is default def SSA_NAME, there is no place to insert the |
2859 | new comparison. Give up, unless we can use OP itself as the |
2860 | range test. */ |
2861 | if (op && SSA_NAME_IS_DEFAULT_DEF (op)) |
2862 | { |
2863 | if (op == range->exp |
2864 | && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) |
2865 | || TREE_CODE (optype) == BOOLEAN_TYPE) |
2866 | && (op == tem |
2867 | || (TREE_CODE (tem) == EQ_EXPR |
2868 | && TREE_OPERAND (tem, 0) == op |
2869 | && integer_onep (TREE_OPERAND (tem, 1)))) |
2870 | && opcode != BIT_IOR_EXPR |
2871 | && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) |
2872 | { |
2873 | stmt = NULL; |
2874 | tem = op; |
2875 | } |
2876 | else |
2877 | return false; |
2878 | } |
2879 | |
2880 | if (swap_with) |
    std::swap (range->idx, swap_with->idx);
2882 | |
2883 | if (strict_overflow_p && issue_strict_overflow_warning (wc)) |
2884 | warning_at (loc, OPT_Wstrict_overflow, |
2885 | "assuming signed overflow does not occur " |
2886 | "when simplifying range test" ); |
2887 | |
2888 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2889 | { |
2890 | struct range_entry *r; |
2891 | fprintf (stream: dump_file, format: "Optimizing range tests " ); |
2892 | dump_range_entry (file: dump_file, r: range, skip_exp: false); |
2893 | for (i = 0; i < count; i++) |
2894 | { |
2895 | if (otherrange) |
2896 | r = otherrange + i; |
2897 | else |
2898 | r = otherrangep[i]; |
2899 | if (r->exp |
2900 | && r->exp != range->exp |
2901 | && TREE_CODE (r->exp) == SSA_NAME) |
2902 | { |
2903 | fprintf (stream: dump_file, format: " and " ); |
2904 | dump_range_entry (file: dump_file, r, skip_exp: false); |
2905 | } |
2906 | else |
2907 | { |
2908 | fprintf (stream: dump_file, format: " and" ); |
2909 | dump_range_entry (file: dump_file, r, skip_exp: true); |
2910 | } |
2911 | } |
2912 | fprintf (stream: dump_file, format: "\n into " ); |
2913 | print_generic_expr (dump_file, tem); |
2914 | fprintf (stream: dump_file, format: "\n" ); |
2915 | } |
2916 | |
2917 | /* In inter-bb range optimization mode, if we have a seq, we can't |
2918 | move the merged comparison to the earliest bb from the comparisons |
2919 | being replaced, so instead rewrite stmts that could trigger signed |
2920 | integer overflow. */ |
2921 | for (basic_block bb = rewrite_bb_last; |
2922 | bb != rewrite_bb_first; bb = single_pred (bb)) |
2923 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); |
	 !gsi_end_p (gsi); gsi_next (&gsi))
2925 | { |
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_assign (stmt))
	  if (tree lhs = gimple_assign_lhs (stmt))
2929 | if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
2930 | || POINTER_TYPE_P (TREE_TYPE (lhs))) |
2931 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) |
2932 | { |
		enum tree_code code = gimple_assign_rhs_code (stmt);
2934 | if (arith_code_with_undefined_signed_overflow (code)) |
2935 | { |
2936 | gimple_stmt_iterator gsip = gsi; |
2937 | gimple_stmt_iterator gsin = gsi; |
		    gsi_prev (&gsip);
		    gsi_next (&gsin);
		    rewrite_to_defined_overflow (&gsi);
		    unsigned uid = gimple_uid (stmt);
		    if (gsi_end_p (gsip))
		      gsip = gsi_after_labels (bb);
		    else
		      gsi_next (&gsip);
		    for (; gsi_stmt (gsip) != gsi_stmt (gsin);
			 gsi_next (&gsip))
		      gimple_set_uid (gsi_stmt (gsip), uid);
2949 | } |
2950 | } |
2951 | } |
2952 | |
2953 | if (opcode == BIT_IOR_EXPR |
2954 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) |
2955 | tem = invert_truthvalue_loc (loc, tem); |
2956 | |
2957 | tem = fold_convert_loc (loc, optype, tem); |
2958 | if (stmt) |
2959 | { |
2960 | gsi = gsi_for_stmt (stmt); |
      uid = gimple_uid (stmt);
2962 | } |
2963 | else |
2964 | { |
2965 | gsi = gsi_none (); |
2966 | uid = 0; |
2967 | } |
2968 | if (stmt == NULL) |
2969 | gcc_checking_assert (tem == op); |
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
2973 | else if (op != range->exp) |
2974 | { |
2975 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
      tem = force_into_ssa_name (&gsi, tem, true);
      gsi_prev (&gsi);
2978 | } |
  else if (gimple_code (stmt) != GIMPLE_PHI)
2980 | { |
2981 | gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); |
      tem = force_into_ssa_name (&gsi, tem, false);
2983 | } |
2984 | else |
2985 | { |
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
2989 | else |
2990 | { |
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  uid = 1;
	  while (!gsi_end_p (gsi))
	    {
	      uid = gimple_uid (gsi_stmt (gsi));
	      gsi_next (&gsi);
	    }
	}
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
      else
	gsi_prev (&gsi);
3005 | } |
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);
3011 | |
3012 | oe->op = tem; |
3013 | range->exp = exp; |
3014 | range->low = low; |
3015 | range->high = high; |
3016 | range->in_p = in_p; |
3017 | range->strict_overflow_p = false; |
3018 | |
3019 | for (i = 0; i < count; i++) |
3020 | { |
3021 | if (otherrange) |
3022 | range = otherrange + i; |
3023 | else |
3024 | range = otherrangep[i]; |
3025 | oe = (*ops)[range->idx]; |
3026 | /* Now change all the other range test immediate uses, so that |
3027 | those tests will be optimized away. */ |
3028 | if (opcode == ERROR_MARK) |
3029 | { |
3030 | if (oe->op) |
3031 | oe->op = build_int_cst (TREE_TYPE (oe->op), |
3032 | oe->rank == BIT_IOR_EXPR ? 0 : 1); |
3033 | else |
3034 | oe->op = (oe->rank == BIT_IOR_EXPR |
3035 | ? boolean_false_node : boolean_true_node); |
3036 | } |
3037 | else |
3038 | oe->op = error_mark_node; |
3039 | range->exp = NULL_TREE; |
3040 | range->low = NULL_TREE; |
3041 | range->high = NULL_TREE; |
3042 | } |
3043 | return true; |
3044 | } |
3045 | |
3046 | /* Optimize X == CST1 || X == CST2 |
3047 | if popcount (CST1 ^ CST2) == 1 into |
3048 | (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). |
3049 | Similarly for ranges. E.g. |
3050 | X != 2 && X != 3 && X != 10 && X != 11 |
3051 | will be transformed by the previous optimization into |
3052 | !((X - 2U) <= 1U || (X - 10U) <= 1U) |
3053 | and this loop can transform that into |
3054 | !(((X & ~8) - 2U) <= 1U). */ |
3055 | |
3056 | static bool |
3057 | optimize_range_tests_xor (enum tree_code opcode, tree type, |
3058 | tree lowi, tree lowj, tree highi, tree highj, |
3059 | vec<operand_entry *> *ops, |
3060 | struct range_entry *rangei, |
3061 | struct range_entry *rangej) |
3062 | { |
3063 | tree lowxor, highxor, tem, exp; |
3064 | /* Check lowi ^ lowj == highi ^ highj and |
3065 | popcount (lowi ^ lowj) == 1. */ |
3066 | lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); |
3067 | if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) |
3068 | return false; |
3069 | if (!integer_pow2p (lowxor)) |
3070 | return false; |
3071 | highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); |
3072 | if (!tree_int_cst_equal (lowxor, highxor)) |
3073 | return false; |
3074 | |
3075 | exp = rangei->exp; |
3076 | scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); |
3077 | int prec = GET_MODE_PRECISION (mode); |
3078 | if (TYPE_PRECISION (type) < prec |
3079 | || (wi::to_wide (TYPE_MIN_VALUE (type)) |
3080 | != wi::min_value (prec, TYPE_SIGN (type))) |
3081 | || (wi::to_wide (TYPE_MAX_VALUE (type)) |
3082 | != wi::max_value (prec, TYPE_SIGN (type)))) |
3083 | { |
3084 | type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); |
3085 | exp = fold_convert (type, exp); |
3086 | lowxor = fold_convert (type, lowxor); |
3087 | lowi = fold_convert (type, lowi); |
3088 | highi = fold_convert (type, highi); |
3089 | } |
3090 | tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); |
3091 | exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); |
3092 | lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); |
3093 | highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); |
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
3098 | return true; |
3099 | return false; |
3100 | } |
3101 | |
3102 | /* Optimize X == CST1 || X == CST2 |
3103 | if popcount (CST2 - CST1) == 1 into |
3104 | ((X - CST1) & ~(CST2 - CST1)) == 0. |
3105 | Similarly for ranges. E.g. |
3106 | X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 |
3107 | || X == 75 || X == 45 |
3108 | will be transformed by the previous optimization into |
3109 | (X - 43U) <= 3U || (X - 75U) <= 3U |
3110 | and this loop can transform that into |
3111 | ((X - 43U) & ~(75U - 43U)) <= 3U. */ |
3112 | static bool |
3113 | optimize_range_tests_diff (enum tree_code opcode, tree type, |
3114 | tree lowi, tree lowj, tree highi, tree highj, |
3115 | vec<operand_entry *> *ops, |
3116 | struct range_entry *rangei, |
3117 | struct range_entry *rangej) |
3118 | { |
3119 | tree tem1, tem2, mask; |
3120 | /* Check highi - lowi == highj - lowj. */ |
3121 | tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); |
3122 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) |
3123 | return false; |
3124 | tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); |
3125 | if (!tree_int_cst_equal (tem1, tem2)) |
3126 | return false; |
3127 | /* Check popcount (lowj - lowi) == 1. */ |
3128 | tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); |
3129 | if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) |
3130 | return false; |
3131 | if (!integer_pow2p (tem1)) |
3132 | return false; |
3133 | |
3134 | scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); |
3135 | int prec = GET_MODE_PRECISION (mode); |
3136 | if (TYPE_PRECISION (type) < prec |
3137 | || (wi::to_wide (TYPE_MIN_VALUE (type)) |
3138 | != wi::min_value (prec, TYPE_SIGN (type))) |
3139 | || (wi::to_wide (TYPE_MAX_VALUE (type)) |
3140 | != wi::max_value (prec, TYPE_SIGN (type)))) |
3141 | type = build_nonstandard_integer_type (prec, 1); |
3142 | else |
3143 | type = unsigned_type_for (type); |
3144 | tem1 = fold_convert (type, tem1); |
3145 | tem2 = fold_convert (type, tem2); |
3146 | lowi = fold_convert (type, lowi); |
3147 | mask = fold_build1 (BIT_NOT_EXPR, type, tem1); |
3148 | tem1 = fold_build2 (MINUS_EXPR, type, |
3149 | fold_convert (type, rangei->exp), lowi); |
3150 | tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); |
3151 | lowj = build_int_cst (type, 0); |
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
3156 | return true; |
3157 | return false; |
3158 | } |
3159 | |
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff, then dispatch:
   if OPTIMIZE_XOR is true, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */
3164 | |
3165 | static bool |
3166 | optimize_range_tests_1 (enum tree_code opcode, int first, int length, |
3167 | bool optimize_xor, vec<operand_entry *> *ops, |
3168 | struct range_entry *ranges) |
3169 | { |
3170 | int i, j; |
3171 | bool any_changes = false; |
3172 | for (i = first; i < length; i++) |
3173 | { |
3174 | tree lowi, highi, lowj, highj, type, tem; |
3175 | |
3176 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) |
3177 | continue; |
3178 | type = TREE_TYPE (ranges[i].exp); |
3179 | if (!INTEGRAL_TYPE_P (type)) |
3180 | continue; |
3181 | lowi = ranges[i].low; |
3182 | if (lowi == NULL_TREE) |
3183 | lowi = TYPE_MIN_VALUE (type); |
3184 | highi = ranges[i].high; |
3185 | if (highi == NULL_TREE) |
3186 | continue; |
3187 | for (j = i + 1; j < length && j < i + 64; j++) |
3188 | { |
3189 | bool changes; |
3190 | if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) |
3191 | continue; |
3192 | lowj = ranges[j].low; |
3193 | if (lowj == NULL_TREE) |
3194 | continue; |
3195 | highj = ranges[j].high; |
3196 | if (highj == NULL_TREE) |
3197 | highj = TYPE_MAX_VALUE (type); |
3198 | /* Check lowj > highi. */ |
3199 | tem = fold_binary (GT_EXPR, boolean_type_node, |
3200 | lowj, highi); |
3201 | if (tem == NULL_TREE || !integer_onep (tem)) |
3202 | continue; |
3203 | if (optimize_xor) |
3204 | changes = optimize_range_tests_xor (opcode, type, lowi, lowj, |
3205 | highi, highj, ops, |
ranges + i, ranges + j);
3207 | else |
3208 | changes = optimize_range_tests_diff (opcode, type, lowi, lowj, |
3209 | highi, highj, ops, |
ranges + i, ranges + j);
3211 | if (changes) |
3212 | { |
3213 | any_changes = true; |
3214 | break; |
3215 | } |
3216 | } |
3217 | } |
3218 | return any_changes; |
3219 | } |
3220 | |
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
range, EXP, LOW, HIGH; compute the bit mask of the bits to test and
return EXP on success, NULL otherwise. */
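/* For instance (illustrative only), for LOW 75, HIGH 78, TOTALLOW 43
and PREC 64 the width of the range is HIGH - LOW + 1 == 4, so the
initial mask is 0xF, which is then shifted left by LOW - TOTALLOW
== 32 bits, yielding 0xF00000000.  */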
3224 | |
3225 | static tree |
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3227 | wide_int *mask, tree *totallowp) |
3228 | { |
3229 | tree tem = int_const_binop (MINUS_EXPR, high, low); |
3230 | if (tem == NULL_TREE |
3231 | || TREE_CODE (tem) != INTEGER_CST |
3232 | || TREE_OVERFLOW (tem) |
3233 | || tree_int_cst_sgn (tem) == -1 |
3234 | || compare_tree_int (tem, prec) != -1) |
3235 | return NULL_TREE; |
3236 | |
3237 | unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; |
*mask = wi::shifted_mask (0, max, false, prec);
3239 | if (TREE_CODE (exp) == BIT_AND_EXPR |
3240 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) |
3241 | { |
3242 | widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); |
msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
if (wi::popcount (msk) == 1
&& wi::ltu_p (msk, prec - max))
{
*mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3248 | max += msk.to_uhwi (); |
3249 | exp = TREE_OPERAND (exp, 0); |
3250 | if (integer_zerop (low) |
3251 | && TREE_CODE (exp) == PLUS_EXPR |
3252 | && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) |
3253 | { |
3254 | tree ret = TREE_OPERAND (exp, 0); |
3255 | STRIP_NOPS (ret); |
3256 | widest_int bias |
= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
TYPE_PRECISION (TREE_TYPE (low))));
tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3260 | if (totallowp) |
3261 | { |
3262 | *totallowp = tbias; |
3263 | return ret; |
3264 | } |
else if (!tree_int_cst_lt (totallow, tbias))
3266 | return NULL_TREE; |
bias = wi::to_widest (tbias);
bias -= wi::to_widest (totallow);
3269 | if (bias >= 0 && bias < prec - max) |
3270 | { |
*mask = wi::lshift (*mask, bias);
3272 | return ret; |
3273 | } |
3274 | } |
3275 | } |
3276 | } |
3277 | if (totallowp) |
3278 | return exp; |
if (!tree_int_cst_lt (totallow, low))
3280 | return exp; |
3281 | tem = int_const_binop (MINUS_EXPR, low, totallow); |
3282 | if (tem == NULL_TREE |
3283 | || TREE_CODE (tem) != INTEGER_CST |
3284 | || TREE_OVERFLOW (tem) |
3285 | || compare_tree_int (tem, prec - max) == 1) |
3286 | return NULL_TREE; |
3287 | |
*mask = wi::lshift (*mask, wi::to_widest (tem));
3289 | return exp; |
3290 | } |
3291 | |
3292 | /* Attempt to optimize small range tests using bit test. |
3293 | E.g. |
3294 | X != 43 && X != 76 && X != 44 && X != 78 && X != 49 |
3295 | && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 |
has been optimized by the earlier optimizations into:
((X - 43U) & ~32U) > 3U && X != 49 && X != 82
As the whole 43 through 82 range spans fewer than 64 values,
for 64-bit word targets optimize that into:
(X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
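/* To see where the mask above comes from (illustrative only): biased
by the low bound 43, the tested values {43, 44, 45, 46, 49, 75, 76,
77, 78, 82} become {0, 1, 2, 3, 6, 32, 33, 34, 35, 39}, and the
word-sized mask with exactly those bits set is
0xFULL | (1ULL << 6) | (0xFULL << 32) | (1ULL << 39)
== 0x8F0000004FULL.  */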
3301 | |
3302 | static bool |
3303 | optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, |
3304 | vec<operand_entry *> *ops, |
3305 | struct range_entry *ranges) |
3306 | { |
3307 | int i, j; |
3308 | bool any_changes = false; |
int prec = GET_MODE_BITSIZE (word_mode);
3310 | auto_vec<struct range_entry *, 64> candidates; |
3311 | |
3312 | for (i = first; i < length - 1; i++) |
3313 | { |
3314 | tree lowi, highi, lowj, highj, type; |
3315 | |
3316 | if (ranges[i].exp == NULL_TREE || ranges[i].in_p) |
3317 | continue; |
3318 | type = TREE_TYPE (ranges[i].exp); |
3319 | if (!INTEGRAL_TYPE_P (type)) |
3320 | continue; |
3321 | lowi = ranges[i].low; |
3322 | if (lowi == NULL_TREE) |
3323 | lowi = TYPE_MIN_VALUE (type); |
3324 | highi = ranges[i].high; |
3325 | if (highi == NULL_TREE) |
3326 | continue; |
3327 | wide_int mask; |
tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
highi, &mask, &lowi);
3330 | if (exp == NULL_TREE) |
3331 | continue; |
3332 | bool strict_overflow_p = ranges[i].strict_overflow_p; |
candidates.truncate (0);
3334 | int end = MIN (i + 64, length); |
3335 | for (j = i + 1; j < end; j++) |
3336 | { |
3337 | tree exp2; |
3338 | if (ranges[j].exp == NULL_TREE || ranges[j].in_p) |
3339 | continue; |
3340 | if (ranges[j].exp == exp) |
3341 | ; |
3342 | else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) |
3343 | { |
3344 | exp2 = TREE_OPERAND (ranges[j].exp, 0); |
3345 | if (exp2 == exp) |
3346 | ; |
3347 | else if (TREE_CODE (exp2) == PLUS_EXPR) |
3348 | { |
3349 | exp2 = TREE_OPERAND (exp2, 0); |
3350 | STRIP_NOPS (exp2); |
3351 | if (exp2 != exp) |
3352 | continue; |
3353 | } |
3354 | else |
3355 | continue; |
3356 | } |
3357 | else |
3358 | continue; |
3359 | lowj = ranges[j].low; |
3360 | if (lowj == NULL_TREE) |
3361 | continue; |
3362 | highj = ranges[j].high; |
3363 | if (highj == NULL_TREE) |
3364 | highj = TYPE_MAX_VALUE (type); |
3365 | wide_int mask2; |
exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
highj, &mask2, NULL);
3368 | if (exp2 != exp) |
3369 | continue; |
3370 | mask |= mask2; |
3371 | strict_overflow_p |= ranges[j].strict_overflow_p; |
candidates.safe_push (&ranges[j]);
3373 | } |
3374 | |
/* If every possible relative value of the expression is a valid shift
amount, then we can merge the entry test into the bit test.  In that
case, if we would otherwise need 2 or more comparisons, use the bit
test; in the other cases the threshold is 3 comparisons. */
3379 | bool entry_test_needed; |
3380 | value_range r; |
3381 | if (TREE_CODE (exp) == SSA_NAME |
&& get_range_query (cfun)->range_of_expr (r, exp)
&& !r.undefined_p ()
&& !r.varying_p ()
&& wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3386 | { |
3387 | wide_int min = r.lower_bound (); |
wide_int ilowi = wi::to_wide (lowi);
if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
{
lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
mask = wi::lshift (mask, ilowi - min);
}
else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
{
lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
mask = wi::lrshift (mask, min - ilowi);
}
3398 | } |
3399 | entry_test_needed = false; |
3400 | } |
3401 | else |
3402 | entry_test_needed = true; |
3403 | if (candidates.length () >= (entry_test_needed ? 2 : 1)) |
3404 | { |
tree high = wide_int_to_tree (TREE_TYPE (lowi),
wi::to_widest (lowi)
+ prec - 1 - wi::clz (mask));
3408 | operand_entry *oe = (*ops)[ranges[i].idx]; |
3409 | tree op = oe->op; |
3410 | gimple *stmt = op ? SSA_NAME_DEF_STMT (op) |
3411 | : last_nondebug_stmt (BASIC_BLOCK_FOR_FN |
3412 | (cfun, oe->id)); |
location_t loc = gimple_location (stmt);
3414 | tree optype = op ? TREE_TYPE (op) : boolean_type_node; |
3415 | |
/* See if it isn't cheaper to pretend the minimum value of the
range is 0, if the maximum value is small enough.
We can then avoid the subtraction of the minimum value, but the
mask constant could perhaps be more expensive. */
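/* E.g. (illustrative only) a bit test for X in {3, 4, 5, 6} can be
done either as ((1 << (X - 3)) & 0xF) != 0 or, because 0xF << 3
== 0x78 still fits into a word, as ((1 << X) & 0x78) != 0; the
latter saves the subtraction at the cost of a possibly more
expensive mask constant.  */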
3420 | if (compare_tree_int (lowi, 0) > 0 |
3421 | && compare_tree_int (high, prec) < 0) |
3422 | { |
3423 | int cost_diff; |
3424 | HOST_WIDE_INT m = tree_to_uhwi (lowi); |
3425 | rtx reg = gen_raw_REG (word_mode, 10000); |
bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
GEN_INT (-m)),
word_mode, speed_p);
rtx r = immed_wide_int_const (mask, word_mode);
cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
word_mode, speed_p);
r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
word_mode, speed_p);
if (cost_diff > 0)
{
mask = wi::lshift (mask, m);
3439 | lowi = build_zero_cst (TREE_TYPE (lowi)); |
3440 | } |
3441 | } |
3442 | |
3443 | tree tem; |
3444 | if (entry_test_needed) |
3445 | { |
3446 | tem = build_range_check (loc, optype, unshare_expr (exp), |
3447 | false, lowi, high); |
3448 | if (tem == NULL_TREE || is_gimple_val (tem)) |
3449 | continue; |
3450 | } |
3451 | else |
3452 | tem = NULL_TREE; |
3453 | tree etype = unsigned_type_for (TREE_TYPE (exp)); |
3454 | exp = fold_build2_loc (loc, MINUS_EXPR, etype, |
3455 | fold_convert_loc (loc, etype, exp), |
3456 | fold_convert_loc (loc, etype, lowi)); |
3457 | exp = fold_convert_loc (loc, integer_type_node, exp); |
3458 | tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); |
3459 | exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, |
3460 | build_int_cst (word_type, 1), exp); |
3461 | exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, |
wide_int_to_tree (word_type, mask));
3463 | exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, |
3464 | build_zero_cst (word_type)); |
3465 | if (is_gimple_val (exp)) |
3466 | continue; |
3467 | |
3468 | /* The shift might have undefined behavior if TEM is true, |
3469 | but reassociate_bb isn't prepared to have basic blocks |
3470 | split when it is running. So, temporarily emit a code |
3471 | with BIT_IOR_EXPR instead of &&, and fix it up in |
3472 | branch_fixup. */ |
3473 | gimple_seq seq = NULL; |
3474 | if (tem) |
3475 | { |
3476 | tem = force_gimple_operand (tem, &seq, true, NULL_TREE); |
3477 | gcc_assert (TREE_CODE (tem) == SSA_NAME); |
gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3479 | } |
3480 | gimple_seq seq2; |
3481 | exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); |
3482 | gimple_seq_add_seq_without_update (&seq, seq2); |
3483 | gcc_assert (TREE_CODE (exp) == SSA_NAME); |
gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3485 | if (tem) |
3486 | { |
gimple *g = gimple_build_assign (make_ssa_name (optype),
BIT_IOR_EXPR, tem, exp);
gimple_set_location (g, loc);
gimple_seq_add_stmt_without_update (&seq, g);
exp = gimple_assign_lhs (g);
3492 | } |
3493 | tree val = build_zero_cst (optype); |
if (update_range_test (&ranges[i], NULL, candidates.address (),
candidates.length (), opcode, ops, exp,
seq, false, val, val, strict_overflow_p))
3497 | { |
3498 | any_changes = true; |
3499 | if (tem) |
reassoc_branch_fixups.safe_push (tem);
3501 | } |
3502 | else |
3503 | gimple_seq_discard (seq); |
3504 | } |
3505 | } |
3506 | return any_changes; |
3507 | } |
3508 | |
/* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1.
Also, handle x < C && y < C && z < C where C is a power of two as
(x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
as (x & y & z) < 0. */
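/* To spell out the last two cases (illustrative only): with C a power
of two, unsigned x < C holds iff all bits at and above log2 (C) are
clear, so it holds for x, y and z simultaneously iff those bits are
clear in x | y | z, i.e. iff (x | y | z) < C.  Likewise signed x < 0
holds iff the sign bit is set, so all three tests hold iff the sign
bit is set in x & y & z.  The || forms are handled the same way with
the in_p flags inverted, yielding e.g. (x | y | z) < 0 for
x < 0 || y < 0 || z < 0.  */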
3514 | |
3515 | static bool |
3516 | optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, |
3517 | vec<operand_entry *> *ops, |
3518 | struct range_entry *ranges) |
3519 | { |
3520 | int i; |
3521 | unsigned int b; |
3522 | bool any_changes = false; |
3523 | auto_vec<int, 128> buckets; |
3524 | auto_vec<int, 32> chains; |
3525 | auto_vec<struct range_entry *, 32> candidates; |
3526 | |
3527 | for (i = first; i < length; i++) |
3528 | { |
3529 | int idx; |
3530 | |
3531 | if (ranges[i].exp == NULL_TREE |
3532 | || TREE_CODE (ranges[i].exp) != SSA_NAME |
3533 | || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 |
3534 | || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) |
3535 | continue; |
3536 | |
3537 | if (ranges[i].low != NULL_TREE |
3538 | && ranges[i].high != NULL_TREE |
3539 | && ranges[i].in_p |
3540 | && tree_int_cst_equal (ranges[i].low, ranges[i].high)) |
3541 | { |
3542 | idx = !integer_zerop (ranges[i].low); |
3543 | if (idx && !integer_all_onesp (ranges[i].low)) |
3544 | continue; |
3545 | } |
3546 | else if (ranges[i].high != NULL_TREE |
3547 | && TREE_CODE (ranges[i].high) == INTEGER_CST |
3548 | && ranges[i].in_p) |
3549 | { |
wide_int w = wi::to_wide (ranges[i].high);
3551 | int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); |
3552 | int l = wi::clz (w); |
3553 | idx = 2; |
3554 | if (l <= 0 |
3555 | || l >= prec |
|| w != wi::mask (prec - l, false, prec))
3557 | continue; |
3558 | if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) |
3559 | && ranges[i].low == NULL_TREE) |
3560 | || (ranges[i].low |
3561 | && integer_zerop (ranges[i].low)))) |
3562 | continue; |
3563 | } |
3564 | else if (ranges[i].high == NULL_TREE |
3565 | && ranges[i].low != NULL_TREE |
/* Perform this optimization only in the last
reassoc pass, as it interferes with the reassociation
itself, or could also interfere with VRP etc., which might
not be able to virtually undo the optimization. */
3570 | && !reassoc_insert_powi_p |
3571 | && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) |
3572 | && integer_zerop (ranges[i].low)) |
3573 | idx = 3; |
3574 | else |
3575 | continue; |
3576 | |
3577 | b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx; |
3578 | if (buckets.length () <= b) |
buckets.safe_grow_cleared (b + 1, true);
3580 | if (chains.length () <= (unsigned) i) |
chains.safe_grow (i + 1, true);
3582 | chains[i] = buckets[b]; |
3583 | buckets[b] = i + 1; |
3584 | } |
3585 | |
3586 | FOR_EACH_VEC_ELT (buckets, b, i) |
3587 | if (i && chains[i - 1]) |
3588 | { |
3589 | int j, k = i; |
3590 | if ((b % 4) == 2) |
3591 | { |
3592 | /* When ranges[X - 1].high + 1 is a power of two, |
3593 | we need to process the same bucket up to |
3594 | precision - 1 times, each time split the entries |
3595 | with the same high bound into one chain and the |
3596 | rest into another one to be processed later. */ |
3597 | int this_prev = i; |
3598 | int other_prev = 0; |
3599 | for (j = chains[i - 1]; j; j = chains[j - 1]) |
3600 | { |
3601 | if (tree_int_cst_equal (ranges[i - 1].high, |
3602 | ranges[j - 1].high)) |
3603 | { |
3604 | chains[this_prev - 1] = j; |
3605 | this_prev = j; |
3606 | } |
3607 | else if (other_prev == 0) |
3608 | { |
3609 | buckets[b] = j; |
3610 | other_prev = j; |
3611 | } |
3612 | else |
3613 | { |
3614 | chains[other_prev - 1] = j; |
3615 | other_prev = j; |
3616 | } |
3617 | } |
3618 | chains[this_prev - 1] = 0; |
3619 | if (other_prev) |
3620 | chains[other_prev - 1] = 0; |
3621 | if (chains[i - 1] == 0) |
3622 | { |
3623 | if (other_prev) |
3624 | b--; |
3625 | continue; |
3626 | } |
3627 | } |
3628 | for (j = chains[i - 1]; j; j = chains[j - 1]) |
3629 | { |
3630 | gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); |
3631 | gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); |
if (reassoc_stmt_dominates_stmt_p (gk, gj))
3633 | k = j; |
3634 | } |
3635 | tree type1 = TREE_TYPE (ranges[k - 1].exp); |
3636 | tree type2 = NULL_TREE; |
3637 | bool strict_overflow_p = false; |
candidates.truncate (0);
3639 | if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE) |
3640 | type1 = pointer_sized_int_node; |
3641 | for (j = i; j; j = chains[j - 1]) |
3642 | { |
3643 | tree type = TREE_TYPE (ranges[j - 1].exp); |
3644 | strict_overflow_p |= ranges[j - 1].strict_overflow_p; |
3645 | if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE) |
3646 | type = pointer_sized_int_node; |
3647 | if ((b % 4) == 3) |
3648 | { |
/* For the signed < 0 cases, the types should be
really compatible (all signed with the same precision);
instead put ranges that have a different in_p from
k first. */
3653 | if (!useless_type_conversion_p (type1, type)) |
3654 | continue; |
3655 | if (ranges[j - 1].in_p != ranges[k - 1].in_p) |
candidates.safe_push (&ranges[j - 1]);
3657 | type2 = type1; |
3658 | continue; |
3659 | } |
3660 | if (j == k |
3661 | || useless_type_conversion_p (type1, type)) |
3662 | ; |
3663 | else if (type2 == NULL_TREE |
3664 | || useless_type_conversion_p (type2, type)) |
3665 | { |
3666 | if (type2 == NULL_TREE) |
3667 | type2 = type; |
candidates.safe_push (&ranges[j - 1]);
3669 | } |
3670 | } |
3671 | unsigned l = candidates.length (); |
3672 | for (j = i; j; j = chains[j - 1]) |
3673 | { |
3674 | tree type = TREE_TYPE (ranges[j - 1].exp); |
3675 | if (j == k) |
3676 | continue; |
3677 | if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE) |
3678 | type = pointer_sized_int_node; |
3679 | if ((b % 4) == 3) |
3680 | { |
3681 | if (!useless_type_conversion_p (type1, type)) |
3682 | continue; |
3683 | if (ranges[j - 1].in_p == ranges[k - 1].in_p) |
candidates.safe_push (&ranges[j - 1]);
3685 | continue; |
3686 | } |
3687 | if (useless_type_conversion_p (type1, type)) |
3688 | ; |
3689 | else if (type2 == NULL_TREE |
3690 | || useless_type_conversion_p (type2, type)) |
3691 | continue; |
candidates.safe_push (&ranges[j - 1]);
3693 | } |
3694 | gimple_seq seq = NULL; |
3695 | tree op = NULL_TREE; |
3696 | unsigned int id; |
3697 | struct range_entry *r; |
candidates.safe_push (&ranges[k - 1]);
3699 | FOR_EACH_VEC_ELT (candidates, id, r) |
3700 | { |
3701 | gimple *g; |
3702 | enum tree_code code; |
3703 | if (id == 0) |
3704 | { |
3705 | op = r->exp; |
3706 | continue; |
3707 | } |
3708 | if (id == l |
3709 | || POINTER_TYPE_P (TREE_TYPE (op)) |
3710 | || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE) |
3711 | { |
3712 | code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; |
3713 | tree type3 = id >= l ? type1 : pointer_sized_int_node; |
3714 | if (code == BIT_NOT_EXPR |
3715 | && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE) |
3716 | { |
g = gimple_build_assign (make_ssa_name (type3),
NOP_EXPR, op);
gimple_seq_add_stmt_without_update (&seq, g);
op = gimple_assign_lhs (g);
}
g = gimple_build_assign (make_ssa_name (type3), code, op);
gimple_seq_add_stmt_without_update (&seq, g);
op = gimple_assign_lhs (g);
3725 | } |
3726 | tree type = TREE_TYPE (r->exp); |
3727 | tree exp = r->exp; |
3728 | if (POINTER_TYPE_P (type) |
3729 | || TREE_CODE (type) == OFFSET_TYPE |
3730 | || (id >= l && !useless_type_conversion_p (type1, type))) |
3731 | { |
3732 | tree type3 = id >= l ? type1 : pointer_sized_int_node; |
g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
gimple_seq_add_stmt_without_update (&seq, g);
exp = gimple_assign_lhs (g);
3736 | } |
3737 | if ((b % 4) == 3) |
3738 | code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR; |
3739 | else |
3740 | code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR; |
g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
code, op, exp);
gimple_seq_add_stmt_without_update (&seq, g);
op = gimple_assign_lhs (g);
3745 | } |
3746 | type1 = TREE_TYPE (ranges[k - 1].exp); |
3747 | if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE) |
3748 | { |
3749 | gimple *g |
= gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
gimple_seq_add_stmt_without_update (&seq, g);
op = gimple_assign_lhs (g);
3753 | } |
3754 | candidates.pop (); |
if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
candidates.length (), opcode, ops, op,
seq, ranges[k - 1].in_p, ranges[k - 1].low,
ranges[k - 1].high, strict_overflow_p))
3759 | any_changes = true; |
3760 | else |
3761 | gimple_seq_discard (seq); |
3762 | if ((b % 4) == 2 && buckets[b] != i) |
3763 | /* There is more work to do for this bucket. */ |
3764 | b--; |
3765 | } |
3766 | |
3767 | return any_changes; |
3768 | } |
3769 | |
3770 | /* Attempt to optimize for signed a and b where b is known to be >= 0: |
3771 | a >= 0 && a < b into (unsigned) a < (unsigned) b |
3772 | a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ |
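/* Why the unsigned casts preserve the tests (an illustrative note): B
is known to be >= 0, so (unsigned) B == B and B is at most the signed
TYPE_MAX_VALUE.  For A >= 0 the cast is likewise the identity, and
for A < 0 it maps A above TYPE_MAX_VALUE, making (unsigned) A <
(unsigned) B false, exactly as A >= 0 && A < B requires.  */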
3773 | |
3774 | static bool |
3775 | optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, |
3776 | vec<operand_entry *> *ops, |
3777 | struct range_entry *ranges, |
3778 | basic_block first_bb) |
3779 | { |
3780 | int i; |
3781 | bool any_changes = false; |
3782 | hash_map<tree, int> *map = NULL; |
3783 | |
3784 | for (i = first; i < length; i++) |
3785 | { |
3786 | if (ranges[i].exp == NULL_TREE |
3787 | || TREE_CODE (ranges[i].exp) != SSA_NAME |
3788 | || !ranges[i].in_p) |
3789 | continue; |
3790 | |
3791 | tree type = TREE_TYPE (ranges[i].exp); |
3792 | if (!INTEGRAL_TYPE_P (type) |
3793 | || TYPE_UNSIGNED (type) |
3794 | || ranges[i].low == NULL_TREE |
3795 | || !integer_zerop (ranges[i].low) |
3796 | || ranges[i].high != NULL_TREE) |
3797 | continue; |
3798 | /* EXP >= 0 here. */ |
3799 | if (map == NULL) |
3800 | map = new hash_map <tree, int>; |
map->put (ranges[i].exp, i);
3802 | } |
3803 | |
3804 | if (map == NULL) |
3805 | return false; |
3806 | |
3807 | for (i = 0; i < length; i++) |
3808 | { |
3809 | bool in_p = ranges[i].in_p; |
3810 | if (ranges[i].low == NULL_TREE |
3811 | || ranges[i].high == NULL_TREE) |
3812 | continue; |
3813 | if (!integer_zerop (ranges[i].low) |
3814 | || !integer_zerop (ranges[i].high)) |
3815 | { |
3816 | if (ranges[i].exp |
3817 | && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 |
3818 | && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) |
3819 | && integer_onep (ranges[i].low) |
3820 | && integer_onep (ranges[i].high)) |
3821 | in_p = !in_p; |
3822 | else |
3823 | continue; |
3824 | } |
3825 | |
3826 | gimple *stmt; |
3827 | tree_code ccode; |
3828 | tree rhs1, rhs2; |
3829 | if (ranges[i].exp) |
3830 | { |
3831 | if (TREE_CODE (ranges[i].exp) != SSA_NAME) |
3832 | continue; |
3833 | stmt = SSA_NAME_DEF_STMT (ranges[i].exp); |
if (!is_gimple_assign (stmt))
continue;
ccode = gimple_assign_rhs_code (stmt);
rhs1 = gimple_assign_rhs1 (stmt);
rhs2 = gimple_assign_rhs2 (stmt);
3839 | } |
3840 | else |
3841 | { |
3842 | operand_entry *oe = (*ops)[ranges[i].idx]; |
3843 | stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); |
if (gimple_code (stmt) != GIMPLE_COND)
continue;
ccode = gimple_cond_code (stmt);
rhs1 = gimple_cond_lhs (stmt);
rhs2 = gimple_cond_rhs (stmt);
3849 | } |
3850 | |
3851 | if (TREE_CODE (rhs1) != SSA_NAME |
3852 | || rhs2 == NULL_TREE |
3853 | || TREE_CODE (rhs2) != SSA_NAME) |
3854 | continue; |
3855 | |
3856 | switch (ccode) |
3857 | { |
3858 | case GT_EXPR: |
3859 | case GE_EXPR: |
3860 | case LT_EXPR: |
3861 | case LE_EXPR: |
3862 | break; |
3863 | default: |
3864 | continue; |
3865 | } |
3866 | if (in_p) |
3867 | ccode = invert_tree_comparison (ccode, false); |
3868 | switch (ccode) |
3869 | { |
3870 | case GT_EXPR: |
3871 | case GE_EXPR: |
std::swap (rhs1, rhs2);
3873 | ccode = swap_tree_comparison (ccode); |
3874 | break; |
3875 | case LT_EXPR: |
3876 | case LE_EXPR: |
3877 | break; |
3878 | default: |
3879 | gcc_unreachable (); |
3880 | } |
3881 | |
int *idx = map->get (rhs1);
3883 | if (idx == NULL) |
3884 | continue; |
3885 | |
3886 | /* maybe_optimize_range_tests allows statements without side-effects |
3887 | in the basic blocks as long as they are consumed in the same bb. |
Make sure rhs2's def stmt is not among them, otherwise we can't
safely use get_nonzero_bits on it.  E.g. in:
3890 | # RANGE [-83, 1] NONZERO 173 |
3891 | # k_32 = PHI <k_47(13), k_12(9)> |
3892 | ... |
3893 | if (k_32 >= 0) |
3894 | goto <bb 5>; [26.46%] |
3895 | else |
3896 | goto <bb 9>; [73.54%] |
3897 | |
3898 | <bb 5> [local count: 140323371]: |
3899 | # RANGE [0, 1] NONZERO 1 |
3900 | _5 = (int) k_32; |
3901 | # RANGE [0, 4] NONZERO 4 |
3902 | _21 = _5 << 2; |
3903 | # RANGE [0, 4] NONZERO 4 |
3904 | iftmp.0_44 = (char) _21; |
3905 | if (k_32 < iftmp.0_44) |
3906 | goto <bb 6>; [84.48%] |
3907 | else |
3908 | goto <bb 9>; [15.52%] |
the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3910 | k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 |
3911 | to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute |
3912 | those stmts even for negative k_32 and the value ranges would be no |
3913 | longer guaranteed and so the optimization would be invalid. */ |
3914 | while (opcode == ERROR_MARK) |
3915 | { |
3916 | gimple *g = SSA_NAME_DEF_STMT (rhs2); |
3917 | basic_block bb2 = gimple_bb (g); |
3918 | if (bb2 |
3919 | && bb2 != first_bb |
3920 | && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) |
3921 | { |
3922 | /* As an exception, handle a few common cases. */ |
if (gimple_assign_cast_p (g)
3924 | && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))) |
3925 | { |
tree op0 = gimple_assign_rhs1 (g);
3927 | if (TYPE_UNSIGNED (TREE_TYPE (op0)) |
3928 | && (TYPE_PRECISION (TREE_TYPE (rhs2)) |
3929 | > TYPE_PRECISION (TREE_TYPE (op0)))) |
3930 | /* Zero-extension is always ok. */ |
3931 | break; |
3932 | else if (TYPE_PRECISION (TREE_TYPE (rhs2)) |
3933 | == TYPE_PRECISION (TREE_TYPE (op0)) |
3934 | && TREE_CODE (op0) == SSA_NAME) |
3935 | { |
3936 | /* Cast from signed to unsigned or vice versa. Retry |
3937 | with the op0 as new rhs2. */ |
3938 | rhs2 = op0; |
3939 | continue; |
3940 | } |
3941 | } |
else if (is_gimple_assign (g)
&& gimple_assign_rhs_code (g) == BIT_AND_EXPR
&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
&& !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3946 | /* Masking with INTEGER_CST with MSB clear is always ok |
3947 | too. */ |
3948 | break; |
3949 | rhs2 = NULL_TREE; |
3950 | } |
3951 | break; |
3952 | } |
3953 | if (rhs2 == NULL_TREE) |
3954 | continue; |
3955 | |
3956 | wide_int nz = get_nonzero_bits (rhs2); |
if (wi::neg_p (nz))
3958 | continue; |
3959 | |
3960 | /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 |
3961 | and RHS2 is known to be RHS2 >= 0. */ |
3962 | tree utype = unsigned_type_for (TREE_TYPE (rhs1)); |
3963 | |
3964 | enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; |
3965 | if ((ranges[*idx].strict_overflow_p |
3966 | || ranges[i].strict_overflow_p) |
3967 | && issue_strict_overflow_warning (wc)) |
warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3969 | "assuming signed overflow does not occur " |
3970 | "when simplifying range test" ); |
3971 | |
3972 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3973 | { |
3974 | struct range_entry *r = &ranges[*idx]; |
fprintf (dump_file, "Optimizing range test ");
print_generic_expr (dump_file, r->exp);
fprintf (dump_file, " +[");
print_generic_expr (dump_file, r->low);
fprintf (dump_file, ", ");
print_generic_expr (dump_file, r->high);
fprintf (dump_file, "] and comparison ");
print_generic_expr (dump_file, rhs1);
fprintf (dump_file, " %s ", op_symbol_code (ccode));
print_generic_expr (dump_file, rhs2);
fprintf (dump_file, "\n into (");
print_generic_expr (dump_file, utype);
fprintf (dump_file, ") ");
print_generic_expr (dump_file, rhs1);
fprintf (dump_file, " %s (", op_symbol_code (ccode));
print_generic_expr (dump_file, utype);
fprintf (dump_file, ") ");
print_generic_expr (dump_file, rhs2);
fprintf (dump_file, "\n");
3994 | } |
3995 | |
3996 | operand_entry *oe = (*ops)[ranges[i].idx]; |
3997 | ranges[i].in_p = 0; |
3998 | if (opcode == BIT_IOR_EXPR |
3999 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) |
4000 | { |
4001 | ranges[i].in_p = 1; |
4002 | ccode = invert_tree_comparison (ccode, false); |
4003 | } |
4004 | |
unsigned int uid = gimple_uid (stmt);
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
gimple_set_uid (g, uid);
rhs1 = gimple_assign_lhs (g);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
{
g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
gimple_set_uid (g, uid);
rhs2 = gimple_assign_lhs (g);
4016 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
4017 | } |
4018 | if (tree_swap_operands_p (rhs1, rhs2)) |
4019 | { |
std::swap (rhs1, rhs2);
4021 | ccode = swap_tree_comparison (ccode); |
4022 | } |
if (gimple_code (stmt) == GIMPLE_COND)
{
gcond *c = as_a <gcond *> (stmt);
gimple_cond_set_code (c, ccode);
gimple_cond_set_lhs (c, rhs1);
gimple_cond_set_rhs (c, rhs2);
update_stmt (stmt);
4030 | } |
4031 | else |
4032 | { |
4033 | tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; |
4034 | if (!INTEGRAL_TYPE_P (ctype) |
4035 | || (TREE_CODE (ctype) != BOOLEAN_TYPE |
4036 | && TYPE_PRECISION (ctype) != 1)) |
4037 | ctype = boolean_type_node; |
g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4039 | gimple_set_uid (g, uid); |
4040 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
4041 | if (oe->op && ctype != TREE_TYPE (oe->op)) |
4042 | { |
g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
NOP_EXPR, gimple_assign_lhs (g));
4045 | gimple_set_uid (g, uid); |
4046 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
4047 | } |
ranges[i].exp = gimple_assign_lhs (g);
4049 | oe->op = ranges[i].exp; |
4050 | ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); |
4051 | ranges[i].high = ranges[i].low; |
4052 | } |
4053 | ranges[i].strict_overflow_p = false; |
4054 | oe = (*ops)[ranges[*idx].idx]; |
4055 | /* Now change all the other range test immediate uses, so that |
4056 | those tests will be optimized away. */ |
4057 | if (opcode == ERROR_MARK) |
4058 | { |
4059 | if (oe->op) |
4060 | oe->op = build_int_cst (TREE_TYPE (oe->op), |
4061 | oe->rank == BIT_IOR_EXPR ? 0 : 1); |
4062 | else |
4063 | oe->op = (oe->rank == BIT_IOR_EXPR |
4064 | ? boolean_false_node : boolean_true_node); |
4065 | } |
4066 | else |
4067 | oe->op = error_mark_node; |
4068 | ranges[*idx].exp = NULL_TREE; |
4069 | ranges[*idx].low = NULL_TREE; |
4070 | ranges[*idx].high = NULL_TREE; |
4071 | any_changes = true; |
4072 | } |
4073 | |
4074 | delete map; |
4075 | return any_changes; |
4076 | } |
4077 | |
/* Optimize range tests, similarly to how fold_range_test optimizes
them on trees.  The tree code for the binary
operation between all the operands is OPCODE.
If OPCODE is ERROR_MARK, optimize_range_tests is called from within
maybe_optimize_range_tests for inter-bb range optimization.
In that case, if oe->op is NULL, oe->id is the bb->index whose
GIMPLE_COND is && or ||ed into the test, and oe->rank says
the actual opcode.
FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4087 | |
4088 | static bool |
4089 | optimize_range_tests (enum tree_code opcode, |
4090 | vec<operand_entry *> *ops, basic_block first_bb) |
4091 | { |
4092 | unsigned int length = ops->length (), i, j, first; |
4093 | operand_entry *oe; |
4094 | struct range_entry *ranges; |
4095 | bool any_changes = false; |
4096 | |
4097 | if (length == 1) |
4098 | return false; |
4099 | |
4100 | ranges = XNEWVEC (struct range_entry, length); |
4101 | for (i = 0; i < length; i++) |
4102 | { |
4103 | oe = (*ops)[i]; |
4104 | ranges[i].idx = i; |
init_range_entry (ranges + i, oe->op,
oe->op
? NULL
: last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
/* For |, invert it now; we will invert it again before emitting
the optimized expression. */
4111 | if (opcode == BIT_IOR_EXPR |
4112 | || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) |
4113 | ranges[i].in_p = !ranges[i].in_p; |
4114 | } |
4115 | |
4116 | qsort (ranges, length, sizeof (*ranges), range_entry_cmp); |
4117 | for (i = 0; i < length; i++) |
4118 | if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) |
4119 | break; |
4120 | |
4121 | /* Try to merge ranges. */ |
4122 | for (first = i; i < length; i++) |
4123 | { |
4124 | tree low = ranges[i].low; |
4125 | tree high = ranges[i].high; |
4126 | int in_p = ranges[i].in_p; |
4127 | bool strict_overflow_p = ranges[i].strict_overflow_p; |
4128 | int update_fail_count = 0; |
4129 | |
4130 | for (j = i + 1; j < length; j++) |
4131 | { |
4132 | if (ranges[i].exp != ranges[j].exp) |
4133 | break; |
4134 | if (!merge_ranges (&in_p, &low, &high, in_p, low, high, |
4135 | ranges[j].in_p, ranges[j].low, ranges[j].high)) |
4136 | break; |
4137 | strict_overflow_p |= ranges[j].strict_overflow_p; |
4138 | } |
4139 | |
4140 | if (j == i + 1) |
4141 | continue; |
4142 | |
if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
opcode, ops, ranges[i].exp, NULL, in_p,
low, high, strict_overflow_p))
4146 | { |
4147 | i = j - 1; |
4148 | any_changes = true; |
4149 | } |
4150 | /* Avoid quadratic complexity if all merge_ranges calls would succeed, |
4151 | while update_range_test would fail. */ |
4152 | else if (update_fail_count == 64) |
4153 | i = j - 1; |
4154 | else |
4155 | ++update_fail_count; |
4156 | } |
4157 | |
any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4159 | ops, ranges); |
4160 | |
4161 | if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) |
any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4163 | ops, ranges); |
4164 | if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) |
4165 | any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, |
4166 | ops, ranges); |
4167 | any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, |
4168 | ranges, first_bb); |
4169 | any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, |
4170 | ops, ranges); |
4171 | |
4172 | if (any_changes && opcode != ERROR_MARK) |
4173 | { |
4174 | j = 0; |
4175 | FOR_EACH_VEC_ELT (*ops, i, oe) |
4176 | { |
4177 | if (oe->op == error_mark_node) |
4178 | continue; |
4179 | else if (i != j) |
4180 | (*ops)[j] = oe; |
4181 | j++; |
4182 | } |
ops->truncate (j);
4184 | } |
4185 | |
4186 | XDELETEVEC (ranges); |
4187 | return any_changes; |
4188 | } |
4189 | |
4190 | /* A subroutine of optimize_vec_cond_expr to extract and canonicalize |
4191 | the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, |
4192 | otherwise the comparison code. TYPE is a return value that is set |
4193 | to type of comparison. */ |
4194 | |
4195 | static tree_code |
ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4197 | tree *lhs, tree *rhs, gassign **vcond) |
4198 | { |
4199 | if (TREE_CODE (var) != SSA_NAME) |
4200 | return ERROR_MARK; |
4201 | |
4202 | gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); |
4203 | if (stmt == NULL) |
4204 | return ERROR_MARK; |
4205 | if (vcond) |
4206 | *vcond = stmt; |
4207 | |
4208 | /* ??? If we start creating more COND_EXPR, we could perform |
4209 | this same optimization with them. For now, simplify. */ |
if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4211 | return ERROR_MARK; |
4212 | |
tree cond = gimple_assign_rhs1 (stmt);
4214 | tree_code cmp = TREE_CODE (cond); |
4215 | if (cmp != SSA_NAME) |
4216 | return ERROR_MARK; |
4217 | |
4218 | gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond)); |
4219 | if (assign == NULL |
4220 | || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) |
4221 | return ERROR_MARK; |
4222 | |
cmp = gimple_assign_rhs_code (assign);
if (lhs)
*lhs = gimple_assign_rhs1 (assign);
if (rhs)
*rhs = gimple_assign_rhs2 (assign);
4228 | |
4229 | /* ??? For now, allow only canonical true and false result vectors. |
4230 | We could expand this to other constants should the need arise, |
4231 | but at the moment we don't create them. */ |
tree t = gimple_assign_rhs2 (stmt);
tree f = gimple_assign_rhs3 (stmt);
4234 | bool inv; |
4235 | if (integer_all_onesp (t)) |
4236 | inv = false; |
4237 | else if (integer_all_onesp (f)) |
4238 | { |
4239 | cmp = invert_tree_comparison (cmp, false); |
4240 | inv = true; |
4241 | } |
4242 | else |
4243 | return ERROR_MARK; |
4244 | if (!integer_zerop (f)) |
4245 | return ERROR_MARK; |
4246 | |
4247 | /* Success! */ |
4248 | if (rets) |
4249 | *rets = assign; |
4250 | if (reti) |
4251 | *reti = inv; |
4252 | if (type) |
4253 | *type = TREE_TYPE (cond); |
4254 | return cmp; |
4255 | } |
4256 | |
4257 | /* Optimize the condition of VEC_COND_EXPRs which have been combined |
4258 | with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */ |
4259 | |
4260 | static bool |
4261 | optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) |
4262 | { |
4263 | unsigned int length = ops->length (), i, j; |
4264 | bool any_changes = false; |
4265 | |
4266 | if (length == 1) |
4267 | return false; |
4268 | |
4269 | for (i = 0; i < length; ++i) |
4270 | { |
4271 | tree elt0 = (*ops)[i]->op; |
4272 | |
4273 | gassign *stmt0, *vcond0; |
4274 | bool invert; |
4275 | tree type, lhs0, rhs0; |
tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
&rhs0, &vcond0);
4278 | if (cmp0 == ERROR_MARK) |
4279 | continue; |
4280 | |
4281 | for (j = i + 1; j < length; ++j) |
4282 | { |
4283 | tree &elt1 = (*ops)[j]->op; |
4284 | |
4285 | gassign *stmt1, *vcond1; |
4286 | tree lhs1, rhs1; |
tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
&rhs1, &vcond1);
4289 | if (cmp1 == ERROR_MARK) |
4290 | continue; |
4291 | |
4292 | tree comb; |
4293 | if (opcode == BIT_AND_EXPR) |
4294 | comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0, |
4295 | cmp1, lhs1, rhs1); |
4296 | else if (opcode == BIT_IOR_EXPR) |
4297 | comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0, |
4298 | cmp1, lhs1, rhs1); |
4299 | else |
4300 | gcc_unreachable (); |
4301 | if (comb == NULL) |
4302 | continue; |
4303 | |
4304 | /* Success! */ |
4305 | if (dump_file && (dump_flags & TDF_DETAILS)) |
4306 | { |
fprintf (dump_file, "Transforming ");
print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
fprintf (dump_file, " into ");
print_generic_expr (dump_file, comb);
fputc ('\n', dump_file);
4314 | } |
4315 | |
4316 | gimple_stmt_iterator gsi = gsi_for_stmt (vcond0); |
4317 | tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE, |
4318 | true, GSI_SAME_STMT); |
4319 | if (invert) |
swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
gimple_assign_rhs3_ptr (vcond0));
gimple_assign_set_rhs1 (vcond0, exp);
update_stmt (vcond0);
4324 | |
4325 | elt1 = error_mark_node; |
4326 | any_changes = true; |
4327 | } |
4328 | } |
4329 | |
4330 | if (any_changes) |
4331 | { |
4332 | operand_entry *oe; |
4333 | j = 0; |
4334 | FOR_EACH_VEC_ELT (*ops, i, oe) |
4335 | { |
4336 | if (oe->op == error_mark_node) |
4337 | continue; |
4338 | else if (i != j) |
4339 | (*ops)[j] = oe; |
4340 | j++; |
4341 | } |
ops->truncate (j);
4343 | } |
4344 | |
4345 | return any_changes; |
4346 | } |
4347 | |
4348 | /* Return true if STMT is a cast like: |
4349 | <bb N>: |
4350 | ... |
4351 | _123 = (int) _234; |
4352 | |
4353 | <bb M>: |
4354 | # _345 = PHI <_123(N), 1(...), 1(...)> |
where _234 has bool type, _123 has a single use and
4356 | bb N has a single successor M. This is commonly used in |
4357 | the last block of a range test. |
4358 | |
Also return true if STMT is a tcc_comparison like:
4360 | <bb N>: |
4361 | ... |
4362 | _234 = a_2(D) == 2; |
4363 | |
4364 | <bb M>: |
4365 | # _345 = PHI <_234(N), 1(...), 1(...)> |
4366 | _346 = (int) _345; |
where _234 has bool type, a single use and
4368 | bb N has a single successor M. This is commonly used in |
4369 | the last block of a range test. */ |
4370 | |
4371 | static bool |
4372 | final_range_test_p (gimple *stmt) |
4373 | { |
4374 | basic_block bb, rhs_bb, lhs_bb; |
4375 | edge e; |
4376 | tree lhs, rhs; |
4377 | use_operand_p use_p; |
4378 | gimple *use_stmt; |
4379 | |
if (!gimple_assign_cast_p (stmt)
&& (!is_gimple_assign (stmt)
4382 | || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) |
4383 | != tcc_comparison))) |
4384 | return false; |
bb = gimple_bb (stmt);
4386 | if (!single_succ_p (bb)) |
4387 | return false; |
4388 | e = single_succ_edge (bb); |
4389 | if (e->flags & EDGE_COMPLEX) |
4390 | return false; |
4391 | |
lhs = gimple_assign_lhs (stmt);
rhs = gimple_assign_rhs1 (stmt);
if (gimple_assign_cast_p (stmt)
4395 | && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
4396 | || TREE_CODE (rhs) != SSA_NAME |
4397 | || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) |
4398 | return false; |
4399 | |
if (!gimple_assign_cast_p (stmt)
4401 | && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) |
4402 | return false; |
4403 | |
4404 | /* Test whether lhs is consumed only by a PHI in the only successor bb. */ |
if (!single_imm_use (lhs, &use_p, &use_stmt))
4406 | return false; |
4407 | |
if (gimple_code (use_stmt) != GIMPLE_PHI
|| gimple_bb (use_stmt) != e->dest)
4410 | return false; |
4411 | |
4412 | /* And that the rhs is defined in the same loop. */ |
if (gimple_assign_cast_p (stmt))
4414 | { |
4415 | if (TREE_CODE (rhs) != SSA_NAME |
4416 | || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) |
4417 | || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) |
4418 | return false; |
4419 | } |
4420 | else |
4421 | { |
4422 | if (TREE_CODE (lhs) != SSA_NAME |
4423 | || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) |
4424 | || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) |
4425 | return false; |
4426 | } |
4427 | |
4428 | return true; |
4429 | } |
4430 | |
/* Return true if BB is a suitable basic block for inter-bb range test
optimization.  If BACKWARD is true, BB should be the only predecessor
of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
or compared with to find a common basic block to which all conditions
branch if true resp. false.  If BACKWARD is false, TEST_BB should
be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
TEST_BB is a bb ending in a condition where the edge to the
non-*OTHER_BB block points to an empty block that falls through into
*OTHER_BB and the phi args match that path. */
4440 | |
4441 | static bool |
4442 | suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, |
4443 | bool *test_swapped_p, bool backward) |
4444 | { |
4445 | edge_iterator ei, ei2; |
4446 | edge e, e2; |
4447 | gimple *stmt; |
4448 | gphi_iterator gsi; |
4449 | bool other_edge_seen = false; |
4450 | bool is_cond; |
4451 | |
4452 | if (test_bb == bb) |
4453 | return false; |
4454 | /* Check last stmt first. */ |
4455 | stmt = last_nondebug_stmt (bb); |
4456 | if (stmt == NULL |
|| (gimple_code (stmt) != GIMPLE_COND
4458 | && (backward || !final_range_test_p (stmt))) |
4459 | || gimple_visited_p (stmt) |
4460 | || stmt_could_throw_p (cfun, stmt) |
4461 | || *other_bb == bb) |
4462 | return false; |
is_cond = gimple_code (stmt) == GIMPLE_COND;
4464 | if (is_cond) |
4465 | { |
4466 | /* If last stmt is GIMPLE_COND, verify that one of the succ edges |
4467 | goes to the next bb (if BACKWARD, it is TEST_BB), and the other |
4468 | to *OTHER_BB (if not set yet, try to find it out). */ |
4469 | if (EDGE_COUNT (bb->succs) != 2) |
4470 | return false; |
4471 | FOR_EACH_EDGE (e, ei, bb->succs) |
4472 | { |
4473 | if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) |
4474 | return false; |
4475 | if (e->dest == test_bb) |
4476 | { |
4477 | if (backward) |
4478 | continue; |
4479 | else |
4480 | return false; |
4481 | } |
4482 | if (e->dest == bb) |
4483 | return false; |
4484 | if (*other_bb == NULL) |
4485 | { |
4486 | FOR_EACH_EDGE (e2, ei2, test_bb->succs) |
4487 | if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) |
4488 | return false; |
4489 | else if (e->dest == e2->dest) |
4490 | *other_bb = e->dest; |
4491 | if (*other_bb == NULL) |
4492 | return false; |
4493 | } |
4494 | if (e->dest == *other_bb) |
4495 | other_edge_seen = true; |
4496 | else if (backward) |
4497 | return false; |
4498 | } |
4499 | if (*other_bb == NULL || !other_edge_seen) |
4500 | return false; |
4501 | } |
4502 | else if (single_succ (bb) != *other_bb) |
4503 | return false; |
4504 | |
4505 | /* Now check all PHIs of *OTHER_BB. */ |
4506 | e = find_edge (bb, *other_bb); |
4507 | e2 = find_edge (test_bb, *other_bb); |
4508 | retry:; |
for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4510 | { |
4511 | gphi *phi = gsi.phi (); |
4512 | /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments |
4513 | corresponding to BB and TEST_BB predecessor must be the same. */ |
if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
gimple_phi_arg_def (phi, e2->dest_idx), 0))
4516 | { |
4517 | /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, |
4518 | one of the PHIs should have the lhs of the last stmt in |
4519 | that block as PHI arg and that PHI should have 0 or 1 |
4520 | corresponding to it in all other range test basic blocks |
4521 | considered. */ |
4522 | if (!is_cond) |
4523 | { |
if (gimple_phi_arg_def (phi, e->dest_idx)
== gimple_assign_lhs (stmt)
&& (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
|| integer_onep (gimple_phi_arg_def (phi,
e2->dest_idx))))
4529 | continue; |
4530 | } |
4531 | else |
4532 | { |
4533 | gimple *test_last = last_nondebug_stmt (test_bb); |
if (gimple_code (test_last) == GIMPLE_COND)
4535 | { |
4536 | if (backward ? e2->src != test_bb : e->src != bb) |
4537 | return false; |
4538 | |
4539 | /* For last_bb, handle also: |
4540 | if (x_3(D) == 3) |
4541 | goto <bb 6>; [34.00%] |
4542 | else |
4543 | goto <bb 7>; [66.00%] |
4544 | |
4545 | <bb 6> [local count: 79512730]: |
4546 | |
4547 | <bb 7> [local count: 1073741824]: |
4548 | # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> |
4549 | where bb 7 is *OTHER_BB, but the PHI values from the |
4550 | earlier bbs match the path through the empty bb |
4551 | in between. */ |
4552 | edge e3; |
4553 | if (backward) |
4554 | e3 = EDGE_SUCC (test_bb, |
4555 | e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0); |
4556 | else |
4557 | e3 = EDGE_SUCC (bb, |
4558 | e == EDGE_SUCC (bb, 0) ? 1 : 0); |
4559 | if (empty_block_p (e3->dest) |
&& single_succ_p (e3->dest)
&& single_succ (e3->dest) == *other_bb
&& single_pred_p (e3->dest)
&& single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4564 | { |
4565 | if (backward) |
e2 = single_succ_edge (e3->dest);
4567 | else |
e = single_succ_edge (e3->dest);
4569 | if (test_swapped_p) |
4570 | *test_swapped_p = true; |
4571 | goto retry; |
4572 | } |
4573 | } |
else if (gimple_phi_arg_def (phi, e2->dest_idx)
== gimple_assign_lhs (test_last)
&& (integer_zerop (gimple_phi_arg_def (phi,
e->dest_idx))
|| integer_onep (gimple_phi_arg_def (phi,
e->dest_idx))))
4580 | continue; |
4581 | } |
4582 | |
4583 | return false; |
4584 | } |
4585 | } |
4586 | return true; |
4587 | } |
4588 | |
4589 | /* Return true if BB doesn't have side-effects that would disallow |
4590 | range test optimization, all SSA_NAMEs set in the bb are consumed |
4591 | in the bb and there are no PHIs. */ |
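/* E.g. (illustrative only) a block consisting of
_1 = x_2(D) == 3;
_5 = _1 & y_4(D);
if (_5 != 0) ...
qualifies when _1 and _5 are consumed only inside the block, whereas
a call, a store, a PHI node or an SSA_NAME used in another basic
block makes the predicate fail.  */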
4592 | |
4593 | bool |
4594 | no_side_effect_bb (basic_block bb) |
4595 | { |
4596 | gimple_stmt_iterator gsi; |
4597 | gimple *last; |
4598 | |
if (!gimple_seq_empty_p (phi_nodes (bb)))
4600 | return false; |
4601 | last = last_nondebug_stmt (bb); |
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4603 | { |
gimple *stmt = gsi_stmt (gsi);
4605 | tree lhs; |
4606 | imm_use_iterator imm_iter; |
4607 | use_operand_p use_p; |
4608 | |
if (is_gimple_debug (stmt))
4610 | continue; |
4611 | if (gimple_has_side_effects (stmt)) |
4612 | return false; |
4613 | if (stmt == last) |
4614 | return true; |
if (!is_gimple_assign (stmt))
4616 | return false; |
lhs = gimple_assign_lhs (stmt);
4618 | if (TREE_CODE (lhs) != SSA_NAME) |
4619 | return false; |
4620 | if (gimple_assign_rhs_could_trap_p (stmt)) |
4621 | return false; |
4622 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) |
4623 | { |
4624 | gimple *use_stmt = USE_STMT (use_p); |
if (is_gimple_debug (use_stmt))
4626 | continue; |
if (gimple_bb (use_stmt) != bb)
4628 | return false; |
4629 | } |
4630 | } |
4631 | return false; |
4632 | } |
4633 | |
4634 | /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, |
4635 | return true and fill in *OPS recursively. */ |
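/* E.g. (illustrative only) for
_5 = _1 & _2;
_6 = _5 & _3;
a call of get_ops (_6, BIT_AND_EXPR, ...) pushes { _1, _2, _3 } onto
*OPS, provided the statements are reassociable in LOOP and _5 and
the leaf operands each have a single use.  */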
4636 | |
4637 | static bool |
4638 | get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, |
4639 | class loop *loop) |
4640 | { |
4641 | gimple *stmt = SSA_NAME_DEF_STMT (var); |
4642 | tree rhs[2]; |
4643 | int i; |
4644 | |
4645 | if (!is_reassociable_op (stmt, code, loop)) |
4646 | return false; |
4647 | |
rhs[0] = gimple_assign_rhs1 (stmt);
rhs[1] = gimple_assign_rhs2 (stmt);
gimple_set_visited (stmt, true);
4651 | for (i = 0; i < 2; i++) |
4652 | if (TREE_CODE (rhs[i]) == SSA_NAME |
&& !get_ops (rhs[i], code, ops, loop)
&& has_single_use (rhs[i]))
4655 | { |
4656 | operand_entry *oe = operand_entry_pool.allocate (); |
4657 | |
4658 | oe->op = rhs[i]; |
4659 | oe->rank = code; |
4660 | oe->id = 0; |
4661 | oe->count = 1; |
4662 | oe->stmt_to_insert = NULL; |
ops->safe_push (oe);
4664 | } |
4665 | return true; |
4666 | } |
4667 | |
4668 | /* Find the ops that were added by get_ops starting from VAR, see if |
4669 | they were changed during update_range_test and if yes, create new |
4670 | stmts. */ |
4671 | |
4672 | static tree |
4673 | update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops, |
4674 | unsigned int *pidx, class loop *loop) |
4675 | { |
4676 | gimple *stmt = SSA_NAME_DEF_STMT (var); |
4677 | tree rhs[4]; |
4678 | int i; |
4679 | |
4680 | if (!is_reassociable_op (stmt, code, loop)) |
4681 | return NULL; |
4682 | |
rhs[0] = gimple_assign_rhs1 (stmt);
rhs[1] = gimple_assign_rhs2 (stmt);
4685 | rhs[2] = rhs[0]; |
4686 | rhs[3] = rhs[1]; |
4687 | for (i = 0; i < 2; i++) |
4688 | if (TREE_CODE (rhs[i]) == SSA_NAME) |
4689 | { |
rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4691 | if (rhs[2 + i] == NULL_TREE) |
4692 | { |
if (has_single_use (rhs[i]))
4694 | rhs[2 + i] = ops[(*pidx)++]->op; |
4695 | else |
4696 | rhs[2 + i] = rhs[i]; |
4697 | } |
4698 | } |
4699 | if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) |
4700 | && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) |
4701 | { |
4702 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
4703 | var = make_ssa_name (TREE_TYPE (var)); |
gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
rhs[2], rhs[3]);
gimple_set_uid (g, gimple_uid (stmt));
gimple_set_visited (g, true);
4708 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
4709 | gimple_stmt_iterator gsi2 = gsi_for_stmt (g); |
4710 | if (fold_stmt_inplace (&gsi2)) |
update_stmt (g);
4712 | } |
4713 | return var; |
4714 | } |
4715 | |
4716 | /* Structure to track the initial value passed to get_ops and |
4717 | the range in the ops vector for each basic block. */ |
4718 | |
4719 | struct inter_bb_range_test_entry |
4720 | { |
4721 | tree op; |
4722 | unsigned int first_idx, last_idx; |
4723 | }; |
4724 | |
4725 | /* Inter-bb range test optimization. |
4726 | |
Returns TRUE if a gimple conditional is optimized to a constant
true/false; otherwise returns FALSE.
4729 | |
4730 | This indicates to the caller that it should run a CFG cleanup pass |
4731 | once reassociation is completed. */ |
4732 | |
4733 | static bool |
4734 | maybe_optimize_range_tests (gimple *stmt) |
4735 | { |
basic_block first_bb = gimple_bb (stmt);
4737 | basic_block last_bb = first_bb; |
4738 | basic_block other_bb = NULL; |
4739 | basic_block bb; |
4740 | edge_iterator ei; |
4741 | edge e; |
4742 | auto_vec<operand_entry *> ops; |
4743 | auto_vec<inter_bb_range_test_entry> bbinfo; |
4744 | bool any_changes = false; |
4745 | bool cfg_cleanup_needed = false; |
4746 | |
4747 | /* Consider only basic blocks that end with GIMPLE_COND or |
4748 | a cast statement satisfying final_range_test_p. All |
4749 | but the last bb in the first_bb .. last_bb range |
4750 | should end with GIMPLE_COND. */ |
if (gimple_code (stmt) == GIMPLE_COND)
4752 | { |
4753 | if (EDGE_COUNT (first_bb->succs) != 2) |
4754 | return cfg_cleanup_needed; |
4755 | } |
4756 | else if (final_range_test_p (stmt)) |
other_bb = single_succ (first_bb);
4758 | else |
4759 | return cfg_cleanup_needed; |
4760 | |
4761 | if (stmt_could_throw_p (cfun, stmt)) |
4762 | return cfg_cleanup_needed; |
4763 | |
4764 | /* As relative ordering of post-dominator sons isn't fixed, |
4765 | maybe_optimize_range_tests can be called first on any |
4766 | bb in the range we want to optimize. So, start searching |
4767 | backwards, if first_bb can be set to a predecessor. */ |
while (single_pred_p (first_bb))
{
basic_block pred_bb = single_pred (first_bb);
if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
break;
if (!no_side_effect_bb (first_bb))
4774 | break; |
4775 | first_bb = pred_bb; |
4776 | } |
4777 | /* If first_bb is last_bb, other_bb hasn't been computed yet. |
4778 | Before starting forward search in last_bb successors, find |
4779 | out the other_bb. */ |
4780 | if (first_bb == last_bb) |
4781 | { |
4782 | other_bb = NULL; |
/* As a non-GIMPLE_COND last stmt always terminates the range,
there is nothing for the forward search to discover, so just
give up.  */
4785 | if (gimple_code (g: stmt) != GIMPLE_COND) |
4786 | return cfg_cleanup_needed; |
/* Look at both successors.  Either one ends with a GIMPLE_COND
and satisfies suitable_cond_bb, or it ends with a cast and
other_bb is that cast bb's single successor.  */
4790 | FOR_EACH_EDGE (e, ei, first_bb->succs) |
4791 | if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) |
4792 | || e->dest == first_bb) |
4793 | return cfg_cleanup_needed; |
4794 | else if (single_pred_p (bb: e->dest)) |
4795 | { |
4796 | stmt = last_nondebug_stmt (e->dest); |
4797 | if (stmt |
4798 | && gimple_code (g: stmt) == GIMPLE_COND |
4799 | && EDGE_COUNT (e->dest->succs) == 2) |
4800 | { |
4801 | if (suitable_cond_bb (bb: first_bb, test_bb: e->dest, other_bb: &other_bb, |
4802 | NULL, backward: true)) |
4803 | break; |
4804 | else |
4805 | other_bb = NULL; |
4806 | } |
4807 | else if (stmt |
4808 | && final_range_test_p (stmt) |
4809 | && find_edge (first_bb, single_succ (bb: e->dest))) |
4810 | { |
4811 | other_bb = single_succ (bb: e->dest); |
4812 | if (other_bb == first_bb) |
4813 | other_bb = NULL; |
4814 | } |
4815 | } |
4816 | if (other_bb == NULL) |
4817 | return cfg_cleanup_needed; |
4818 | } |
4819 | /* Now do the forward search, moving last_bb to successor bbs |
4820 | that aren't other_bb. */ |
4821 | while (EDGE_COUNT (last_bb->succs) == 2) |
4822 | { |
4823 | FOR_EACH_EDGE (e, ei, last_bb->succs) |
4824 | if (e->dest != other_bb) |
4825 | break; |
4826 | if (e == NULL) |
4827 | break; |
4828 | if (!single_pred_p (bb: e->dest)) |
4829 | break; |
4830 | if (!suitable_cond_bb (bb: e->dest, test_bb: last_bb, other_bb: &other_bb, NULL, backward: false)) |
4831 | break; |
4832 | if (!no_side_effect_bb (bb: e->dest)) |
4833 | break; |
4834 | last_bb = e->dest; |
4835 | } |
4836 | if (first_bb == last_bb) |
4837 | return cfg_cleanup_needed; |
/* Here, basic blocks first_bb through last_bb's predecessor
all end with GIMPLE_COND; each of them has one edge to
other_bb and another to the next block in the range; all
blocks except first_bb have no side effects; and last_bb
ends with either a GIMPLE_COND or a cast satisfying
final_range_test_p.  */
4844 | for (bb = last_bb; ; bb = single_pred (bb)) |
4845 | { |
4846 | enum tree_code code; |
4847 | tree lhs, rhs; |
4848 | inter_bb_range_test_entry bb_ent; |
4849 | |
4850 | bb_ent.op = NULL_TREE; |
4851 | bb_ent.first_idx = ops.length (); |
4852 | bb_ent.last_idx = bb_ent.first_idx; |
4853 | e = find_edge (bb, other_bb); |
4854 | stmt = last_nondebug_stmt (bb); |
4855 | gimple_set_visited (stmt, visited_p: true); |
4856 | if (gimple_code (g: stmt) != GIMPLE_COND) |
4857 | { |
4858 | use_operand_p use_p; |
4859 | gimple *phi; |
4860 | edge e2; |
4861 | unsigned int d; |
4862 | |
4863 | lhs = gimple_assign_lhs (gs: stmt); |
4864 | rhs = gimple_assign_rhs1 (gs: stmt); |
4865 | gcc_assert (bb == last_bb); |
4866 | |
/* stmt is
_123 = (int) _234;
OR
_234 = a_2(D) == 2;

followed by:
<bb M>:
# _345 = PHI <_123(N), 1(...), 1(...)>

or 0 instead of 1.  If it is 0, the _234
range test is ANDed together with all the
other range tests; if it is 1, it is ORed with
them.  */
4880 | single_imm_use (var: lhs, use_p: &use_p, stmt: &phi); |
4881 | gcc_assert (gimple_code (phi) == GIMPLE_PHI); |
4882 | e2 = find_edge (first_bb, other_bb); |
4883 | d = e2->dest_idx; |
4884 | gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); |
4885 | if (integer_zerop (gimple_phi_arg_def (gs: phi, index: d))) |
4886 | code = BIT_AND_EXPR; |
4887 | else |
4888 | { |
4889 | gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); |
4890 | code = BIT_IOR_EXPR; |
4891 | } |
4892 | |
/* If _234's SSA_NAME_DEF_STMT is
_234 = _567 | _789;
(or &, corresponding to 1/0 in the phi arguments),
push into ops the individual range test arguments
of the bitwise or resp. and, recursively.  */
4898 | if (TREE_CODE (rhs) == SSA_NAME |
4899 | && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) |
4900 | != tcc_comparison) |
4901 | && !get_ops (var: rhs, code, ops: &ops, |
4902 | loop: loop_containing_stmt (stmt)) |
4903 | && has_single_use (var: rhs)) |
4904 | { |
4905 | /* Otherwise, push the _234 range test itself. */ |
4906 | operand_entry *oe = operand_entry_pool.allocate (); |
4907 | |
4908 | oe->op = rhs; |
4909 | oe->rank = code; |
4910 | oe->id = 0; |
4911 | oe->count = 1; |
4912 | oe->stmt_to_insert = NULL; |
4913 | ops.safe_push (obj: oe); |
4914 | bb_ent.last_idx++; |
4915 | bb_ent.op = rhs; |
4916 | } |
4917 | else if (is_gimple_assign (gs: stmt) |
4918 | && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) |
4919 | == tcc_comparison) |
4920 | && !get_ops (var: lhs, code, ops: &ops, |
4921 | loop: loop_containing_stmt (stmt)) |
4922 | && has_single_use (var: lhs)) |
4923 | { |
4924 | operand_entry *oe = operand_entry_pool.allocate (); |
4925 | oe->op = lhs; |
4926 | oe->rank = code; |
4927 | oe->id = 0; |
4928 | oe->count = 1; |
4929 | ops.safe_push (obj: oe); |
4930 | bb_ent.last_idx++; |
4931 | bb_ent.op = lhs; |
4932 | } |
4933 | else |
4934 | { |
4935 | bb_ent.last_idx = ops.length (); |
4936 | bb_ent.op = rhs; |
4937 | } |
4938 | bbinfo.safe_push (obj: bb_ent); |
4939 | for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) |
4940 | ops[i]->id = bb->index; |
4941 | continue; |
4942 | } |
4943 | else if (bb == last_bb) |
4944 | { |
4945 | /* For last_bb, handle also: |
4946 | if (x_3(D) == 3) |
4947 | goto <bb 6>; [34.00%] |
4948 | else |
4949 | goto <bb 7>; [66.00%] |
4950 | |
4951 | <bb 6> [local count: 79512730]: |
4952 | |
4953 | <bb 7> [local count: 1073741824]: |
4954 | # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> |
4955 | where bb 7 is OTHER_BB, but the PHI values from the |
4956 | earlier bbs match the path through the empty bb |
4957 | in between. */ |
4958 | bool test_swapped_p = false; |
4959 | bool ok = suitable_cond_bb (bb: single_pred (bb: last_bb), test_bb: last_bb, |
4960 | other_bb: &other_bb, test_swapped_p: &test_swapped_p, backward: true); |
4961 | gcc_assert (ok); |
4962 | if (test_swapped_p) |
4963 | e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0); |
4964 | } |
4965 | /* Otherwise stmt is GIMPLE_COND. */ |
4966 | code = gimple_cond_code (gs: stmt); |
4967 | lhs = gimple_cond_lhs (gs: stmt); |
4968 | rhs = gimple_cond_rhs (gs: stmt); |
4969 | if (TREE_CODE (lhs) == SSA_NAME |
4970 | && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
4971 | && ((code != EQ_EXPR && code != NE_EXPR) |
4972 | || rhs != boolean_false_node |
4973 | /* Either push into ops the individual bitwise |
4974 | or resp. and operands, depending on which |
4975 | edge is other_bb. */ |
4976 | || !get_ops (var: lhs, code: (((e->flags & EDGE_TRUE_VALUE) == 0) |
4977 | ^ (code == EQ_EXPR)) |
4978 | ? BIT_AND_EXPR : BIT_IOR_EXPR, ops: &ops, |
4979 | loop: loop_containing_stmt (stmt)))) |
4980 | { |
4981 | /* Or push the GIMPLE_COND stmt itself. */ |
4982 | operand_entry *oe = operand_entry_pool.allocate (); |
4983 | |
4984 | oe->op = NULL; |
4985 | oe->rank = (e->flags & EDGE_TRUE_VALUE) |
4986 | ? BIT_IOR_EXPR : BIT_AND_EXPR; |
/* oe->op = NULL signals that there is no SSA_NAME
for the range test, and oe->id instead is the
number of the basic block at whose end the
GIMPLE_COND is.  */
4991 | oe->id = bb->index; |
4992 | oe->count = 1; |
4993 | oe->stmt_to_insert = NULL; |
4994 | ops.safe_push (obj: oe); |
4995 | bb_ent.op = NULL; |
4996 | bb_ent.last_idx++; |
4997 | } |
4998 | else if (ops.length () > bb_ent.first_idx) |
4999 | { |
5000 | bb_ent.op = lhs; |
5001 | bb_ent.last_idx = ops.length (); |
5002 | } |
5003 | bbinfo.safe_push (obj: bb_ent); |
5004 | for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) |
5005 | ops[i]->id = bb->index; |
5006 | if (bb == first_bb) |
5007 | break; |
5008 | } |
5009 | if (ops.length () > 1) |
5010 | any_changes = optimize_range_tests (opcode: ERROR_MARK, ops: &ops, first_bb); |
5011 | if (any_changes) |
5012 | { |
5013 | unsigned int idx, max_idx = 0; |
/* update_ops relies on has_single_use predicates returning the
same values as they did during get_ops earlier.  Additionally it
never removes statements, only adds new ones, and it should walk
from the single imm use and check the predicate already before
making those changes.
On the other hand, the handling of GIMPLE_COND directly can turn
previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
it needs to be done in a separate loop afterwards.  */
5022 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) |
5023 | { |
5024 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx |
5025 | && bbinfo[idx].op != NULL_TREE) |
5026 | { |
5027 | tree new_op; |
5028 | |
5029 | max_idx = idx; |
5030 | stmt = last_nondebug_stmt (bb); |
5031 | new_op = update_ops (var: bbinfo[idx].op, |
5032 | code: (enum tree_code) |
5033 | ops[bbinfo[idx].first_idx]->rank, |
5034 | ops, pidx: &bbinfo[idx].first_idx, |
5035 | loop: loop_containing_stmt (stmt)); |
5036 | if (new_op == NULL_TREE) |
5037 | { |
5038 | gcc_assert (bb == last_bb); |
5039 | new_op = ops[bbinfo[idx].first_idx++]->op; |
5040 | } |
5041 | if (bbinfo[idx].op != new_op) |
5042 | { |
5043 | imm_use_iterator iter; |
5044 | use_operand_p use_p; |
5045 | gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; |
5046 | |
5047 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) |
5048 | if (is_gimple_debug (gs: use_stmt)) |
5049 | continue; |
5050 | else if (gimple_code (g: use_stmt) == GIMPLE_COND |
5051 | || gimple_code (g: use_stmt) == GIMPLE_PHI) |
5052 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
5053 | SET_USE (use_p, new_op); |
5054 | else if ((is_gimple_assign (gs: use_stmt) |
5055 | && (TREE_CODE_CLASS |
5056 | (gimple_assign_rhs_code (use_stmt)) |
5057 | == tcc_comparison))) |
5058 | cast_or_tcc_cmp_stmt = use_stmt; |
5059 | else if (gimple_assign_cast_p (s: use_stmt)) |
5060 | cast_or_tcc_cmp_stmt = use_stmt; |
5061 | else |
5062 | gcc_unreachable (); |
5063 | |
5064 | if (cast_or_tcc_cmp_stmt) |
5065 | { |
5066 | gcc_assert (bb == last_bb); |
5067 | tree lhs = gimple_assign_lhs (gs: cast_or_tcc_cmp_stmt); |
5068 | tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); |
5069 | enum tree_code rhs_code |
5070 | = gimple_assign_cast_p (s: cast_or_tcc_cmp_stmt) |
5071 | ? gimple_assign_rhs_code (gs: cast_or_tcc_cmp_stmt) |
5072 | : CONVERT_EXPR; |
5073 | gassign *g; |
5074 | if (is_gimple_min_invariant (new_op)) |
5075 | { |
5076 | new_op = fold_convert (TREE_TYPE (lhs), new_op); |
5077 | g = gimple_build_assign (new_lhs, new_op); |
5078 | } |
5079 | else |
5080 | g = gimple_build_assign (new_lhs, rhs_code, new_op); |
5081 | gimple_stmt_iterator gsi |
5082 | = gsi_for_stmt (cast_or_tcc_cmp_stmt); |
5083 | gimple_set_uid (g, uid: gimple_uid (g: cast_or_tcc_cmp_stmt)); |
5084 | gimple_set_visited (stmt: g, visited_p: true); |
5085 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
5086 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) |
5087 | if (is_gimple_debug (gs: use_stmt)) |
5088 | continue; |
5089 | else if (gimple_code (g: use_stmt) == GIMPLE_COND |
5090 | || gimple_code (g: use_stmt) == GIMPLE_PHI) |
5091 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
5092 | SET_USE (use_p, new_lhs); |
5093 | else |
5094 | gcc_unreachable (); |
5095 | } |
5096 | } |
5097 | } |
5098 | if (bb == first_bb) |
5099 | break; |
5100 | } |
5101 | for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) |
5102 | { |
5103 | if (bbinfo[idx].first_idx < bbinfo[idx].last_idx |
5104 | && bbinfo[idx].op == NULL_TREE |
5105 | && ops[bbinfo[idx].first_idx]->op != NULL_TREE) |
5106 | { |
5107 | gcond *cond_stmt = as_a <gcond *> (p: *gsi_last_bb (bb)); |
5108 | |
5109 | if (idx > max_idx) |
5110 | max_idx = idx; |
5111 | |
5112 | /* If we collapse the conditional to a true/false |
5113 | condition, then bubble that knowledge up to our caller. */ |
5114 | if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) |
5115 | { |
5116 | gimple_cond_make_false (gs: cond_stmt); |
5117 | cfg_cleanup_needed = true; |
5118 | } |
5119 | else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) |
5120 | { |
5121 | gimple_cond_make_true (gs: cond_stmt); |
5122 | cfg_cleanup_needed = true; |
5123 | } |
5124 | else |
5125 | { |
5126 | gimple_cond_set_code (gs: cond_stmt, code: NE_EXPR); |
5127 | gimple_cond_set_lhs (gs: cond_stmt, |
5128 | lhs: ops[bbinfo[idx].first_idx]->op); |
5129 | gimple_cond_set_rhs (gs: cond_stmt, boolean_false_node); |
5130 | } |
5131 | update_stmt (s: cond_stmt); |
5132 | } |
5133 | if (bb == first_bb) |
5134 | break; |
5135 | } |
5136 | |
/* The above changes could result in basic blocks after the first
modified one, up to and including last_bb, being executed even if
they would not have been in the original program.  If the value
ranges of assignment lhs' in those bbs were dependent on the
conditions guarding those basic blocks which now can change, the
VRs might be incorrect.  As no_side_effect_bb should ensure those
SSA_NAMEs are only used within the same bb, it should not be a big
deal if we just reset all the VRs in those bbs.  See PR68671.  */
5145 | for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) |
5146 | reset_flow_sensitive_info_in_bb (bb); |
5147 | } |
5148 | return cfg_cleanup_needed; |
5149 | } |
5150 | |
/* Remove the def stmt of VAR if VAR has zero uses, and recurse
on its rhs1 operand if so.  */
5153 | |
5154 | static void |
5155 | remove_visited_stmt_chain (tree var) |
5156 | { |
5157 | gimple *stmt; |
5158 | gimple_stmt_iterator gsi; |
5159 | |
5160 | while (1) |
5161 | { |
5162 | if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) |
5163 | return; |
5164 | stmt = SSA_NAME_DEF_STMT (var); |
5165 | if (is_gimple_assign (gs: stmt) && gimple_visited_p (stmt)) |
5166 | { |
5167 | var = gimple_assign_rhs1 (gs: stmt); |
5168 | gsi = gsi_for_stmt (stmt); |
5169 | reassoc_remove_stmt (gsi: &gsi); |
5170 | release_defs (stmt); |
5171 | } |
5172 | else |
5173 | return; |
5174 | } |
5175 | } |
5176 | |
/* This function checks three consecutive operands in
the passed operands vector OPS starting from OPINDEX and
swaps two operands if it is profitable for the binary operation
consuming the operands at OPINDEX + 1 and OPINDEX + 2.

We pair ops with the same rank if possible.  */
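
/* E.g. (hypothetical ranks): for OPS entries with ranks {2, 2, 4}
starting at OPINDEX, oe1 and oe3 are swapped, giving {4, 2, 2}, so
the two rank-2 operands end up paired in the binary statement that
consumes the operands at OPINDEX + 1 and OPINDEX + 2.  */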
5183 | |
5184 | static void |
5185 | swap_ops_for_binary_stmt (const vec<operand_entry *> &ops, |
5186 | unsigned int opindex) |
5187 | { |
5188 | operand_entry *oe1, *oe2, *oe3; |
5189 | |
5190 | oe1 = ops[opindex]; |
5191 | oe2 = ops[opindex + 1]; |
5192 | oe3 = ops[opindex + 2]; |
5193 | |
5194 | if (oe1->rank == oe2->rank && oe2->rank != oe3->rank) |
5195 | std::swap (a&: *oe1, b&: *oe3); |
5196 | else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank) |
5197 | std::swap (a&: *oe1, b&: *oe2); |
5198 | } |
5199 | |
5200 | /* If definition of RHS1 or RHS2 dominates STMT, return the later of those |
5201 | two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate |
5202 | whether RHS1 op RHS2 can be inserted before or needs to be inserted |
5203 | after the returned stmt. */ |
5204 | |
5205 | static inline gimple * |
5206 | find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before) |
5207 | { |
5208 | insert_before = true; |
5209 | if (TREE_CODE (rhs1) == SSA_NAME |
5210 | && reassoc_stmt_dominates_stmt_p (s1: stmt, SSA_NAME_DEF_STMT (rhs1))) |
5211 | { |
5212 | stmt = SSA_NAME_DEF_STMT (rhs1); |
5213 | insert_before = false; |
5214 | } |
5215 | if (TREE_CODE (rhs2) == SSA_NAME |
5216 | && reassoc_stmt_dominates_stmt_p (s1: stmt, SSA_NAME_DEF_STMT (rhs2))) |
5217 | { |
5218 | stmt = SSA_NAME_DEF_STMT (rhs2); |
5219 | insert_before = false; |
5220 | } |
5221 | return stmt; |
5222 | } |
5223 | |
5224 | /* If the stmt that defines operand has to be inserted, insert it |
5225 | before the use. */ |
5226 | static void |
5227 | insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) |
5228 | { |
5229 | gcc_assert (is_gimple_assign (stmt_to_insert)); |
5230 | tree rhs1 = gimple_assign_rhs1 (gs: stmt_to_insert); |
5231 | tree rhs2 = gimple_assign_rhs2 (gs: stmt_to_insert); |
5232 | bool insert_before; |
5233 | gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before); |
5234 | gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); |
5235 | gimple_set_uid (g: stmt_to_insert, uid: gimple_uid (g: insert_point)); |
5236 | |
5237 | /* If the insert point is not stmt, then insert_point would be |
5238 | the point where operand rhs1 or rhs2 is defined. In this case, |
5239 | stmt_to_insert has to be inserted afterwards. This would |
5240 | only happen when the stmt insertion point is flexible. */ |
5241 | if (insert_before) |
5242 | gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); |
5243 | else |
5244 | insert_stmt_after (stmt: stmt_to_insert, insert_point); |
5245 | } |
5246 | |
5247 | |
5248 | /* Recursively rewrite our linearized statements so that the operators |
5249 | match those in OPS[OPINDEX], putting the computation in rank |
5250 | order. Return new lhs. |
5251 | CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in |
5252 | the current stmt and during recursive invocations. |
5253 | NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in |
5254 | recursive invocations. */ |
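
/* For illustration: with three entries left at OPINDEX, the recursion
bottoms out at the stmt defining this stmt's rhs1, which is rewritten
to combine OPS[OPINDEX + 1] and OPS[OPINDEX + 2]; the current stmt
then combines that result with OPS[OPINDEX].  The deepest stmt of the
linearized chain thus receives the last two entries of OPS.  */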
5255 | |
5256 | static tree |
5257 | rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex, |
5258 | const vec<operand_entry *> &ops, bool changed, |
5259 | bool next_changed) |
5260 | { |
5261 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
5262 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
5263 | tree lhs = gimple_assign_lhs (gs: stmt); |
5264 | operand_entry *oe; |
5265 | |
5266 | /* The final recursion case for this function is that you have |
5267 | exactly two operations left. |
5268 | If we had exactly one op in the entire list to start with, we |
5269 | would have never called this function, and the tail recursion |
5270 | rewrites them one at a time. */ |
5271 | if (opindex + 2 == ops.length ()) |
5272 | { |
5273 | operand_entry *oe1, *oe2; |
5274 | |
5275 | oe1 = ops[opindex]; |
5276 | oe2 = ops[opindex + 1]; |
5277 | |
5278 | if (rhs1 != oe1->op || rhs2 != oe2->op) |
5279 | { |
5280 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
5281 | unsigned int uid = gimple_uid (g: stmt); |
5282 | |
5283 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5284 | { |
5285 | fprintf (stream: dump_file, format: "Transforming " ); |
5286 | print_gimple_stmt (dump_file, stmt, 0); |
5287 | } |
5288 | |
5289 | /* If the stmt that defines operand has to be inserted, insert it |
5290 | before the use. */ |
5291 | if (oe1->stmt_to_insert) |
5292 | insert_stmt_before_use (stmt, stmt_to_insert: oe1->stmt_to_insert); |
5293 | if (oe2->stmt_to_insert) |
5294 | insert_stmt_before_use (stmt, stmt_to_insert: oe2->stmt_to_insert); |
5295 | /* Even when changed is false, reassociation could have e.g. removed |
5296 | some redundant operations, so unless we are just swapping the |
5297 | arguments or unless there is no change at all (then we just |
5298 | return lhs), force creation of a new SSA_NAME. */ |
5299 | if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) |
5300 | { |
5301 | bool insert_before; |
5302 | gimple *insert_point |
5303 | = find_insert_point (stmt, rhs1: oe1->op, rhs2: oe2->op, insert_before); |
5304 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
5305 | stmt |
5306 | = gimple_build_assign (lhs, rhs_code, |
5307 | oe1->op, oe2->op); |
5308 | gimple_set_uid (g: stmt, uid); |
5309 | gimple_set_visited (stmt, visited_p: true); |
5310 | if (insert_before) |
5311 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
5312 | else |
5313 | insert_stmt_after (stmt, insert_point); |
5314 | } |
5315 | else |
5316 | { |
5317 | bool insert_before; |
5318 | gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op, |
5319 | insert_before) |
5320 | == stmt); |
5321 | gimple_assign_set_rhs1 (gs: stmt, rhs: oe1->op); |
5322 | gimple_assign_set_rhs2 (gs: stmt, rhs: oe2->op); |
5323 | update_stmt (s: stmt); |
5324 | } |
5325 | |
5326 | if (rhs1 != oe1->op && rhs1 != oe2->op) |
5327 | remove_visited_stmt_chain (var: rhs1); |
5328 | |
5329 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5330 | { |
5331 | fprintf (stream: dump_file, format: " into " ); |
5332 | print_gimple_stmt (dump_file, stmt, 0); |
5333 | } |
5334 | } |
5335 | return lhs; |
5336 | } |
5337 | |
5338 | /* If we hit here, we should have 3 or more ops left. */ |
5339 | gcc_assert (opindex + 2 < ops.length ()); |
5340 | |
5341 | /* Rewrite the next operator. */ |
5342 | oe = ops[opindex]; |
5343 | |
5344 | /* If the stmt that defines operand has to be inserted, insert it |
5345 | before the use. */ |
5346 | if (oe->stmt_to_insert) |
5347 | insert_stmt_before_use (stmt, stmt_to_insert: oe->stmt_to_insert); |
5348 | |
5349 | /* Recurse on the LHS of the binary operator, which is guaranteed to |
5350 | be the non-leaf side. */ |
5351 | tree new_rhs1 |
5352 | = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex: opindex + 1, ops, |
5353 | changed: changed || oe->op != rhs2 || next_changed, |
5354 | next_changed: false); |
5355 | |
5356 | if (oe->op != rhs2 || new_rhs1 != rhs1) |
5357 | { |
5358 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5359 | { |
5360 | fprintf (stream: dump_file, format: "Transforming " ); |
5361 | print_gimple_stmt (dump_file, stmt, 0); |
5362 | } |
5363 | |
5364 | /* If changed is false, this is either opindex == 0 |
5365 | or all outer rhs2's were equal to corresponding oe->op, |
5366 | and powi_result is NULL. |
5367 | That means lhs is equivalent before and after reassociation. |
5368 | Otherwise ensure the old lhs SSA_NAME is not reused and |
5369 | create a new stmt as well, so that any debug stmts will be |
5370 | properly adjusted. */ |
5371 | if (changed) |
5372 | { |
5373 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
5374 | unsigned int uid = gimple_uid (g: stmt); |
5375 | bool insert_before; |
5376 | gimple *insert_point = find_insert_point (stmt, rhs1: new_rhs1, rhs2: oe->op, |
5377 | insert_before); |
5378 | |
5379 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
5380 | stmt = gimple_build_assign (lhs, rhs_code, |
5381 | new_rhs1, oe->op); |
5382 | gimple_set_uid (g: stmt, uid); |
5383 | gimple_set_visited (stmt, visited_p: true); |
5384 | if (insert_before) |
5385 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
5386 | else |
5387 | insert_stmt_after (stmt, insert_point); |
5388 | } |
5389 | else |
5390 | { |
5391 | bool insert_before; |
5392 | gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op, |
5393 | insert_before) |
5394 | == stmt); |
5395 | gimple_assign_set_rhs1 (gs: stmt, rhs: new_rhs1); |
5396 | gimple_assign_set_rhs2 (gs: stmt, rhs: oe->op); |
5397 | update_stmt (s: stmt); |
5398 | } |
5399 | |
5400 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5401 | { |
5402 | fprintf (stream: dump_file, format: " into " ); |
5403 | print_gimple_stmt (dump_file, stmt, 0); |
5404 | } |
5405 | } |
5406 | return lhs; |
5407 | } |
5408 | |
/* Find out how many cycles we need to compute the statement chain.
OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is
the maximum number of independent statements we may execute per
cycle.  */
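
/* A worked example: for ops_num == 10 and cpu_width == 2, the first
term gives 10 / (2 * 2) = 2 width-limited cycles, leaving
rest = 10 - 2 * 2 = 6 operands; 6 is not a power of two, so
floor_log2 (6) + 1 = 3 further halving cycles are needed, 5 cycles
in total.  */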
5412 | |
5413 | static int |
5414 | get_required_cycles (int ops_num, int cpu_width) |
5415 | { |
5416 | int res; |
5417 | int elog; |
5418 | unsigned int rest; |
5419 | |
/* While we have more than 2 * cpu_width operands
we may reduce the number of operands by cpu_width
per cycle.  */
5423 | res = ops_num / (2 * cpu_width); |
5424 | |
/* The remaining operand count may be halved each cycle
until we have only one operand left.  */
5427 | rest = (unsigned)(ops_num - res * cpu_width); |
5428 | elog = exact_log2 (x: rest); |
5429 | if (elog >= 0) |
5430 | res += elog; |
5431 | else |
5432 | res += floor_log2 (x: rest) + 1; |
5433 | |
5434 | return res; |
5435 | } |
5436 | |
5437 | /* Given that the target fully pipelines FMA instructions, return the latency |
5438 | of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of |
5439 | pipes. */ |
5440 | |
5441 | static inline int |
5442 | get_mult_latency_consider_fma (int ops_num, int mult_num, int width) |
5443 | { |
5444 | gcc_checking_assert (mult_num && mult_num <= ops_num); |
5445 | |
5446 | /* For each partition, if mult_num == ops_num, there's latency(MULT)*2. |
5447 | e.g: |
5448 | |
5449 | A * B + C * D |
5450 | => |
5451 | _1 = A * B; |
5452 | _2 = .FMA (C, D, _1); |
5453 | |
5454 | Otherwise there's latency(MULT)*1 in the first FMA. */ |
5455 | return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1; |
5456 | } |
5457 | |
/* Returns an optimal reassociation width (the number of parallel
dependency chains) to use for computation of the given statements.

LHS is the result ssa name of OPS.  MULT_NUM is the number of
sub-expressions that are MULT_EXPRs, when OPS are PLUS_EXPRs or
MINUS_EXPRs.  */
5463 | |
5464 | static int |
5465 | get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs, |
5466 | enum tree_code opc, machine_mode mode) |
5467 | { |
5468 | int param_width = param_tree_reassoc_width; |
5469 | int width; |
5470 | int width_min; |
5471 | int cycles_best; |
5472 | int ops_num = ops->length (); |
5473 | |
5474 | if (param_width > 0) |
5475 | width = param_width; |
5476 | else |
5477 | width = targetm.sched.reassociation_width (opc, mode); |
5478 | |
5479 | if (width == 1) |
5480 | return width; |
5481 | |
5482 | /* Get the minimal time required for sequence computation. */ |
5483 | cycles_best = get_required_cycles (ops_num, cpu_width: width); |
5484 | |
5485 | /* Check if we may use less width and still compute sequence for |
5486 | the same time. It will allow us to reduce registers usage. |
5487 | get_required_cycles is monotonically increasing with lower width |
5488 | so we can perform a binary search for the minimal width that still |
5489 | results in the optimal cycle count. */ |
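
/* For instance, with ops_num == 6 a width of 2 already achieves the
same 3-cycle latency as a width of 4, so the binary search below
would settle on 2.  */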
5490 | width_min = 1; |
5491 | |
/* If the target fully pipelines the FMA instruction, the multiply part can
start as soon as its operands are ready.  Assuming symmetric pipes are used
for FMUL/FADD/FMA, then for a sequence of FMA like:
5495 | |
5496 | _8 = .FMA (_2, _3, _1); |
5497 | _9 = .FMA (_5, _4, _8); |
5498 | _10 = .FMA (_7, _6, _9); |
5499 | |
5500 | , if width=1, the latency is latency(MULT) + latency(ADD)*3. |
5501 | While with width=2: |
5502 | |
5503 | _8 = _4 * _5; |
5504 | _9 = .FMA (_2, _3, _1); |
5505 | _10 = .FMA (_6, _7, _8); |
5506 | _11 = _9 + _10; |
5507 | |
5508 | , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >= |
5509 | latency(ADD), the first variant is preferred. |
5510 | |
5511 | Find out if we can get a smaller width considering FMA. */ |
5512 | if (width > 1 && mult_num && param_fully_pipelined_fma) |
5513 | { |
/* When param_fully_pipelined_fma is set, assume FMUL and FMA use the
same units that can also do FADD.  For other scenarios, such as when
FMUL and FADD are using separate units, the following code may not
apply.  */
5518 | int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode); |
5519 | gcc_checking_assert (width_mult <= width); |
5520 | |
5521 | /* Latency of MULT_EXPRs. */ |
5522 | int lat_mul |
5523 | = get_mult_latency_consider_fma (ops_num, mult_num, width: width_mult); |
5524 | |
/* The binary search might not apply here, so search linearly from 1.  */
5526 | for (int i = 1; i < width_mult; i++) |
5527 | { |
5528 | int lat_mul_new |
5529 | = get_mult_latency_consider_fma (ops_num, mult_num, width: i); |
5530 | int lat_add_new = get_required_cycles (ops_num, cpu_width: i); |
5531 | |
5532 | /* Assume latency(MULT) >= latency(ADD). */ |
5533 | if (lat_mul - lat_mul_new >= lat_add_new - cycles_best) |
5534 | { |
5535 | width = i; |
5536 | break; |
5537 | } |
5538 | } |
5539 | } |
5540 | else |
5541 | { |
5542 | while (width > width_min) |
5543 | { |
5544 | int width_mid = (width + width_min) / 2; |
5545 | |
5546 | if (get_required_cycles (ops_num, cpu_width: width_mid) == cycles_best) |
5547 | width = width_mid; |
5548 | else if (width_min < width_mid) |
5549 | width_min = width_mid; |
5550 | else |
5551 | break; |
5552 | } |
5553 | } |
5554 | |
/* If there's a loop-dependent FMA result, return width=2 to avoid it.  This
is better than skipping these FMA candidates in widening_mul.  */
5557 | if (width == 1 |
5558 | && maybe_le (a: tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))), |
5559 | param_avoid_fma_max_bits)) |
5560 | { |
/* Look for a cross-backedge dependency:
1. LHS is a phi argument in the same basic block it is defined in.
2. The result of the phi node is used in OPS.  */
5564 | basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)); |
5565 | |
5566 | use_operand_p use_p; |
5567 | imm_use_iterator iter; |
5568 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
5569 | if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p))) |
5570 | { |
5571 | if (gimple_phi_arg_edge (phi, i: phi_arg_index_from_use (use: use_p))->src |
5572 | != bb) |
5573 | continue; |
5574 | tree phi_result = gimple_phi_result (gs: phi); |
5575 | operand_entry *oe; |
5576 | unsigned int j; |
5577 | FOR_EACH_VEC_ELT (*ops, j, oe) |
5578 | { |
5579 | if (TREE_CODE (oe->op) != SSA_NAME) |
5580 | continue; |
5581 | |
/* The result of the phi is an operand of the PLUS_EXPR.  */
5583 | if (oe->op == phi_result) |
5584 | return 2; |
5585 | |
/* Check if the result of the phi is an operand of a MULT_EXPR.  */
5587 | gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); |
5588 | if (is_gimple_assign (gs: def_stmt) |
5589 | && gimple_assign_rhs_code (gs: def_stmt) == NEGATE_EXPR) |
5590 | { |
5591 | tree rhs = gimple_assign_rhs1 (gs: def_stmt); |
5592 | if (TREE_CODE (rhs) == SSA_NAME) |
5593 | { |
5594 | if (rhs == phi_result) |
5595 | return 2; |
5596 | def_stmt = SSA_NAME_DEF_STMT (rhs); |
5597 | } |
5598 | } |
5599 | if (is_gimple_assign (gs: def_stmt) |
5600 | && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR) |
5601 | { |
5602 | if (gimple_assign_rhs1 (gs: def_stmt) == phi_result |
5603 | || gimple_assign_rhs2 (gs: def_stmt) == phi_result) |
5604 | return 2; |
5605 | } |
5606 | } |
5607 | } |
5608 | } |
5609 | |
5610 | return width; |
5611 | } |
5612 | |
5613 | #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */ |
5614 | #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */ |
5615 | #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */ |
5616 | |
/* Rewrite statements with a dependency chain with regard to the chance to
generate FMA.
For a chain with FMA: try to keep the FMA opportunity as much as possible.
For a chain without FMA: put the computation in rank order and try
to allow operations to be executed in parallel.
5622 | E.g. |
5623 | e + f + a * b + c * d; |
5624 | |
5625 | ssa1 = e + a * b; |
5626 | ssa2 = f + c * d; |
5627 | ssa3 = ssa1 + ssa2; |
5628 | |
5629 | This reassociation approach preserves the chance of fma generation as much |
5630 | as possible. |
5631 | |
Another thing is to avoid adding loop-carried ops to long chains;
otherwise the whole chain will have dependencies across the loop
iteration.  Just keep loop-carried ops in a separate chain.
5635 | E.g. |
5636 | x_1 = phi (x_0, x_2) |
5637 | y_1 = phi (y_0, y_2) |
5638 | |
5639 | a + b + c + d + e + x1 + y1 |
5640 | |
5641 | SSA1 = a + b; |
5642 | SSA2 = c + d; |
5643 | SSA3 = SSA1 + e; |
5644 | SSA4 = SSA3 + SSA2; |
5645 | SSA5 = x1 + y1; |
5646 | SSA6 = SSA4 + SSA5; |
5647 | */ |
5648 | static void |
5649 | rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma, |
5650 | const vec<operand_entry *> &ops) |
5651 | { |
5652 | enum tree_code opcode = gimple_assign_rhs_code (gs: stmt); |
5653 | int op_num = ops.length (); |
5654 | int op_normal_num = op_num; |
5655 | gcc_assert (op_num > 0); |
5656 | int stmt_num = op_num - 1; |
5657 | gimple **stmts = XALLOCAVEC (gimple *, stmt_num); |
5658 | int i = 0, j = 0; |
5659 | tree tmp_op[2], op1; |
5660 | operand_entry *oe; |
5661 | gimple *stmt1 = NULL; |
5662 | tree last_rhs1 = gimple_assign_rhs1 (gs: stmt); |
5663 | int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0; |
5664 | int width_active = 0, width_count = 0; |
5665 | bool has_biased = false, ops_changed = false; |
5666 | auto_vec<operand_entry *> ops_normal; |
5667 | auto_vec<operand_entry *> ops_biased; |
5668 | vec<operand_entry *> *ops1; |
5669 | |
5670 | /* We start expression rewriting from the top statements. |
5671 | So, in this loop we create a full list of statements |
5672 | we will work with. */ |
5673 | stmts[stmt_num - 1] = stmt; |
5674 | for (i = stmt_num - 2; i >= 0; i--) |
5675 | stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); |
5676 | |
/* Avoid adding loop-carried ops to long chains: first filter out the
loop-carried ops.  But we need to make sure that the length of the
remainder is at least 4, which is the smallest ops length for which
we can break the dependency.  */
5681 | FOR_EACH_VEC_ELT (ops, i, oe) |
5682 | { |
5683 | if (TREE_CODE (oe->op) == SSA_NAME |
5684 | && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op)) |
5685 | && op_normal_num > 4) |
5686 | { |
5687 | ops_biased.safe_push (obj: oe); |
5688 | has_biased = true; |
5689 | op_normal_num --; |
5690 | } |
5691 | else |
5692 | ops_normal.safe_push (obj: oe); |
5693 | } |
5694 | |
/* The width should not be larger than ops length / 2, since we cannot
create more parallel dependency chains than that.  */
5697 | int width_normal = op_normal_num / 2; |
5698 | int width_biased = (op_num - op_normal_num) / 2; |
5699 | width_normal = width <= width_normal ? width : width_normal; |
5700 | width_biased = width <= width_biased ? width : width_biased; |
5701 | |
5702 | ops1 = &ops_normal; |
5703 | width_count = width_active = width_normal; |
5704 | |
5705 | /* Build parallel dependency chain according to width. */ |
5706 | for (i = 0; i < stmt_num; i++) |
5707 | { |
5708 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5709 | { |
5710 | fprintf (stream: dump_file, format: "Transforming " ); |
5711 | print_gimple_stmt (dump_file, stmts[i], 0); |
5712 | } |
5713 | |
5714 | /* When the work of normal ops is over, but the loop is not over, |
5715 | continue to do biased ops. */ |
5716 | if (width_count == 0 && ops1 == &ops_normal) |
5717 | { |
5718 | ops1 = &ops_biased; |
5719 | width_count = width_active = width_biased; |
5720 | ops_changed = true; |
5721 | } |
5722 | |
5723 | /* Swap the operands if no FMA in the chain. */ |
5724 | if (ops1->length () > 2 && !has_fma) |
5725 | swap_ops_for_binary_stmt (ops: *ops1, opindex: ops1->length () - 3); |
5726 | |
5727 | if (i < width_active |
5728 | || (ops_changed && i <= (last_rhs1_stmt_index + width_active))) |
5729 | { |
5730 | for (j = 0; j < 2; j++) |
5731 | { |
5732 | oe = ops1->pop (); |
5733 | tmp_op[j] = oe->op; |
5734 | /* If the stmt that defines operand has to be inserted, insert it |
5735 | before the use. */ |
5736 | stmt1 = oe->stmt_to_insert; |
5737 | if (stmt1) |
5738 | insert_stmt_before_use (stmt: stmts[i], stmt_to_insert: stmt1); |
5739 | stmt1 = NULL; |
5740 | } |
5741 | stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), |
5742 | op1: tmp_op[1], |
5743 | op2: tmp_op[0], |
5744 | opcode); |
5745 | gimple_set_visited (stmt: stmts[i], visited_p: true); |
5746 | |
5747 | } |
5748 | else |
5749 | { |
/* We keep the original statement only for the last one.  All others
are recreated.  */
5752 | if (!ops1->length ()) |
5753 | { |
5754 | /* For biased length equal to 2. */ |
5755 | if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index) |
5756 | last_rhs2_stmt_index = i - 1; |
5757 | |
/* When width_count == 2 and there are no biased ops, just finish.  */
5759 | if (width_count == NORMAL_END_STMT && !has_biased) |
5760 | { |
5761 | last_rhs1_stmt_index = i - 1; |
5762 | last_rhs2_stmt_index = i - 2; |
5763 | } |
5764 | if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased)) |
5765 | { |
/* We keep the original statement only for the last one.  All
others are recreated.  */
5768 | gimple_assign_set_rhs1 (gs: stmts[i], rhs: gimple_assign_lhs |
5769 | (gs: stmts[last_rhs1_stmt_index])); |
5770 | gimple_assign_set_rhs2 (gs: stmts[i], rhs: gimple_assign_lhs |
5771 | (gs: stmts[last_rhs2_stmt_index])); |
5772 | update_stmt (s: stmts[i]); |
5773 | } |
5774 | else |
5775 | { |
5776 | stmts[i] = |
5777 | build_and_add_sum (TREE_TYPE (last_rhs1), |
5778 | op1: gimple_assign_lhs (gs: stmts[i-width_count]), |
5779 | op2: gimple_assign_lhs |
5780 | (gs: stmts[i-width_count+1]), |
5781 | opcode); |
5782 | gimple_set_visited (stmt: stmts[i], visited_p: true); |
5783 | width_count--; |
5784 | |
/* It is the end of the normal or biased ops.
last_rhs1_stmt_index is used to record the last stmt index
for normal ops.  last_rhs2_stmt_index is used to record the
last stmt index for biased ops.  */
5789 | if (width_count == BIASED_END_STMT) |
5790 | { |
5791 | gcc_assert (has_biased); |
5792 | if (ops_biased.length ()) |
5793 | last_rhs1_stmt_index = i; |
5794 | else |
5795 | last_rhs2_stmt_index = i; |
5796 | width_count--; |
5797 | } |
5798 | } |
5799 | } |
5800 | else |
5801 | { |
/* Attach the remaining ops to the parallel dependency chain.  */
5803 | oe = ops1->pop (); |
5804 | op1 = oe->op; |
5805 | stmt1 = oe->stmt_to_insert; |
5806 | if (stmt1) |
5807 | insert_stmt_before_use (stmt: stmts[i], stmt_to_insert: stmt1); |
5808 | stmt1 = NULL; |
5809 | |
/* For only one remaining biased op.  */
5811 | if (width_count == SPECIAL_BIASED_END_STMT) |
5812 | { |
/* We keep the original statement only for the last one.  All
others are recreated.  */
5815 | gcc_assert (has_biased); |
5816 | gimple_assign_set_rhs1 (gs: stmts[i], rhs: gimple_assign_lhs |
5817 | (gs: stmts[last_rhs1_stmt_index])); |
5818 | gimple_assign_set_rhs2 (gs: stmts[i], rhs: op1); |
5819 | update_stmt (s: stmts[i]); |
5820 | } |
5821 | else |
5822 | { |
5823 | stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), |
5824 | op1: gimple_assign_lhs |
5825 | (gs: stmts[i-width_active]), |
5826 | op2: op1, |
5827 | opcode); |
5828 | gimple_set_visited (stmt: stmts[i], visited_p: true); |
5829 | } |
5830 | } |
5831 | } |
5832 | |
5833 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5834 | { |
5835 | fprintf (stream: dump_file, format: " into " ); |
5836 | print_gimple_stmt (dump_file, stmts[i], 0); |
5837 | } |
5838 | } |
5839 | |
5840 | remove_visited_stmt_chain (var: last_rhs1); |
5841 | } |
5842 | |
/* Transform STMT, which is really (A + B) + (C + D), into the left
linear form, ((A + B) + C) + D.
5845 | Recurse on D if necessary. */ |
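
/* E.g. (hypothetical SSA names):

_5 = _1 + _2;
_6 = _3 + _4;
x_7 = _5 + _6;

is rewritten into the left-linear chain

_5 = _1 + _2;
_8 = _5 + _4;
x_7 = _8 + _3;

with the now-dead definition of _6 removed.  */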
5846 | |
5847 | static void |
5848 | linearize_expr (gimple *stmt) |
5849 | { |
5850 | gimple_stmt_iterator gsi; |
5851 | gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
5852 | gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
5853 | gimple *oldbinrhs = binrhs; |
5854 | enum tree_code rhscode = gimple_assign_rhs_code (gs: stmt); |
5855 | gimple *newbinrhs = NULL; |
5856 | class loop *loop = loop_containing_stmt (stmt); |
5857 | tree lhs = gimple_assign_lhs (gs: stmt); |
5858 | |
5859 | gcc_assert (is_reassociable_op (binlhs, rhscode, loop) |
5860 | && is_reassociable_op (binrhs, rhscode, loop)); |
5861 | |
5862 | gsi = gsi_for_stmt (stmt); |
5863 | |
5864 | gimple_assign_set_rhs2 (gs: stmt, rhs: gimple_assign_rhs1 (gs: binrhs)); |
5865 | binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), |
5866 | gimple_assign_rhs_code (gs: binrhs), |
5867 | gimple_assign_lhs (gs: binlhs), |
5868 | gimple_assign_rhs2 (gs: binrhs)); |
5869 | gimple_assign_set_rhs1 (gs: stmt, rhs: gimple_assign_lhs (gs: binrhs)); |
5870 | gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); |
5871 | gimple_set_uid (g: binrhs, uid: gimple_uid (g: stmt)); |
5872 | |
5873 | if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) |
5874 | newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
5875 | |
5876 | if (dump_file && (dump_flags & TDF_DETAILS)) |
5877 | { |
5878 | fprintf (stream: dump_file, format: "Linearized: " ); |
5879 | print_gimple_stmt (dump_file, stmt, 0); |
5880 | } |
5881 | |
5882 | reassociate_stats.linearized++; |
5883 | update_stmt (s: stmt); |
5884 | |
5885 | gsi = gsi_for_stmt (oldbinrhs); |
5886 | reassoc_remove_stmt (gsi: &gsi); |
5887 | release_defs (oldbinrhs); |
5888 | |
5889 | gimple_set_visited (stmt, visited_p: true); |
5890 | gimple_set_visited (stmt: binlhs, visited_p: true); |
5891 | gimple_set_visited (stmt: binrhs, visited_p: true); |
5892 | |
5893 | /* Tail recurse on the new rhs if it still needs reassociation. */ |
5894 | if (newbinrhs && is_reassociable_op (stmt: newbinrhs, code: rhscode, loop)) |
5895 | /* ??? This should probably be linearize_expr (newbinrhs) but I don't |
5896 | want to change the algorithm while converting to tuples. */ |
5897 | linearize_expr (stmt); |
5898 | } |
5899 | |
5900 | /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return |
5901 | it. Otherwise, return NULL. */ |
5902 | |
5903 | static gimple * |
5904 | get_single_immediate_use (tree lhs) |
5905 | { |
5906 | use_operand_p immuse; |
5907 | gimple *immusestmt; |
5908 | |
5909 | if (TREE_CODE (lhs) == SSA_NAME |
5910 | && single_imm_use (var: lhs, use_p: &immuse, stmt: &immusestmt) |
5911 | && is_gimple_assign (gs: immusestmt)) |
5912 | return immusestmt; |
5913 | |
5914 | return NULL; |
5915 | } |
5916 | |
5917 | /* Recursively negate the value of TONEGATE, and return the SSA_NAME |
5918 | representing the negated value. Insertions of any necessary |
5919 | instructions go before GSI. |
5920 | This function is recursive in that, if you hand it "a_5" as the |
5921 | value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will |
5922 | transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ |
5923 | |
5924 | static tree |
5925 | negate_value (tree tonegate, gimple_stmt_iterator *gsip) |
5926 | { |
5927 | gimple *negatedefstmt = NULL; |
5928 | tree resultofnegate; |
5929 | gimple_stmt_iterator gsi; |
5930 | unsigned int uid; |
5931 | |
5932 | /* If we are trying to negate a name, defined by an add, negate the |
5933 | add operands instead. */ |
5934 | if (TREE_CODE (tonegate) == SSA_NAME) |
5935 | negatedefstmt = SSA_NAME_DEF_STMT (tonegate); |
5936 | if (TREE_CODE (tonegate) == SSA_NAME |
5937 | && is_gimple_assign (gs: negatedefstmt) |
5938 | && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME |
5939 | && has_single_use (var: gimple_assign_lhs (gs: negatedefstmt)) |
5940 | && gimple_assign_rhs_code (gs: negatedefstmt) == PLUS_EXPR) |
5941 | { |
5942 | tree rhs1 = gimple_assign_rhs1 (gs: negatedefstmt); |
5943 | tree rhs2 = gimple_assign_rhs2 (gs: negatedefstmt); |
5944 | tree lhs = gimple_assign_lhs (gs: negatedefstmt); |
5945 | gimple *g; |
5946 | |
5947 | gsi = gsi_for_stmt (negatedefstmt); |
5948 | rhs1 = negate_value (tonegate: rhs1, gsip: &gsi); |
5949 | |
5950 | gsi = gsi_for_stmt (negatedefstmt); |
5951 | rhs2 = negate_value (tonegate: rhs2, gsip: &gsi); |
5952 | |
5953 | gsi = gsi_for_stmt (negatedefstmt); |
5954 | lhs = make_ssa_name (TREE_TYPE (lhs)); |
5955 | gimple_set_visited (stmt: negatedefstmt, visited_p: true); |
5956 | g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); |
5957 | gimple_set_uid (g, uid: gimple_uid (g: negatedefstmt)); |
5958 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
5959 | return lhs; |
5960 | } |
5961 | |
5962 | tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); |
5963 | resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, |
5964 | NULL_TREE, true, GSI_SAME_STMT); |
5965 | gsi = *gsip; |
5966 | uid = gimple_uid (g: gsi_stmt (i: gsi)); |
5967 | for (gsi_prev (i: &gsi); !gsi_end_p (i: gsi); gsi_prev (i: &gsi)) |
5968 | { |
5969 | gimple *stmt = gsi_stmt (i: gsi); |
5970 | if (gimple_uid (g: stmt) != 0) |
5971 | break; |
5972 | gimple_set_uid (g: stmt, uid); |
5973 | } |
5974 | return resultofnegate; |
5975 | } |
5976 | |
/* Return true if we should break up the subtract in STMT into an add
with negate.  This is true when the subtract operands are really
adds, or when the subtract itself is used in an add expression.  In
either case, breaking up the subtract into an add with negate
exposes the adds to reassociation.  */
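
/* E.g. given

c_3 = a_1 - b_2;
d_5 = c_3 + e_4;

rewriting the subtract as c_3 = a_1 + (-b_2) exposes the whole
computation as a single reassociable addition chain.  */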
5982 | |
5983 | static bool |
5984 | should_break_up_subtract (gimple *stmt) |
5985 | { |
5986 | tree lhs = gimple_assign_lhs (gs: stmt); |
5987 | tree binlhs = gimple_assign_rhs1 (gs: stmt); |
5988 | tree binrhs = gimple_assign_rhs2 (gs: stmt); |
5989 | gimple *immusestmt; |
5990 | class loop *loop = loop_containing_stmt (stmt); |
5991 | |
5992 | if (TREE_CODE (binlhs) == SSA_NAME |
5993 | && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), code: PLUS_EXPR, loop)) |
5994 | return true; |
5995 | |
5996 | if (TREE_CODE (binrhs) == SSA_NAME |
5997 | && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), code: PLUS_EXPR, loop)) |
5998 | return true; |
5999 | |
6000 | if (TREE_CODE (lhs) == SSA_NAME |
6001 | && (immusestmt = get_single_immediate_use (lhs)) |
6002 | && is_gimple_assign (gs: immusestmt) |
6003 | && (gimple_assign_rhs_code (gs: immusestmt) == PLUS_EXPR |
6004 | || (gimple_assign_rhs_code (gs: immusestmt) == MINUS_EXPR |
6005 | && gimple_assign_rhs1 (gs: immusestmt) == lhs) |
6006 | || gimple_assign_rhs_code (gs: immusestmt) == MULT_EXPR)) |
6007 | return true; |
6008 | return false; |
6009 | } |
6010 | |
6011 | /* Transform STMT from A - B into A + -B. */ |
6012 | |
6013 | static void |
6014 | break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) |
6015 | { |
6016 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
6017 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
6018 | |
6019 | if (dump_file && (dump_flags & TDF_DETAILS)) |
6020 | { |
6021 | fprintf (stream: dump_file, format: "Breaking up subtract " ); |
6022 | print_gimple_stmt (dump_file, stmt, 0); |
6023 | } |
6024 | |
6025 | rhs2 = negate_value (tonegate: rhs2, gsip); |
6026 | gimple_assign_set_rhs_with_ops (gsi: gsip, code: PLUS_EXPR, op1: rhs1, op2: rhs2); |
6027 | update_stmt (s: stmt); |
6028 | } |
6029 | |
6030 | /* Determine whether STMT is a builtin call that raises an SSA name |
6031 | to an integer power and has only one use. If so, and this is early |
6032 | reassociation and unsafe math optimizations are permitted, place |
6033 | the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. |
6034 | If any of these conditions does not hold, return FALSE. */ |
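
/* E.g. a single-use call __builtin_pow (x_1, 3.0) yields *BASE = x_1
and *EXPONENT = 3 (pow is rejected outright under -fmath-errno),
whereas __builtin_pow (x_1, 2.5) fails the exact-integer check and
returns FALSE.  */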
6035 | |
6036 | static bool |
6037 | acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) |
6038 | { |
6039 | tree arg1; |
6040 | REAL_VALUE_TYPE c, cint; |
6041 | |
6042 | switch (gimple_call_combined_fn (stmt)) |
6043 | { |
6044 | CASE_CFN_POW: |
6045 | if (flag_errno_math) |
6046 | return false; |
6047 | |
6048 | *base = gimple_call_arg (gs: stmt, index: 0); |
6049 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
6050 | |
6051 | if (TREE_CODE (arg1) != REAL_CST) |
6052 | return false; |
6053 | |
6054 | c = TREE_REAL_CST (arg1); |
6055 | |
6056 | if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) |
6057 | return false; |
6058 | |
6059 | *exponent = real_to_integer (&c); |
6060 | real_from_integer (&cint, VOIDmode, *exponent, SIGNED); |
6061 | if (!real_identical (&c, &cint)) |
6062 | return false; |
6063 | |
6064 | break; |
6065 | |
6066 | CASE_CFN_POWI: |
6067 | *base = gimple_call_arg (gs: stmt, index: 0); |
6068 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
6069 | |
6070 | if (!tree_fits_shwi_p (arg1)) |
6071 | return false; |
6072 | |
6073 | *exponent = tree_to_shwi (arg1); |
6074 | break; |
6075 | |
6076 | default: |
6077 | return false; |
6078 | } |
6079 | |
6080 | /* Expanding negative exponents is generally unproductive, so we don't |
6081 | complicate matters with those. Exponents of zero and one should |
6082 | have been handled by expression folding. */ |
6083 | if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) |
6084 | return false; |
6085 | |
6086 | return true; |
6087 | } |
6088 | |
6089 | /* Try to derive and add operand entry for OP to *OPS. Return false if |
6090 | unsuccessful. */ |
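
/* For illustration: in a MULT_EXPR chain, a single-use operand defined
by t_2 = -a_1 is entered into *OPS as the two factors a_1 and -1,
while a single-use __builtin_powi (x_1, 4) operand is recorded (when
powi insertion is enabled) as the repeated factor x_1 with an
occurrence count of 4.  */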
6091 | |
6092 | static bool |
6093 | try_special_add_to_ops (vec<operand_entry *> *ops, |
6094 | enum tree_code code, |
6095 | tree op, gimple* def_stmt) |
6096 | { |
6097 | tree base = NULL_TREE; |
6098 | HOST_WIDE_INT exponent = 0; |
6099 | |
6100 | if (TREE_CODE (op) != SSA_NAME |
6101 | || ! has_single_use (var: op)) |
6102 | return false; |
6103 | |
6104 | if (code == MULT_EXPR |
6105 | && reassoc_insert_powi_p |
6106 | && flag_unsafe_math_optimizations |
6107 | && is_gimple_call (gs: def_stmt) |
6108 | && acceptable_pow_call (stmt: as_a <gcall *> (p: def_stmt), base: &base, exponent: &exponent)) |
6109 | { |
6110 | add_repeat_to_ops_vec (ops, op: base, repeat: exponent); |
6111 | gimple_set_visited (stmt: def_stmt, visited_p: true); |
6112 | return true; |
6113 | } |
6114 | else if (code == MULT_EXPR |
6115 | && is_gimple_assign (gs: def_stmt) |
6116 | && gimple_assign_rhs_code (gs: def_stmt) == NEGATE_EXPR |
6117 | && !HONOR_SNANS (TREE_TYPE (op)) |
6118 | && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) |
6119 | || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))) |
6120 | && (!FLOAT_TYPE_P (TREE_TYPE (op)) |
6121 | || !DECIMAL_FLOAT_MODE_P (element_mode (op)))) |
6122 | { |
6123 | tree rhs1 = gimple_assign_rhs1 (gs: def_stmt); |
6124 | tree cst = build_minus_one_cst (TREE_TYPE (op)); |
6125 | add_to_ops_vec (ops, op: rhs1); |
6126 | add_to_ops_vec (ops, op: cst); |
6127 | gimple_set_visited (stmt: def_stmt, visited_p: true); |
6128 | return true; |
6129 | } |
6130 | |
6131 | return false; |
6132 | } |
6133 | |
6134 | /* Recursively linearize a binary expression that is the RHS of STMT. |
6135 | Place the operands of the expression tree in the vector named OPS. */ |
6136 | |
6137 | static void |
6138 | linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, |
6139 | bool is_associative, bool set_visited) |
6140 | { |
6141 | tree binlhs = gimple_assign_rhs1 (gs: stmt); |
6142 | tree binrhs = gimple_assign_rhs2 (gs: stmt); |
6143 | gimple *binlhsdef = NULL, *binrhsdef = NULL; |
6144 | bool binlhsisreassoc = false; |
6145 | bool binrhsisreassoc = false; |
6146 | enum tree_code rhscode = gimple_assign_rhs_code (gs: stmt); |
6147 | class loop *loop = loop_containing_stmt (stmt); |
6148 | |
6149 | if (set_visited) |
6150 | gimple_set_visited (stmt, visited_p: true); |
6151 | |
6152 | if (TREE_CODE (binlhs) == SSA_NAME) |
6153 | { |
6154 | binlhsdef = SSA_NAME_DEF_STMT (binlhs); |
6155 | binlhsisreassoc = (is_reassociable_op (stmt: binlhsdef, code: rhscode, loop) |
6156 | && !stmt_could_throw_p (cfun, binlhsdef)); |
6157 | } |
6158 | |
6159 | if (TREE_CODE (binrhs) == SSA_NAME) |
6160 | { |
6161 | binrhsdef = SSA_NAME_DEF_STMT (binrhs); |
6162 | binrhsisreassoc = (is_reassociable_op (stmt: binrhsdef, code: rhscode, loop) |
6163 | && !stmt_could_throw_p (cfun, binrhsdef)); |
6164 | } |
6165 | |
6166 | /* If the LHS is not reassociable, but the RHS is, we need to swap |
6167 | them. If neither is reassociable, there is nothing we can do, so |
6168 | just put them in the ops vector. If the LHS is reassociable, |
6169 | linearize it. If both are reassociable, then linearize the RHS |
6170 | and the LHS. */ |
6171 | |
6172 | if (!binlhsisreassoc) |
6173 | { |
/* If this is not an associative operation, like division, give up.  */
6175 | if (!is_associative) |
6176 | { |
6177 | add_to_ops_vec (ops, op: binrhs); |
6178 | return; |
6179 | } |
6180 | |
6181 | if (!binrhsisreassoc) |
6182 | { |
6183 | bool swap = false; |
6184 | if (try_special_add_to_ops (ops, code: rhscode, op: binrhs, def_stmt: binrhsdef)) |
/* If we added ops for the rhs, we expect to be able to recurse
to it via the lhs during expression rewrite, so swap the
operands.  */
6188 | swap = true; |
6189 | else |
6190 | add_to_ops_vec (ops, op: binrhs); |
6191 | |
6192 | if (!try_special_add_to_ops (ops, code: rhscode, op: binlhs, def_stmt: binlhsdef)) |
6193 | add_to_ops_vec (ops, op: binlhs); |
6194 | |
6195 | if (!swap) |
6196 | return; |
6197 | } |
6198 | |
6199 | if (dump_file && (dump_flags & TDF_DETAILS)) |
6200 | { |
6201 | fprintf (stream: dump_file, format: "swapping operands of " ); |
6202 | print_gimple_stmt (dump_file, stmt, 0); |
6203 | } |
6204 | |
6205 | swap_ssa_operands (stmt, |
6206 | gimple_assign_rhs1_ptr (gs: stmt), |
6207 | gimple_assign_rhs2_ptr (gs: stmt)); |
6208 | update_stmt (s: stmt); |
6209 | |
6210 | if (dump_file && (dump_flags & TDF_DETAILS)) |
6211 | { |
6212 | fprintf (stream: dump_file, format: " is now " ); |
6213 | print_gimple_stmt (dump_file, stmt, 0); |
6214 | } |
6215 | if (!binrhsisreassoc) |
6216 | return; |
6217 | |
6218 | /* We want to make it so the lhs is always the reassociative op, |
6219 | so swap. */ |
6220 | std::swap (a&: binlhs, b&: binrhs); |
6221 | } |
6222 | else if (binrhsisreassoc) |
6223 | { |
6224 | linearize_expr (stmt); |
6225 | binlhs = gimple_assign_rhs1 (gs: stmt); |
6226 | binrhs = gimple_assign_rhs2 (gs: stmt); |
6227 | } |
6228 | |
6229 | gcc_assert (TREE_CODE (binrhs) != SSA_NAME |
6230 | || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), |
6231 | rhscode, loop)); |
6232 | linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), |
6233 | is_associative, set_visited); |
6234 | |
6235 | if (!try_special_add_to_ops (ops, code: rhscode, op: binrhs, def_stmt: binrhsdef)) |
6236 | add_to_ops_vec (ops, op: binrhs); |
6237 | } |
6238 | |
6239 | /* Repropagate the negates back into subtracts, since no other pass |
6240 | currently does it. */ |
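
/* E.g. t_2 = -a_1; x_3 = b_4 + t_2; becomes x_3 = b_4 - a_1,
folding the negate back into a subtract.  */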

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple *user = get_single_immediate_use (negate);
      if (!user || !is_gimple_assign (user))
        continue;

      tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
      if (TREE_CODE (negateop) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_ssa_operands (user,
                                 gimple_assign_rhs1_ptr (user),
                                 gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
                                              negateop);
              update_stmt (user);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* We have
                   x = -negateop
                   y = x - b
                 which we transform into
                   x = negateop + b
                   y = -x .
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple *feed = SSA_NAME_DEF_STMT (negate);
              tree b = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed);
              gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
              tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
              gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
              user = gsi_stmt (gsi2);
              update_stmt (user);
              reassoc_remove_stmt (&gsi);
              release_defs (feed);
              plus_negates.safe_push (gimple_assign_lhs (user));
            }
          else
            {
              /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
                 getting rid of one operation.  */
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}

/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
          || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}

/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

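/* The vector of repeated factors for the multiply chain currently
   being analyzed by attempt_builtin_powi.  */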
static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */
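/* E.g. for the chain x * x * y * y * y * z * z * z * z used as an
   example below, the sorted order is (x, 2), (y, 3), (z, 4).  */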

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target, unless type is integral.  */
  if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_powi opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_powi opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              if (INTEGRAL_TYPE_P (type))
                {
                  gcc_assert (power > 1);
                  gimple_stmt_iterator gsip = gsi;
                  gsi_prev (&gsip);
                  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
                                               rf1->repr, power);
                  gimple_stmt_iterator gsic = gsi;
                  while (gsi_stmt (gsic) != gsi_stmt (gsip))
                    {
                      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
                      gimple_set_visited (gsi_stmt (gsic), true);
                      gsi_prev (&gsic);
                    }
                }
              else
                {
                  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
                  pow_stmt
                    = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                         build_int_cst (integer_type_node,
                                                        power));
                  gimple_call_set_lhs (pow_stmt, iter_result);
                  gimple_set_location (pow_stmt, gimple_location (stmt));
                  gimple_set_uid (pow_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
                }

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
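          /* E.g. for the sorted vector (x, 2), (y, 3), (z, 4) with J at x,
             this sets z.repr = z, y.repr = y * z and x.repr = x * (y * z),
             i.e. t1 and t2 of the example above; the builtin_powi call
             formed next then computes t3.  */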
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int) j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          if (INTEGRAL_TYPE_P (type))
            {
              gcc_assert (power > 1);
              gimple_stmt_iterator gsip = gsi;
              gsi_prev (&gsip);
              iter_result = powi_as_mults (&gsi, gimple_location (stmt),
                                           rf1->repr, power);
              gimple_stmt_iterator gsic = gsi;
              while (gsi_stmt (gsic) != gsi_stmt (gsip))
                {
                  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
                  gimple_set_visited (gsi_stmt (gsic), true);
                  gsi_prev (&gsic);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
            }
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
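/* E.g. (illustrative) 2.0 * copysign (3.0, y) becomes copysign (6.0, y);
   with -2.0 instead of 2.0 the result is additionally negated.  */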

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Rearranging ops may yield more FMAs when the chain contains more than
   two of them.  Put no-mult ops and mult ops alternately at the end of
   the queue, which is conducive to generating more FMAs and to reducing
   the number of FMAs lost when the chain is broken.
   E.g.
   a * b + c * d + e generates:

   _4  = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearranging the ops to e + a * b + c * d generates:

   _4  = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);

   Return the number of MULT_EXPRs in the chain.  */
static int
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (is_gimple_assign (def_stmt))
            {
              if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
                ops_mult.safe_push (oe);
              /* A negate on the multiplication leads to FNMA.  */
              else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
                       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
                {
                  gimple *neg_def_stmt
                    = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
                  if (is_gimple_assign (neg_def_stmt)
                      && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
                      && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
                    ops_mult.safe_push (oe);
                  else
                    ops_others.safe_push (oe);
                }
              else
                ops_others.safe_push (oe);
            }
          else
            ops_others.safe_push (oe);
        }
      else
        ops_others.safe_push (oe);
    }
  /* 1. When ops_mult.length () >= 2 and not every op is a mult, as in

        a * b + c * d + e,

     we need to rearrange the ops: putting ops that are not defined by
     a mult in front can generate more FMAs.

     2. If all ops are defined by a mult, no rearrangement is needed.  */
  unsigned mult_num = ops_mult.length ();
  if (mult_num >= 2 && mult_num != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
         queue, which is conducive to generating more FMAs and reducing
         the loss of FMAs when breaking the chain.  */
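      /* Illustrative: with ops_mult = [m1, m2] and ops_others = [o1, o2],
         the loop below produces [m1, o1, m2, o2].  Operands are consumed
         from the end of the vector during rewriting, so mult and no-mult
         ops alternate there.  */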
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      for (j = 0; j < others_length; j++)
        {
          oe = ops_others.pop ();
          ops->quick_insert (opindex, oe);
          if (opindex > 0)
            opindex--;
        }
    }
  return mult_num;
}

/* Reassociate expressions in basic block BB and its post-dominator
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_nondebug_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

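  /* Walk the statements backward.  DO_PREV is cleared whenever removing
     a statement repositions the iterator, so that the new last statement
     is not skipped.  */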
  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (cfun, stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case and make sure
                     we don't do gsi_prev in that case.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      do_prev = false;
                    }
                }
              continue;
            }

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if ((rhs_code != BIT_IOR_EXPR
               && rhs_code != BIT_AND_EXPR
               && rhs_code != BIT_XOR_EXPR
               && rhs_code != MIN_EXPR
               && rhs_code != MAX_EXPR
               && !can_reassociate_type_p (TREE_TYPE (lhs)))
              || !can_reassociate_op_p (rhs1)
              || !can_reassociate_op_p (rhs2))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (undistribute_bitref_for_vector (rhs_code, &ops,
                                                  loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && (flag_unsafe_math_optimizations
                          || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the statement defining the operand has to be
                     inserted, insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = 0;
                  int mult_num = 0;

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we generate
                     (X & Y) & C rather than (X & C) & Y.  The former will
                     often match a canonical bit test when we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  optimization_type opt_type = bb_optimization_type (bb);

                  /* If the target supports FMA, rank_ops_for_fma will detect
                     whether the chain has FMAs and rearrange the ops if so.  */
                  if (direct_internal_fn_supported_p (IFN_FMA,
                                                      TREE_TYPE (lhs),
                                                      opt_type)
                      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
                    {
                      mult_num = rank_ops_for_fma (&ops);
                    }

                  /* Only rewrite the expression tree to a parallel form in
                     the last reassoc pass, to avoid useless back-and-forth
                     work with the initial linearization.  */
                  bool has_fma = mult_num >= 2 && mult_num != ops_num;
                  if (!reassoc_insert_powi_p
                      && ops.length () > 3
                      && (width = get_reassociation_width (&ops, mult_num, lhs,
                                                           rhs_code, mode))
                         > 1)
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file,
                                 "Width = %d was chosen for reassociation\n",
                                 width);
                      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                  width,
                                                  has_fma,
                                                  ops);
                    }
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3
                          && (!has_fma
                              /* width > 1 means ranking ops results in better
                                 parallelism.  Check current value to avoid
                                 calling get_reassociation_width again.  */
                              || (width != 1
                                  && get_reassociation_width (&ops, mult_num,
                                                              lhs, rhs_code,
                                                              mode) > 1)))
                        swap_ops_for_binary_stmt (ops, len - 3);

                      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

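/* These helpers are not static; the forward declarations provide
   prototypes.  debug_ops_vector is, presumably, meant to be callable
   from a debugger (it is marked DEBUG_FUNCTION below).  */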
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
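  /* The shift presumably leaves the low bits of the rank free for
     within-block increments and rank biasing (e.g. of loop-carried
     PHIs), so names defined in later blocks keep higher ranks.  */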
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
  mark_ssa_maybe_undefs ();
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
      bias_loop_carried_phi_ranks_p = !param;
    }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
    {
      return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
    }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}