1 | /* Induction variable optimizations. |
2 | Copyright (C) 2003-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* This pass tries to find the optimal set of induction variables for the loop. |
21 | It optimizes just the basic linear induction variables (although adding |
22 | support for other types should not be too hard). It includes the |
23 | optimizations commonly known as strength reduction, induction variable |
   coalescing and induction variable elimination.  It does so in the
   following steps:
26 | |
27 | 1) The interesting uses of induction variables are found. This includes |
28 | |
29 | -- uses of induction variables in non-linear expressions |
30 | -- addresses of arrays |
31 | -- comparisons of induction variables |
32 | |
   Note the interesting uses are categorized and handled in groups.
   Generally, address type uses are grouped together if their iv bases
   differ only by a constant offset.
36 | |
37 | 2) Candidates for the induction variables are found. This includes |
38 | |
39 | -- old induction variables |
40 | -- the variables defined by expressions derived from the "interesting |
41 | groups/uses" above |
42 | |
   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 | cost function assigns a cost to sets of induction variables and consists |
45 | of three parts: |
46 | |
47 | -- The group/use costs. Each of the interesting groups/uses chooses |
48 | the best induction variable in the set and adds its cost to the sum. |
      The cost reflects the time spent on modifying the induction variable's
      value to be usable for the given purpose (adding base and offset for
51 | arrays, etc.). |
52 | -- The variable costs. Each of the variables has a cost assigned that |
53 | reflects the costs associated with incrementing the value of the |
54 | variable. The original variables are somewhat preferred. |
55 | -- The set cost. Depending on the size of the set, extra cost may be |
56 | added to reflect register pressure. |
57 | |
58 | All the costs are defined in a machine-specific way, using the target |
59 | hooks and machine descriptions to determine them. |
60 | |
61 | 4) The trees are transformed to use the new variables, the dead code is |
62 | removed. |
63 | |
   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and might enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
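
/* As an illustrative sketch (a hypothetical example, not the exact output
   of this pass on any particular target), strength reduction may rewrite

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address of a[i] is recomputed from i in every iteration, into
   something equivalent to

     for (p = a; p < a + n; p++)
       *p = 0;

   so that the only remaining induction variable is the pointer p, which is
   advanced by a simple addition.  */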
68 | |
69 | #include "config.h" |
70 | #include "system.h" |
71 | #include "coretypes.h" |
72 | #include "backend.h" |
73 | #include "rtl.h" |
74 | #include "tree.h" |
75 | #include "gimple.h" |
76 | #include "cfghooks.h" |
77 | #include "tree-pass.h" |
78 | #include "memmodel.h" |
79 | #include "tm_p.h" |
80 | #include "ssa.h" |
81 | #include "expmed.h" |
82 | #include "insn-config.h" |
83 | #include "emit-rtl.h" |
84 | #include "recog.h" |
85 | #include "cgraph.h" |
86 | #include "gimple-pretty-print.h" |
87 | #include "alias.h" |
88 | #include "fold-const.h" |
89 | #include "stor-layout.h" |
90 | #include "tree-eh.h" |
91 | #include "gimplify.h" |
92 | #include "gimple-iterator.h" |
93 | #include "gimplify-me.h" |
94 | #include "tree-cfg.h" |
95 | #include "tree-ssa-loop-ivopts.h" |
96 | #include "tree-ssa-loop-manip.h" |
97 | #include "tree-ssa-loop-niter.h" |
98 | #include "tree-ssa-loop.h" |
99 | #include "explow.h" |
100 | #include "expr.h" |
101 | #include "tree-dfa.h" |
102 | #include "tree-ssa.h" |
103 | #include "cfgloop.h" |
104 | #include "tree-scalar-evolution.h" |
105 | #include "params.h" |
106 | #include "tree-affine.h" |
107 | #include "tree-ssa-propagate.h" |
108 | #include "tree-ssa-address.h" |
109 | #include "builtins.h" |
110 | #include "tree-vectorizer.h" |
111 | |
112 | /* FIXME: Expressions are expanded to RTL in this pass to determine the |
113 | cost of different addressing modes. This should be moved to a TBD |
114 | interface between the GIMPLE and RTL worlds. */ |
115 | |
116 | /* The infinite cost. */ |
117 | #define INFTY 10000000 |
118 | |
119 | /* Returns the expected number of loop iterations for LOOP. |
120 | The average trip count is computed from profile data if it |
121 | exists. */ |
122 | |
123 | static inline HOST_WIDE_INT |
124 | avg_loop_niter (struct loop *loop) |
125 | { |
126 | HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); |
127 | if (niter == -1) |
128 | { |
129 | niter = likely_max_stmt_executions_int (loop); |
130 | |
131 | if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER)) |
132 | return PARAM_VALUE (PARAM_AVG_LOOP_NITER); |
133 | } |
134 | |
135 | return niter; |
136 | } |
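
/* For instance, if no profile estimate is available but the loop is known
   to execute its statements at most 3 times, 3 is returned; if nothing
   useful is known, the value of the avg-loop-niter parameter is used as a
   fallback.  */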
137 | |
138 | struct iv_use; |
139 | |
140 | /* Representation of the induction variable. */ |
141 | struct iv |
142 | { |
143 | tree base; /* Initial value of the iv. */ |
  tree base_object;	/* A memory object to which the induction variable points.  */
145 | tree step; /* Step of the iv (constant only). */ |
146 | tree ssa_name; /* The ssa name with the value. */ |
  struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if any.  */
148 | bool biv_p; /* Is it a biv? */ |
149 | bool no_overflow; /* True if the iv doesn't overflow. */ |
  bool have_address_use;/* For a biv, indicates whether it is used in any
			   address type use.  */
152 | }; |
153 | |
154 | /* Per-ssa version information (induction variable descriptions, etc.). */ |
155 | struct version_info |
156 | { |
157 | tree name; /* The ssa name. */ |
158 | struct iv *iv; /* Induction variable description. */ |
159 | bool has_nonlin_use; /* For a loop-level invariant, whether it is used in |
160 | an expression that is not an induction variable. */ |
161 | bool preserve_biv; /* For the original biv, whether to preserve it. */ |
162 | unsigned inv_id; /* Id of an invariant. */ |
163 | }; |
164 | |
165 | /* Types of uses. */ |
166 | enum use_type |
167 | { |
168 | USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */ |
169 | USE_ADDRESS, /* Use in an address. */ |
170 | USE_COMPARE /* Use is a compare. */ |
171 | }; |
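
/* As an illustration (a hypothetical loop, not taken from any testcase),
   in

     for (i = 0; i < n; i++)
       a[i] = i * i;

   the memory reference a[i] gives rise to a USE_ADDRESS use of i, the
   exit test i < n to a USE_COMPARE use, and the non-affine computation
   i * i to a USE_NONLINEAR_EXPR use.  */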
172 | |
173 | /* Cost of a computation. */ |
174 | struct comp_cost |
175 | { |
176 | comp_cost (): cost (0), complexity (0), scratch (0) |
177 | {} |
178 | |
179 | comp_cost (int cost, unsigned complexity, int scratch = 0) |
180 | : cost (cost), complexity (complexity), scratch (scratch) |
181 | {} |
182 | |
183 | /* Returns true if COST is infinite. */ |
184 | bool infinite_cost_p (); |
185 | |
186 | /* Adds costs COST1 and COST2. */ |
187 | friend comp_cost operator+ (comp_cost cost1, comp_cost cost2); |
188 | |
189 | /* Adds COST to the comp_cost. */ |
190 | comp_cost operator+= (comp_cost cost); |
191 | |
192 | /* Adds constant C to this comp_cost. */ |
193 | comp_cost operator+= (HOST_WIDE_INT c); |
194 | |
  /* Subtracts constant C from this comp_cost.  */
196 | comp_cost operator-= (HOST_WIDE_INT c); |
197 | |
198 | /* Divide the comp_cost by constant C. */ |
199 | comp_cost operator/= (HOST_WIDE_INT c); |
200 | |
201 | /* Multiply the comp_cost by constant C. */ |
202 | comp_cost operator*= (HOST_WIDE_INT c); |
203 | |
204 | /* Subtracts costs COST1 and COST2. */ |
205 | friend comp_cost operator- (comp_cost cost1, comp_cost cost2); |
206 | |
207 | /* Subtracts COST from this comp_cost. */ |
208 | comp_cost operator-= (comp_cost cost); |
209 | |
210 | /* Returns true if COST1 is smaller than COST2. */ |
211 | friend bool operator< (comp_cost cost1, comp_cost cost2); |
212 | |
213 | /* Returns true if COST1 and COST2 are equal. */ |
214 | friend bool operator== (comp_cost cost1, comp_cost cost2); |
215 | |
  /* Returns true if COST1 is smaller than or equal to COST2.  */
217 | friend bool operator<= (comp_cost cost1, comp_cost cost2); |
218 | |
219 | int cost; /* The runtime cost. */ |
220 | unsigned complexity; /* The estimate of the complexity of the code for |
221 | the computation (in no concrete units -- |
222 | complexity field should be larger for more |
223 | complex expressions and addressing modes). */ |
224 | int scratch; /* Scratch used during cost computation. */ |
225 | }; |
226 | |
227 | static const comp_cost no_cost; |
228 | static const comp_cost infinite_cost (INFTY, INFTY, INFTY); |
229 | |
230 | bool |
231 | comp_cost::infinite_cost_p () |
232 | { |
233 | return cost == INFTY; |
234 | } |
235 | |
236 | comp_cost |
237 | operator+ (comp_cost cost1, comp_cost cost2) |
238 | { |
239 | if (cost1.infinite_cost_p () || cost2.infinite_cost_p ()) |
240 | return infinite_cost; |
241 | |
242 | cost1.cost += cost2.cost; |
243 | cost1.complexity += cost2.complexity; |
244 | |
245 | return cost1; |
246 | } |
247 | |
248 | comp_cost |
249 | operator- (comp_cost cost1, comp_cost cost2) |
250 | { |
251 | if (cost1.infinite_cost_p ()) |
252 | return infinite_cost; |
253 | |
254 | gcc_assert (!cost2.infinite_cost_p ()); |
255 | |
256 | cost1.cost -= cost2.cost; |
257 | cost1.complexity -= cost2.complexity; |
258 | |
259 | return cost1; |
260 | } |
261 | |
262 | comp_cost |
263 | comp_cost::operator+= (comp_cost cost) |
264 | { |
265 | *this = *this + cost; |
266 | return *this; |
267 | } |
268 | |
269 | comp_cost |
270 | comp_cost::operator+= (HOST_WIDE_INT c) |
271 | { |
272 | if (infinite_cost_p ()) |
273 | return *this; |
274 | |
275 | this->cost += c; |
276 | |
277 | return *this; |
278 | } |
279 | |
280 | comp_cost |
281 | comp_cost::operator-= (HOST_WIDE_INT c) |
282 | { |
283 | if (infinite_cost_p ()) |
284 | return *this; |
285 | |
286 | this->cost -= c; |
287 | |
288 | return *this; |
289 | } |
290 | |
291 | comp_cost |
292 | comp_cost::operator/= (HOST_WIDE_INT c) |
293 | { |
294 | if (infinite_cost_p ()) |
295 | return *this; |
296 | |
297 | this->cost /= c; |
298 | |
299 | return *this; |
300 | } |
301 | |
302 | comp_cost |
303 | comp_cost::operator*= (HOST_WIDE_INT c) |
304 | { |
305 | if (infinite_cost_p ()) |
306 | return *this; |
307 | |
308 | this->cost *= c; |
309 | |
310 | return *this; |
311 | } |
312 | |
313 | comp_cost |
314 | comp_cost::operator-= (comp_cost cost) |
315 | { |
316 | *this = *this - cost; |
317 | return *this; |
318 | } |
319 | |
320 | bool |
321 | operator< (comp_cost cost1, comp_cost cost2) |
322 | { |
323 | if (cost1.cost == cost2.cost) |
324 | return cost1.complexity < cost2.complexity; |
325 | |
326 | return cost1.cost < cost2.cost; |
327 | } |
328 | |
329 | bool |
330 | operator== (comp_cost cost1, comp_cost cost2) |
331 | { |
332 | return cost1.cost == cost2.cost |
333 | && cost1.complexity == cost2.complexity; |
334 | } |
335 | |
336 | bool |
337 | operator<= (comp_cost cost1, comp_cost cost2) |
338 | { |
339 | return cost1 < cost2 || cost1 == cost2; |
340 | } |
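
/* Note the comparison above is lexicographic: the runtime cost decides
   first and the complexity only breaks ties.  For instance (illustrative
   numbers only), {cost = 4, complexity = 1} is cheaper than
   {cost = 4, complexity = 2}, while {cost = 3, complexity = 5} beats both
   because its runtime cost is lower.  Any operand equal to infinite_cost
   stays infinite under addition.  */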
341 | |
342 | struct iv_inv_expr_ent; |
343 | |
344 | /* The candidate - cost pair. */ |
345 | struct cost_pair |
346 | { |
347 | struct iv_cand *cand; /* The candidate. */ |
348 | comp_cost cost; /* The cost. */ |
349 | enum tree_code comp; /* For iv elimination, the comparison. */ |
350 | bitmap inv_vars; /* The list of invariant ssa_vars that have to be |
351 | preserved when representing iv_use with iv_cand. */ |
352 | bitmap inv_exprs; /* The list of newly created invariant expressions |
353 | when representing iv_use with iv_cand. */ |
354 | tree value; /* For final value elimination, the expression for |
355 | the final value of the iv. For iv elimination, |
356 | the new bound to compare with. */ |
357 | }; |
358 | |
359 | /* Use. */ |
360 | struct iv_use |
361 | { |
362 | unsigned id; /* The id of the use. */ |
363 | unsigned group_id; /* The group id the use belongs to. */ |
364 | enum use_type type; /* Type of the use. */ |
365 | struct iv *iv; /* The induction variable it is based on. */ |
366 | gimple *stmt; /* Statement in that it occurs. */ |
367 | tree *op_p; /* The place where it occurs. */ |
368 | |
369 | tree addr_base; /* Base address with const offset stripped. */ |
370 | unsigned HOST_WIDE_INT addr_offset; |
371 | /* Const offset stripped from base address. */ |
372 | }; |
373 | |
374 | /* Group of uses. */ |
375 | struct iv_group |
376 | { |
377 | /* The id of the group. */ |
378 | unsigned id; |
379 | /* Uses of the group are of the same type. */ |
380 | enum use_type type; |
381 | /* The set of "related" IV candidates, plus the important ones. */ |
382 | bitmap related_cands; |
383 | /* Number of IV candidates in the cost_map. */ |
384 | unsigned n_map_members; |
385 | /* The costs wrto the iv candidates. */ |
386 | struct cost_pair *cost_map; |
387 | /* The selected candidate for the group. */ |
388 | struct iv_cand *selected; |
389 | /* Uses in the group. */ |
390 | vec<struct iv_use *> vuses; |
391 | }; |
392 | |
393 | /* The position where the iv is computed. */ |
394 | enum iv_position |
395 | { |
396 | IP_NORMAL, /* At the end, just before the exit condition. */ |
397 | IP_END, /* At the end of the latch block. */ |
398 | IP_BEFORE_USE, /* Immediately before a specific use. */ |
399 | IP_AFTER_USE, /* Immediately after a specific use. */ |
400 | IP_ORIGINAL /* The original biv. */ |
401 | }; |
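
/* Schematically (an illustrative loop shape, not a real CFG dump):

     <header/body>:
       ... use of the iv ...	  <- IP_BEFORE_USE / IP_AFTER_USE place the
				     increment right around a specific use
       ... iv = iv + step ...	  <- IP_ORIGINAL reuses the original biv
				     increment
       if (cond) goto <header>;	  <- IP_NORMAL increments just before this
				     exit test
     <latch>:			  <- IP_END increments at the end of the
				     latch block.  */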
402 | |
403 | /* The induction variable candidate. */ |
404 | struct iv_cand |
405 | { |
406 | unsigned id; /* The number of the candidate. */ |
407 | bool important; /* Whether this is an "important" candidate, i.e. such |
408 | that it should be considered by all uses. */ |
409 | ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */ |
410 | gimple *incremented_at;/* For original biv, the statement where it is |
411 | incremented. */ |
412 | tree var_before; /* The variable used for it before increment. */ |
413 | tree var_after; /* The variable used for it after increment. */ |
414 | struct iv *iv; /* The value of the candidate. NULL for |
415 | "pseudocandidate" used to indicate the possibility |
416 | to replace the final value of an iv by direct |
417 | computation of the value. */ |
418 | unsigned cost; /* Cost of the candidate. */ |
419 | unsigned cost_step; /* Cost of the candidate's increment operation. */ |
420 | struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place |
421 | where it is incremented. */ |
422 | bitmap inv_vars; /* The list of invariant ssa_vars used in step of the |
423 | iv_cand. */ |
424 | bitmap inv_exprs; /* If step is more complicated than a single ssa_var, |
			   handle it as a new invariant expression which will
426 | be hoisted out of loop. */ |
427 | struct iv *orig_iv; /* The original iv if this cand is added from biv with |
428 | smaller type. */ |
429 | }; |
430 | |
431 | /* Hashtable entry for common candidate derived from iv uses. */ |
432 | struct iv_common_cand |
433 | { |
434 | tree base; |
435 | tree step; |
436 | /* IV uses from which this common candidate is derived. */ |
437 | auto_vec<struct iv_use *> uses; |
438 | hashval_t hash; |
439 | }; |
440 | |
441 | /* Hashtable helpers. */ |
442 | |
443 | struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand> |
444 | { |
445 | static inline hashval_t hash (const iv_common_cand *); |
446 | static inline bool equal (const iv_common_cand *, const iv_common_cand *); |
447 | }; |
448 | |
449 | /* Hash function for possible common candidates. */ |
450 | |
451 | inline hashval_t |
452 | iv_common_cand_hasher::hash (const iv_common_cand *ccand) |
453 | { |
454 | return ccand->hash; |
455 | } |
456 | |
457 | /* Hash table equality function for common candidates. */ |
458 | |
459 | inline bool |
460 | iv_common_cand_hasher::equal (const iv_common_cand *ccand1, |
461 | const iv_common_cand *ccand2) |
462 | { |
463 | return (ccand1->hash == ccand2->hash |
464 | && operand_equal_p (ccand1->base, ccand2->base, 0) |
465 | && operand_equal_p (ccand1->step, ccand2->step, 0) |
466 | && (TYPE_PRECISION (TREE_TYPE (ccand1->base)) |
467 | == TYPE_PRECISION (TREE_TYPE (ccand2->base)))); |
468 | } |
469 | |
470 | /* Loop invariant expression hashtable entry. */ |
471 | |
472 | struct iv_inv_expr_ent |
473 | { |
474 | /* Tree expression of the entry. */ |
475 | tree expr; |
  /* Unique identifier.  */
477 | int id; |
478 | /* Hash value. */ |
479 | hashval_t hash; |
480 | }; |
481 | |
482 | /* Sort iv_inv_expr_ent pair A and B by id field. */ |
483 | |
484 | static int |
485 | sort_iv_inv_expr_ent (const void *a, const void *b) |
486 | { |
487 | const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a); |
488 | const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b); |
489 | |
490 | unsigned id1 = (*e1)->id; |
491 | unsigned id2 = (*e2)->id; |
492 | |
493 | if (id1 < id2) |
494 | return -1; |
495 | else if (id1 > id2) |
496 | return 1; |
497 | else |
498 | return 0; |
499 | } |
500 | |
501 | /* Hashtable helpers. */ |
502 | |
503 | struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent> |
504 | { |
505 | static inline hashval_t hash (const iv_inv_expr_ent *); |
506 | static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *); |
507 | }; |
508 | |
509 | /* Hash function for loop invariant expressions. */ |
510 | |
511 | inline hashval_t |
512 | iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr) |
513 | { |
514 | return expr->hash; |
515 | } |
516 | |
517 | /* Hash table equality function for expressions. */ |
518 | |
519 | inline bool |
520 | iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1, |
521 | const iv_inv_expr_ent *expr2) |
522 | { |
523 | return expr1->hash == expr2->hash |
524 | && operand_equal_p (expr1->expr, expr2->expr, 0); |
525 | } |
526 | |
527 | struct ivopts_data |
528 | { |
529 | /* The currently optimized loop. */ |
530 | struct loop *current_loop; |
531 | source_location loop_loc; |
532 | |
533 | /* Numbers of iterations for all exits of the current loop. */ |
534 | hash_map<edge, tree_niter_desc *> *niters; |
535 | |
536 | /* Number of registers used in it. */ |
537 | unsigned regs_used; |
538 | |
539 | /* The size of version_info array allocated. */ |
540 | unsigned version_info_size; |
541 | |
542 | /* The array of information for the ssa names. */ |
543 | struct version_info *version_info; |
544 | |
545 | /* The hashtable of loop invariant expressions created |
546 | by ivopt. */ |
547 | hash_table<iv_inv_expr_hasher> *inv_expr_tab; |
548 | |
549 | /* The bitmap of indices in version_info whose value was changed. */ |
550 | bitmap relevant; |
551 | |
552 | /* The uses of induction variables. */ |
553 | vec<iv_group *> vgroups; |
554 | |
555 | /* The candidates. */ |
556 | vec<iv_cand *> vcands; |
557 | |
558 | /* A bitmap of important candidates. */ |
559 | bitmap important_candidates; |
560 | |
561 | /* Cache used by tree_to_aff_combination_expand. */ |
562 | hash_map<tree, name_expansion *> *name_expansion_cache; |
563 | |
564 | /* The hashtable of common candidates derived from iv uses. */ |
565 | hash_table<iv_common_cand_hasher> *iv_common_cand_tab; |
566 | |
567 | /* The common candidates. */ |
568 | vec<iv_common_cand *> iv_common_cands; |
569 | |
570 | /* The maximum invariant variable id. */ |
571 | unsigned max_inv_var_id; |
572 | |
573 | /* The maximum invariant expression id. */ |
574 | unsigned max_inv_expr_id; |
575 | |
576 | /* Number of no_overflow BIVs which are not used in memory address. */ |
577 | unsigned bivs_not_used_in_addr; |
578 | |
579 | /* Obstack for iv structure. */ |
580 | struct obstack iv_obstack; |
581 | |
582 | /* Whether to consider just related and important candidates when replacing a |
583 | use. */ |
584 | bool consider_all_candidates; |
585 | |
586 | /* Are we optimizing for speed? */ |
587 | bool speed; |
588 | |
589 | /* Whether the loop body includes any function calls. */ |
590 | bool body_includes_call; |
591 | |
592 | /* Whether the loop body can only be exited via single exit. */ |
593 | bool loop_single_exit_p; |
594 | }; |
595 | |
596 | /* An assignment of iv candidates to uses. */ |
597 | |
598 | struct iv_ca |
599 | { |
600 | /* The number of uses covered by the assignment. */ |
601 | unsigned upto; |
602 | |
603 | /* Number of uses that cannot be expressed by the candidates in the set. */ |
604 | unsigned bad_groups; |
605 | |
606 | /* Candidate assigned to a use, together with the related costs. */ |
607 | struct cost_pair **cand_for_group; |
608 | |
609 | /* Number of times each candidate is used. */ |
610 | unsigned *n_cand_uses; |
611 | |
612 | /* The candidates used. */ |
613 | bitmap cands; |
614 | |
615 | /* The number of candidates in the set. */ |
616 | unsigned n_cands; |
617 | |
618 | /* The number of invariants needed, including both invariant variants and |
619 | invariant expressions. */ |
620 | unsigned n_invs; |
621 | |
622 | /* Total cost of expressing uses. */ |
623 | comp_cost cand_use_cost; |
624 | |
625 | /* Total cost of candidates. */ |
626 | unsigned cand_cost; |
627 | |
628 | /* Number of times each invariant variable is used. */ |
629 | unsigned *n_inv_var_uses; |
630 | |
631 | /* Number of times each invariant expression is used. */ |
632 | unsigned *n_inv_expr_uses; |
633 | |
634 | /* Total cost of the assignment. */ |
635 | comp_cost cost; |
636 | }; |
637 | |
638 | /* Difference of two iv candidate assignments. */ |
639 | |
640 | struct iv_ca_delta |
641 | { |
642 | /* Changed group. */ |
643 | struct iv_group *group; |
644 | |
645 | /* An old assignment (for rollback purposes). */ |
646 | struct cost_pair *old_cp; |
647 | |
648 | /* A new assignment. */ |
649 | struct cost_pair *new_cp; |
650 | |
651 | /* Next change in the list. */ |
652 | struct iv_ca_delta *next; |
653 | }; |
654 | |
/* Bound on the number of candidates below which all candidates are
   considered.  */
656 | |
657 | #define CONSIDER_ALL_CANDIDATES_BOUND \ |
658 | ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND)) |
659 | |
660 | /* If there are more iv occurrences, we just give up (it is quite unlikely that |
661 | optimizing such a loop would help, and it would take ages). */ |
662 | |
663 | #define MAX_CONSIDERED_GROUPS \ |
664 | ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES)) |
665 | |
666 | /* If there are at most this number of ivs in the set, try removing unnecessary |
667 | ivs from the set always. */ |
668 | |
669 | #define ALWAYS_PRUNE_CAND_SET_BOUND \ |
670 | ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND)) |
671 | |
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */
674 | |
675 | static vec<tree> decl_rtl_to_reset; |
676 | |
677 | static comp_cost force_expr_to_var_cost (tree, bool); |
678 | |
679 | /* The single loop exit if it dominates the latch, NULL otherwise. */ |
680 | |
681 | edge |
682 | single_dom_exit (struct loop *loop) |
683 | { |
684 | edge exit = single_exit (loop); |
685 | |
686 | if (!exit) |
687 | return NULL; |
688 | |
689 | if (!just_once_each_iteration_p (loop, exit->src)) |
690 | return NULL; |
691 | |
692 | return exit; |
693 | } |
694 | |
695 | /* Dumps information about the induction variable IV to FILE. Don't dump |
696 | variable's name if DUMP_NAME is FALSE. The information is dumped with |
697 | preceding spaces indicated by INDENT_LEVEL. */ |
698 | |
699 | void |
700 | dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level) |
701 | { |
702 | const char *p; |
703 | const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}; |
704 | |
705 | if (indent_level > 4) |
706 | indent_level = 4; |
707 | p = spaces + 8 - (indent_level << 1); |
708 | |
709 | fprintf (file, "%sIV struct:\n" , p); |
710 | if (iv->ssa_name && dump_name) |
711 | { |
712 | fprintf (file, "%s SSA_NAME:\t" , p); |
713 | print_generic_expr (file, iv->ssa_name, TDF_SLIM); |
714 | fprintf (file, "\n" ); |
715 | } |
716 | |
717 | fprintf (file, "%s Type:\t" , p); |
718 | print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM); |
719 | fprintf (file, "\n" ); |
720 | |
721 | fprintf (file, "%s Base:\t" , p); |
722 | print_generic_expr (file, iv->base, TDF_SLIM); |
723 | fprintf (file, "\n" ); |
724 | |
725 | fprintf (file, "%s Step:\t" , p); |
726 | print_generic_expr (file, iv->step, TDF_SLIM); |
727 | fprintf (file, "\n" ); |
728 | |
729 | if (iv->base_object) |
730 | { |
731 | fprintf (file, "%s Object:\t" , p); |
732 | print_generic_expr (file, iv->base_object, TDF_SLIM); |
733 | fprintf (file, "\n" ); |
734 | } |
735 | |
736 | fprintf (file, "%s Biv:\t%c\n" , p, iv->biv_p ? 'Y' : 'N'); |
737 | |
738 | fprintf (file, "%s Overflowness wrto loop niter:\t%s\n" , |
739 | p, iv->no_overflow ? "No-overflow" : "Overflow" ); |
740 | } |
741 | |
742 | /* Dumps information about the USE to FILE. */ |
743 | |
744 | void |
745 | dump_use (FILE *file, struct iv_use *use) |
746 | { |
747 | fprintf (file, " Use %d.%d:\n" , use->group_id, use->id); |
748 | fprintf (file, " At stmt:\t" ); |
749 | print_gimple_stmt (file, use->stmt, 0); |
750 | fprintf (file, " At pos:\t" ); |
751 | if (use->op_p) |
752 | print_generic_expr (file, *use->op_p, TDF_SLIM); |
753 | fprintf (file, "\n" ); |
754 | dump_iv (file, use->iv, false, 2); |
755 | } |
756 | |
/* Dumps information about the groups of uses to FILE.  */
758 | |
759 | void |
760 | dump_groups (FILE *file, struct ivopts_data *data) |
761 | { |
762 | unsigned i, j; |
763 | struct iv_group *group; |
764 | |
765 | for (i = 0; i < data->vgroups.length (); i++) |
766 | { |
767 | group = data->vgroups[i]; |
768 | fprintf (file, "Group %d:\n" , group->id); |
769 | if (group->type == USE_NONLINEAR_EXPR) |
770 | fprintf (file, " Type:\tGENERIC\n" ); |
771 | else if (group->type == USE_ADDRESS) |
772 | fprintf (file, " Type:\tADDRESS\n" ); |
773 | else |
774 | { |
775 | gcc_assert (group->type == USE_COMPARE); |
776 | fprintf (file, " Type:\tCOMPARE\n" ); |
777 | } |
778 | for (j = 0; j < group->vuses.length (); j++) |
779 | dump_use (file, group->vuses[j]); |
780 | } |
781 | } |
782 | |
783 | /* Dumps information about induction variable candidate CAND to FILE. */ |
784 | |
785 | void |
786 | dump_cand (FILE *file, struct iv_cand *cand) |
787 | { |
788 | struct iv *iv = cand->iv; |
789 | |
790 | fprintf (file, "Candidate %d:\n" , cand->id); |
791 | if (cand->inv_vars) |
792 | { |
793 | fprintf (file, " Depend on inv.vars: " ); |
794 | dump_bitmap (file, cand->inv_vars); |
795 | } |
796 | if (cand->inv_exprs) |
797 | { |
798 | fprintf (file, " Depend on inv.exprs: " ); |
799 | dump_bitmap (file, cand->inv_exprs); |
800 | } |
801 | |
802 | if (cand->var_before) |
803 | { |
      fprintf (file, "  Var before: ");
805 | print_generic_expr (file, cand->var_before, TDF_SLIM); |
806 | fprintf (file, "\n" ); |
807 | } |
808 | if (cand->var_after) |
809 | { |
810 | fprintf (file, " Var after: " ); |
811 | print_generic_expr (file, cand->var_after, TDF_SLIM); |
812 | fprintf (file, "\n" ); |
813 | } |
814 | |
815 | switch (cand->pos) |
816 | { |
817 | case IP_NORMAL: |
818 | fprintf (file, " Incr POS: before exit test\n" ); |
819 | break; |
820 | |
821 | case IP_BEFORE_USE: |
822 | fprintf (file, " Incr POS: before use %d\n" , cand->ainc_use->id); |
823 | break; |
824 | |
825 | case IP_AFTER_USE: |
826 | fprintf (file, " Incr POS: after use %d\n" , cand->ainc_use->id); |
827 | break; |
828 | |
829 | case IP_END: |
830 | fprintf (file, " Incr POS: at end\n" ); |
831 | break; |
832 | |
833 | case IP_ORIGINAL: |
834 | fprintf (file, " Incr POS: orig biv\n" ); |
835 | break; |
836 | } |
837 | |
838 | dump_iv (file, iv, false, 1); |
839 | } |
840 | |
841 | /* Returns the info for ssa version VER. */ |
842 | |
843 | static inline struct version_info * |
844 | ver_info (struct ivopts_data *data, unsigned ver) |
845 | { |
846 | return data->version_info + ver; |
847 | } |
848 | |
849 | /* Returns the info for ssa name NAME. */ |
850 | |
851 | static inline struct version_info * |
852 | name_info (struct ivopts_data *data, tree name) |
853 | { |
854 | return ver_info (data, SSA_NAME_VERSION (name)); |
855 | } |
856 | |
857 | /* Returns true if STMT is after the place where the IP_NORMAL ivs will be |
858 | emitted in LOOP. */ |
859 | |
860 | static bool |
861 | stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt) |
862 | { |
863 | basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt); |
864 | |
865 | gcc_assert (bb); |
866 | |
867 | if (sbb == loop->latch) |
868 | return true; |
869 | |
870 | if (sbb != bb) |
871 | return false; |
872 | |
873 | return stmt == last_stmt (bb); |
874 | } |
875 | |
/* Returns true if STMT is after the place where the original induction
877 | variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true |
878 | if the positions are identical. */ |
879 | |
880 | static bool |
881 | stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal) |
882 | { |
883 | basic_block cand_bb = gimple_bb (cand->incremented_at); |
884 | basic_block stmt_bb = gimple_bb (stmt); |
885 | |
886 | if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) |
887 | return false; |
888 | |
889 | if (stmt_bb != cand_bb) |
890 | return true; |
891 | |
892 | if (true_if_equal |
893 | && gimple_uid (stmt) == gimple_uid (cand->incremented_at)) |
894 | return true; |
895 | return gimple_uid (stmt) > gimple_uid (cand->incremented_at); |
896 | } |
897 | |
/* Returns true if STMT is after the place where the induction variable
899 | CAND is incremented in LOOP. */ |
900 | |
901 | static bool |
902 | stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt) |
903 | { |
904 | switch (cand->pos) |
905 | { |
906 | case IP_END: |
907 | return false; |
908 | |
909 | case IP_NORMAL: |
910 | return stmt_after_ip_normal_pos (loop, stmt); |
911 | |
912 | case IP_ORIGINAL: |
913 | case IP_AFTER_USE: |
914 | return stmt_after_inc_pos (cand, stmt, false); |
915 | |
916 | case IP_BEFORE_USE: |
917 | return stmt_after_inc_pos (cand, stmt, true); |
918 | |
919 | default: |
920 | gcc_unreachable (); |
921 | } |
922 | } |
923 | |
924 | /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */ |
925 | |
926 | static bool |
927 | abnormal_ssa_name_p (tree exp) |
928 | { |
929 | if (!exp) |
930 | return false; |
931 | |
932 | if (TREE_CODE (exp) != SSA_NAME) |
933 | return false; |
934 | |
935 | return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; |
936 | } |
937 | |
938 | /* Returns false if BASE or INDEX contains a ssa name that occurs in an |
939 | abnormal phi node. Callback for for_each_index. */ |
940 | |
941 | static bool |
942 | idx_contains_abnormal_ssa_name_p (tree base, tree *index, |
943 | void *data ATTRIBUTE_UNUSED) |
944 | { |
945 | if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) |
946 | { |
947 | if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) |
948 | return false; |
949 | if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) |
950 | return false; |
951 | } |
952 | |
953 | return !abnormal_ssa_name_p (*index); |
954 | } |
955 | |
956 | /* Returns true if EXPR contains a ssa name that occurs in an |
957 | abnormal phi node. */ |
958 | |
959 | bool |
960 | contains_abnormal_ssa_name_p (tree expr) |
961 | { |
962 | enum tree_code code; |
963 | enum tree_code_class codeclass; |
964 | |
965 | if (!expr) |
966 | return false; |
967 | |
968 | code = TREE_CODE (expr); |
969 | codeclass = TREE_CODE_CLASS (code); |
970 | |
971 | if (code == SSA_NAME) |
972 | return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; |
973 | |
974 | if (code == INTEGER_CST |
975 | || is_gimple_min_invariant (expr)) |
976 | return false; |
977 | |
978 | if (code == ADDR_EXPR) |
979 | return !for_each_index (&TREE_OPERAND (expr, 0), |
980 | idx_contains_abnormal_ssa_name_p, |
981 | NULL); |
982 | |
983 | if (code == COND_EXPR) |
984 | return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)) |
985 | || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)) |
986 | || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2)); |
987 | |
988 | switch (codeclass) |
989 | { |
990 | case tcc_binary: |
991 | case tcc_comparison: |
992 | if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) |
993 | return true; |
994 | |
995 | /* Fallthru. */ |
996 | case tcc_unary: |
997 | if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) |
998 | return true; |
999 | |
1000 | break; |
1001 | |
1002 | default: |
1003 | gcc_unreachable (); |
1004 | } |
1005 | |
1006 | return false; |
1007 | } |
1008 | |
1009 | /* Returns the structure describing number of iterations determined from |
1010 | EXIT of DATA->current_loop, or NULL if something goes wrong. */ |
1011 | |
1012 | static struct tree_niter_desc * |
1013 | niter_for_exit (struct ivopts_data *data, edge exit) |
1014 | { |
1015 | struct tree_niter_desc *desc; |
1016 | tree_niter_desc **slot; |
1017 | |
1018 | if (!data->niters) |
1019 | { |
1020 | data->niters = new hash_map<edge, tree_niter_desc *>; |
1021 | slot = NULL; |
1022 | } |
1023 | else |
1024 | slot = data->niters->get (exit); |
1025 | |
1026 | if (!slot) |
1027 | { |
1028 | /* Try to determine number of iterations. We cannot safely work with ssa |
1029 | names that appear in phi nodes on abnormal edges, so that we do not |
1030 | create overlapping life ranges for them (PR 27283). */ |
1031 | desc = XNEW (struct tree_niter_desc); |
1032 | if (!number_of_iterations_exit (data->current_loop, |
1033 | exit, desc, true) |
1034 | || contains_abnormal_ssa_name_p (desc->niter)) |
1035 | { |
1036 | XDELETE (desc); |
1037 | desc = NULL; |
1038 | } |
1039 | data->niters->put (exit, desc); |
1040 | } |
1041 | else |
1042 | desc = *slot; |
1043 | |
1044 | return desc; |
1045 | } |
1046 | |
1047 | /* Returns the structure describing number of iterations determined from |
1048 | single dominating exit of DATA->current_loop, or NULL if something |
1049 | goes wrong. */ |
1050 | |
1051 | static struct tree_niter_desc * |
1052 | niter_for_single_dom_exit (struct ivopts_data *data) |
1053 | { |
1054 | edge exit = single_dom_exit (data->current_loop); |
1055 | |
1056 | if (!exit) |
1057 | return NULL; |
1058 | |
1059 | return niter_for_exit (data, exit); |
1060 | } |
1061 | |
1062 | /* Initializes data structures used by the iv optimization pass, stored |
1063 | in DATA. */ |
1064 | |
1065 | static void |
1066 | tree_ssa_iv_optimize_init (struct ivopts_data *data) |
1067 | { |
1068 | data->version_info_size = 2 * num_ssa_names; |
1069 | data->version_info = XCNEWVEC (struct version_info, data->version_info_size); |
1070 | data->relevant = BITMAP_ALLOC (NULL); |
1071 | data->important_candidates = BITMAP_ALLOC (NULL); |
1072 | data->max_inv_var_id = 0; |
1073 | data->max_inv_expr_id = 0; |
1074 | data->niters = NULL; |
1075 | data->vgroups.create (20); |
1076 | data->vcands.create (20); |
1077 | data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10); |
1078 | data->name_expansion_cache = NULL; |
1079 | data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10); |
1080 | data->iv_common_cands.create (20); |
1081 | decl_rtl_to_reset.create (20); |
1082 | gcc_obstack_init (&data->iv_obstack); |
1083 | } |
1084 | |
/* Returns a memory object to which EXPR points.  In case we are able to
1086 | determine that it does not point to any such object, NULL is returned. */ |
1087 | |
1088 | static tree |
1089 | determine_base_object (tree expr) |
1090 | { |
1091 | enum tree_code code = TREE_CODE (expr); |
1092 | tree base, obj; |
1093 | |
1094 | /* If this is a pointer casted to any type, we need to determine |
1095 | the base object for the pointer; so handle conversions before |
1096 | throwing away non-pointer expressions. */ |
1097 | if (CONVERT_EXPR_P (expr)) |
1098 | return determine_base_object (TREE_OPERAND (expr, 0)); |
1099 | |
1100 | if (!POINTER_TYPE_P (TREE_TYPE (expr))) |
1101 | return NULL_TREE; |
1102 | |
1103 | switch (code) |
1104 | { |
1105 | case INTEGER_CST: |
1106 | return NULL_TREE; |
1107 | |
1108 | case ADDR_EXPR: |
1109 | obj = TREE_OPERAND (expr, 0); |
1110 | base = get_base_address (obj); |
1111 | |
1112 | if (!base) |
1113 | return expr; |
1114 | |
1115 | if (TREE_CODE (base) == MEM_REF) |
1116 | return determine_base_object (TREE_OPERAND (base, 0)); |
1117 | |
1118 | return fold_convert (ptr_type_node, |
1119 | build_fold_addr_expr (base)); |
1120 | |
1121 | case POINTER_PLUS_EXPR: |
1122 | return determine_base_object (TREE_OPERAND (expr, 0)); |
1123 | |
1124 | case PLUS_EXPR: |
1125 | case MINUS_EXPR: |
1126 | /* Pointer addition is done solely using POINTER_PLUS_EXPR. */ |
1127 | gcc_unreachable (); |
1128 | |
1129 | default: |
1130 | return fold_convert (ptr_type_node, expr); |
1131 | } |
1132 | } |
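
/* For illustration (hypothetical expressions): for &a[i], where a is a
   declaration, the base address of the dereferenced object is a itself,
   so &a is returned; for p + 4 (a POINTER_PLUS_EXPR) the base object is
   determined from p alone; and for a plain integer constant NULL_TREE is
   returned, since such a pointer points to no object.  */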
1133 | |
/* Return true if an address expression with a non-DECL_P operand
   appears in EXPR.  */
1136 | |
1137 | static bool |
1138 | contain_complex_addr_expr (tree expr) |
1139 | { |
1140 | bool res = false; |
1141 | |
1142 | STRIP_NOPS (expr); |
1143 | switch (TREE_CODE (expr)) |
1144 | { |
1145 | case POINTER_PLUS_EXPR: |
1146 | case PLUS_EXPR: |
1147 | case MINUS_EXPR: |
1148 | res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); |
1149 | res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); |
1150 | break; |
1151 | |
1152 | case ADDR_EXPR: |
1153 | return (!DECL_P (TREE_OPERAND (expr, 0))); |
1154 | |
1155 | default: |
1156 | return false; |
1157 | } |
1158 | |
1159 | return res; |
1160 | } |
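
/* For example, &a taken on a declaration a is not considered complex
   (its operand is DECL_P), whereas &a[i] is, because the operand of the
   ADDR_EXPR is an ARRAY_REF rather than a declaration.  */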
1161 | |
1162 | /* Allocates an induction variable with given initial value BASE and step STEP |
1163 | for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ |
1164 | |
1165 | static struct iv * |
1166 | alloc_iv (struct ivopts_data *data, tree base, tree step, |
1167 | bool no_overflow = false) |
1168 | { |
1169 | tree expr = base; |
1170 | struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack, |
1171 | sizeof (struct iv)); |
1172 | gcc_assert (step != NULL_TREE); |
1173 | |
1174 | /* Lower address expression in base except ones with DECL_P as operand. |
1175 | By doing this: |
1176 | 1) More accurate cost can be computed for address expressions; |
1177 | 2) Duplicate candidates won't be created for bases in different |
1178 | forms, like &a[0] and &a. */ |
1179 | STRIP_NOPS (expr); |
1180 | if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) |
1181 | || contain_complex_addr_expr (expr)) |
1182 | { |
1183 | aff_tree comb; |
1184 | tree_to_aff_combination (expr, TREE_TYPE (expr), &comb); |
1185 | base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); |
1186 | } |
1187 | |
1188 | iv->base = base; |
1189 | iv->base_object = determine_base_object (base); |
1190 | iv->step = step; |
1191 | iv->biv_p = false; |
1192 | iv->nonlin_use = NULL; |
1193 | iv->ssa_name = NULL_TREE; |
1194 | if (!no_overflow |
1195 | && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base), |
1196 | base, step)) |
1197 | no_overflow = true; |
1198 | iv->no_overflow = no_overflow; |
1199 | iv->have_address_use = false; |
1200 | |
1201 | return iv; |
1202 | } |
1203 | |
1204 | /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV |
1205 | doesn't overflow. */ |
1206 | |
1207 | static void |
1208 | set_iv (struct ivopts_data *data, tree iv, tree base, tree step, |
1209 | bool no_overflow) |
1210 | { |
1211 | struct version_info *info = name_info (data, iv); |
1212 | |
1213 | gcc_assert (!info->iv); |
1214 | |
1215 | bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); |
1216 | info->iv = alloc_iv (data, base, step, no_overflow); |
1217 | info->iv->ssa_name = iv; |
1218 | } |
1219 | |
1220 | /* Finds induction variable declaration for VAR. */ |
1221 | |
1222 | static struct iv * |
1223 | get_iv (struct ivopts_data *data, tree var) |
1224 | { |
1225 | basic_block bb; |
1226 | tree type = TREE_TYPE (var); |
1227 | |
1228 | if (!POINTER_TYPE_P (type) |
1229 | && !INTEGRAL_TYPE_P (type)) |
1230 | return NULL; |
1231 | |
1232 | if (!name_info (data, var)->iv) |
1233 | { |
1234 | bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
1235 | |
1236 | if (!bb |
1237 | || !flow_bb_inside_loop_p (data->current_loop, bb)) |
1238 | set_iv (data, var, var, build_int_cst (type, 0), true); |
1239 | } |
1240 | |
1241 | return name_info (data, var)->iv; |
1242 | } |
1243 | |
1244 | /* Return the first non-invariant ssa var found in EXPR. */ |
1245 | |
1246 | static tree |
extract_single_var_from_expr (tree expr)
1248 | { |
1249 | int i, n; |
1250 | tree tmp; |
1251 | enum tree_code code; |
1252 | |
1253 | if (!expr || is_gimple_min_invariant (expr)) |
1254 | return NULL; |
1255 | |
1256 | code = TREE_CODE (expr); |
1257 | if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) |
1258 | { |
1259 | n = TREE_OPERAND_LENGTH (expr); |
1260 | for (i = 0; i < n; i++) |
1261 | { |
1262 | tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); |
1263 | |
1264 | if (tmp) |
1265 | return tmp; |
1266 | } |
1267 | } |
1268 | return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL; |
1269 | } |
1270 | |
1271 | /* Finds basic ivs. */ |
1272 | |
1273 | static bool |
1274 | find_bivs (struct ivopts_data *data) |
1275 | { |
1276 | gphi *phi; |
1277 | affine_iv iv; |
1278 | tree step, type, base, stop; |
1279 | bool found = false; |
1280 | struct loop *loop = data->current_loop; |
1281 | gphi_iterator psi; |
1282 | |
1283 | for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) |
1284 | { |
1285 | phi = psi.phi (); |
1286 | |
1287 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) |
1288 | continue; |
1289 | |
1290 | if (virtual_operand_p (PHI_RESULT (phi))) |
1291 | continue; |
1292 | |
1293 | if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true)) |
1294 | continue; |
1295 | |
1296 | if (integer_zerop (iv.step)) |
1297 | continue; |
1298 | |
1299 | step = iv.step; |
1300 | base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); |
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var, but since that is
	 expensive and unlikely to matter in practice, we just do it for the
	 first one.

	 See PR64705 for the rationale.  */
1306 | stop = extract_single_var_from_expr (step); |
1307 | base = expand_simple_operations (base, stop); |
1308 | if (contains_abnormal_ssa_name_p (base) |
1309 | || contains_abnormal_ssa_name_p (step)) |
1310 | continue; |
1311 | |
1312 | type = TREE_TYPE (PHI_RESULT (phi)); |
1313 | base = fold_convert (type, base); |
1314 | if (step) |
1315 | { |
1316 | if (POINTER_TYPE_P (type)) |
1317 | step = convert_to_ptrofftype (step); |
1318 | else |
1319 | step = fold_convert (type, step); |
1320 | } |
1321 | |
1322 | set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow); |
1323 | found = true; |
1324 | } |
1325 | |
1326 | return found; |
1327 | } |
1328 | |
1329 | /* Marks basic ivs. */ |
1330 | |
1331 | static void |
1332 | mark_bivs (struct ivopts_data *data) |
1333 | { |
1334 | gphi *phi; |
1335 | gimple *def; |
1336 | tree var; |
1337 | struct iv *iv, *incr_iv; |
1338 | struct loop *loop = data->current_loop; |
1339 | basic_block incr_bb; |
1340 | gphi_iterator psi; |
1341 | |
1342 | data->bivs_not_used_in_addr = 0; |
1343 | for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) |
1344 | { |
1345 | phi = psi.phi (); |
1346 | |
1347 | iv = get_iv (data, PHI_RESULT (phi)); |
1348 | if (!iv) |
1349 | continue; |
1350 | |
1351 | var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); |
1352 | def = SSA_NAME_DEF_STMT (var); |
      /* Don't mark an iv peeled from another one as a biv.  */
1354 | if (def |
1355 | && gimple_code (def) == GIMPLE_PHI |
1356 | && gimple_bb (def) == loop->header) |
1357 | continue; |
1358 | |
1359 | incr_iv = get_iv (data, var); |
1360 | if (!incr_iv) |
1361 | continue; |
1362 | |
1363 | /* If the increment is in the subloop, ignore it. */ |
1364 | incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
1365 | if (incr_bb->loop_father != data->current_loop |
1366 | || (incr_bb->flags & BB_IRREDUCIBLE_LOOP)) |
1367 | continue; |
1368 | |
1369 | iv->biv_p = true; |
1370 | incr_iv->biv_p = true; |
1371 | if (iv->no_overflow) |
1372 | data->bivs_not_used_in_addr++; |
1373 | if (incr_iv->no_overflow) |
1374 | data->bivs_not_used_in_addr++; |
1375 | } |
1376 | } |
1377 | |
1378 | /* Checks whether STMT defines a linear induction variable and stores its |
1379 | parameters to IV. */ |
1380 | |
1381 | static bool |
1382 | find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv) |
1383 | { |
1384 | tree lhs, stop; |
1385 | struct loop *loop = data->current_loop; |
1386 | |
1387 | iv->base = NULL_TREE; |
1388 | iv->step = NULL_TREE; |
1389 | |
1390 | if (gimple_code (stmt) != GIMPLE_ASSIGN) |
1391 | return false; |
1392 | |
1393 | lhs = gimple_assign_lhs (stmt); |
1394 | if (TREE_CODE (lhs) != SSA_NAME) |
1395 | return false; |
1396 | |
1397 | if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true)) |
1398 | return false; |
1399 | |
  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but since that is
     expensive and unlikely to matter in practice, we just do it for the
     first one.

     See PR64705 for the rationale.  */
1405 | stop = extract_single_var_from_expr (iv->step); |
1406 | iv->base = expand_simple_operations (iv->base, stop); |
1407 | if (contains_abnormal_ssa_name_p (iv->base) |
1408 | || contains_abnormal_ssa_name_p (iv->step)) |
1409 | return false; |
1410 | |
  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
1414 | if (stmt_could_throw_p (stmt)) |
1415 | return false; |
1416 | |
1417 | return true; |
1418 | } |
1419 | |
1420 | /* Finds general ivs in statement STMT. */ |
1421 | |
1422 | static void |
1423 | find_givs_in_stmt (struct ivopts_data *data, gimple *stmt) |
1424 | { |
1425 | affine_iv iv; |
1426 | |
1427 | if (!find_givs_in_stmt_scev (data, stmt, &iv)) |
1428 | return; |
1429 | |
1430 | set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow); |
1431 | } |
1432 | |
1433 | /* Finds general ivs in basic block BB. */ |
1434 | |
1435 | static void |
1436 | find_givs_in_bb (struct ivopts_data *data, basic_block bb) |
1437 | { |
1438 | gimple_stmt_iterator bsi; |
1439 | |
1440 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
1441 | find_givs_in_stmt (data, gsi_stmt (bsi)); |
1442 | } |
1443 | |
1444 | /* Finds general ivs. */ |
1445 | |
1446 | static void |
1447 | find_givs (struct ivopts_data *data) |
1448 | { |
1449 | struct loop *loop = data->current_loop; |
1450 | basic_block *body = get_loop_body_in_dom_order (loop); |
1451 | unsigned i; |
1452 | |
1453 | for (i = 0; i < loop->num_nodes; i++) |
1454 | find_givs_in_bb (data, body[i]); |
1455 | free (body); |
1456 | } |
1457 | |
1458 | /* For each ssa name defined in LOOP determines whether it is an induction |
1459 | variable and if so, its initial value and step. */ |
1460 | |
1461 | static bool |
1462 | find_induction_variables (struct ivopts_data *data) |
1463 | { |
1464 | unsigned i; |
1465 | bitmap_iterator bi; |
1466 | |
1467 | if (!find_bivs (data)) |
1468 | return false; |
1469 | |
1470 | find_givs (data); |
1471 | mark_bivs (data); |
1472 | |
1473 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1474 | { |
1475 | struct tree_niter_desc *niter = niter_for_single_dom_exit (data); |
1476 | |
1477 | if (niter) |
1478 | { |
1479 | fprintf (dump_file, " number of iterations " ); |
1480 | print_generic_expr (dump_file, niter->niter, TDF_SLIM); |
1481 | if (!integer_zerop (niter->may_be_zero)) |
1482 | { |
1483 | fprintf (dump_file, "; zero if " ); |
1484 | print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); |
1485 | } |
1486 | fprintf (dump_file, "\n" ); |
	}
1488 | |
1489 | fprintf (dump_file, "\n<Induction Vars>:\n" ); |
1490 | EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) |
1491 | { |
1492 | struct version_info *info = ver_info (data, i); |
1493 | if (info->iv && info->iv->step && !integer_zerop (info->iv->step)) |
1494 | dump_iv (dump_file, ver_info (data, i)->iv, true, 0); |
1495 | } |
1496 | } |
1497 | |
1498 | return true; |
1499 | } |
1500 | |
1501 | /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP. |
   For an address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from the IV base; for uses of other types,
   both are zero by default.  */
1505 | |
1506 | static struct iv_use * |
1507 | record_use (struct iv_group *group, tree *use_p, struct iv *iv, |
1508 | gimple *stmt, enum use_type type, tree addr_base, |
1509 | unsigned HOST_WIDE_INT addr_offset) |
1510 | { |
1511 | struct iv_use *use = XCNEW (struct iv_use); |
1512 | |
1513 | use->id = group->vuses.length (); |
1514 | use->group_id = group->id; |
1515 | use->type = type; |
1516 | use->iv = iv; |
1517 | use->stmt = stmt; |
1518 | use->op_p = use_p; |
1519 | use->addr_base = addr_base; |
1520 | use->addr_offset = addr_offset; |
1521 | |
1522 | group->vuses.safe_push (use); |
1523 | return use; |
1524 | } |
1525 | |
1526 | /* Checks whether OP is a loop-level invariant and if so, records it. |
1527 | NONLINEAR_USE is true if the invariant is used in a way we do not |
1528 | handle specially. */ |
1529 | |
1530 | static void |
1531 | record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) |
1532 | { |
1533 | basic_block bb; |
1534 | struct version_info *info; |
1535 | |
1536 | if (TREE_CODE (op) != SSA_NAME |
1537 | || virtual_operand_p (op)) |
1538 | return; |
1539 | |
1540 | bb = gimple_bb (SSA_NAME_DEF_STMT (op)); |
1541 | if (bb |
1542 | && flow_bb_inside_loop_p (data->current_loop, bb)) |
1543 | return; |
1544 | |
1545 | info = name_info (data, op); |
1546 | info->name = op; |
1547 | info->has_nonlin_use |= nonlinear_use; |
1548 | if (!info->inv_id) |
1549 | info->inv_id = ++data->max_inv_var_id; |
1550 | bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); |
1551 | } |
1552 | |
1553 | /* Record a group of TYPE. */ |
1554 | |
1555 | static struct iv_group * |
1556 | record_group (struct ivopts_data *data, enum use_type type) |
1557 | { |
1558 | struct iv_group *group = XCNEW (struct iv_group); |
1559 | |
1560 | group->id = data->vgroups.length (); |
1561 | group->type = type; |
1562 | group->related_cands = BITMAP_ALLOC (NULL); |
1563 | group->vuses.create (1); |
1564 | |
1565 | data->vgroups.safe_push (group); |
1566 | return group; |
1567 | } |
1568 | |
1569 | /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group. |
   A new group will be created if there is no existing group for the use.  */
1571 | |
1572 | static struct iv_use * |
1573 | record_group_use (struct ivopts_data *data, tree *use_p, |
1574 | struct iv *iv, gimple *stmt, enum use_type type) |
1575 | { |
1576 | tree addr_base = NULL; |
1577 | struct iv_group *group = NULL; |
1578 | unsigned HOST_WIDE_INT addr_offset = 0; |
1579 | |
  /* For an address type use, try to find an existing group with the same
     stripped base and step; other uses are always recorded in a new
     group.  */
1581 | if (type == USE_ADDRESS && iv->base_object) |
1582 | { |
1583 | unsigned int i; |
1584 | |
1585 | addr_base = strip_offset (iv->base, &addr_offset); |
1586 | for (i = 0; i < data->vgroups.length (); i++) |
1587 | { |
1588 | struct iv_use *use; |
1589 | |
1590 | group = data->vgroups[i]; |
1591 | use = group->vuses[0]; |
1592 | if (use->type != USE_ADDRESS || !use->iv->base_object) |
1593 | continue; |
1594 | |
1595 | /* Check if it has the same stripped base and step. */ |
1596 | if (operand_equal_p (iv->base_object, use->iv->base_object, 0) |
1597 | && operand_equal_p (iv->step, use->iv->step, 0) |
1598 | && operand_equal_p (addr_base, use->addr_base, 0)) |
1599 | break; |
1600 | } |
1601 | if (i == data->vgroups.length ()) |
1602 | group = NULL; |
1603 | } |
1604 | |
1605 | if (!group) |
1606 | group = record_group (data, type); |
1607 | |
1608 | return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset); |
1609 | } |
1610 | |
1611 | /* Checks whether the use OP is interesting and if so, records it. */ |
1612 | |
1613 | static struct iv_use * |
1614 | find_interesting_uses_op (struct ivopts_data *data, tree op) |
1615 | { |
1616 | struct iv *iv; |
1617 | gimple *stmt; |
1618 | struct iv_use *use; |
1619 | |
1620 | if (TREE_CODE (op) != SSA_NAME) |
1621 | return NULL; |
1622 | |
1623 | iv = get_iv (data, op); |
1624 | if (!iv) |
1625 | return NULL; |
1626 | |
1627 | if (iv->nonlin_use) |
1628 | { |
1629 | gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR); |
1630 | return iv->nonlin_use; |
1631 | } |
1632 | |
1633 | if (integer_zerop (iv->step)) |
1634 | { |
1635 | record_invariant (data, op, true); |
1636 | return NULL; |
1637 | } |
1638 | |
1639 | stmt = SSA_NAME_DEF_STMT (op); |
1640 | gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt)); |
1641 | |
1642 | use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR); |
1643 | iv->nonlin_use = use; |
1644 | return use; |
1645 | } |
1646 | |
1647 | /* Indicate how compare type iv_use can be handled. */ |
1648 | enum comp_iv_rewrite |
1649 | { |
1650 | COMP_IV_NA, |
1651 | /* We may rewrite compare type iv_use by expressing value of the iv_use. */ |
1652 | COMP_IV_EXPR, |
1653 | /* We may rewrite compare type iv_uses on both sides of comparison by |
1654 | expressing value of each iv_use. */ |
1655 | COMP_IV_EXPR_2, |
1656 | /* We may rewrite compare type iv_use by expressing value of the iv_use |
1657 | or by eliminating it with other iv_cand. */ |
1658 | COMP_IV_ELIM |
1659 | }; |
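
/* As a rough illustration (hypothetical conditions): for i < n with i an
   iv and n loop invariant, the use may be expressed or eliminated using
   another candidate (COMP_IV_ELIM); for i < j with both i and j ivs, each
   side may be rewritten (COMP_IV_EXPR_2); for i < t with t defined inside
   the loop but neither an iv nor invariant, only the iv side can be
   expressed (COMP_IV_EXPR).  */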
1660 | |
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and a COMP_IV_* value describing how
   the comparison can be rewritten is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   COMP_IV_NA is returned.  */
1668 | |
1669 | static enum comp_iv_rewrite |
1670 | extract_cond_operands (struct ivopts_data *data, gimple *stmt, |
1671 | tree **control_var, tree **bound, |
1672 | struct iv **iv_var, struct iv **iv_bound) |
1673 | { |
1674 | /* The objects returned when COND has constant operands. */ |
1675 | static struct iv const_iv; |
1676 | static tree zero; |
1677 | tree *op0 = &zero, *op1 = &zero; |
1678 | struct iv *iv0 = &const_iv, *iv1 = &const_iv; |
1679 | enum comp_iv_rewrite rewrite_type = COMP_IV_NA; |
1680 | |
1681 | if (gimple_code (stmt) == GIMPLE_COND) |
1682 | { |
1683 | gcond *cond_stmt = as_a <gcond *> (stmt); |
1684 | op0 = gimple_cond_lhs_ptr (cond_stmt); |
1685 | op1 = gimple_cond_rhs_ptr (cond_stmt); |
1686 | } |
1687 | else |
1688 | { |
1689 | op0 = gimple_assign_rhs1_ptr (stmt); |
1690 | op1 = gimple_assign_rhs2_ptr (stmt); |
1691 | } |
1692 | |
1693 | zero = integer_zero_node; |
1694 | const_iv.step = integer_zero_node; |
1695 | |
1696 | if (TREE_CODE (*op0) == SSA_NAME) |
1697 | iv0 = get_iv (data, *op0); |
1698 | if (TREE_CODE (*op1) == SSA_NAME) |
1699 | iv1 = get_iv (data, *op1); |
1700 | |
  /* If both sides of the comparison are IVs, we can express ivs on both
     ends.  */
1702 | if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step)) |
1703 | { |
1704 | rewrite_type = COMP_IV_EXPR_2; |
1705 | goto end; |
1706 | } |
1707 | |
  /* If neither side of the comparison is an IV.  */
1709 | if ((!iv0 || integer_zerop (iv0->step)) |
1710 | && (!iv1 || integer_zerop (iv1->step))) |
1711 | goto end; |
1712 | |
1713 | /* Control variable may be on the other side. */ |
1714 | if (!iv0 || integer_zerop (iv0->step)) |
1715 | { |
1716 | std::swap (op0, op1); |
1717 | std::swap (iv0, iv1); |
1718 | } |
  /* If one side is an IV and the other side isn't loop invariant.  */
1720 | if (!iv1) |
1721 | rewrite_type = COMP_IV_EXPR; |
  /* If one side is an IV and the other side is loop invariant.  */
1723 | else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step)) |
1724 | rewrite_type = COMP_IV_ELIM; |
1725 | |
1726 | end: |
1727 | if (control_var) |
1728 | *control_var = op0; |
1729 | if (iv_var) |
1730 | *iv_var = iv0; |
1731 | if (bound) |
1732 | *bound = op1; |
1733 | if (iv_bound) |
1734 | *iv_bound = iv1; |
1735 | |
1736 | return rewrite_type; |
1737 | } |
1738 | |
1739 | /* Checks whether the condition in STMT is interesting and if so, |
1740 | records it. */ |
1741 | |
1742 | static void |
1743 | find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt) |
1744 | { |
1745 | tree *var_p, *bound_p; |
1746 | struct iv *var_iv, *bound_iv; |
1747 | enum comp_iv_rewrite ret; |
1748 | |
1749 | ret = extract_cond_operands (data, stmt, |
1750 | &var_p, &bound_p, &var_iv, &bound_iv); |
1751 | if (ret == COMP_IV_NA) |
1752 | { |
1753 | find_interesting_uses_op (data, *var_p); |
1754 | find_interesting_uses_op (data, *bound_p); |
1755 | return; |
1756 | } |
1757 | |
1758 | record_group_use (data, var_p, var_iv, stmt, USE_COMPARE); |
1759 | /* Record compare type iv_use for iv on the other side of comparison. */ |
1760 | if (ret == COMP_IV_EXPR_2) |
1761 | record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE); |
1762 | } |
1763 | |
/* Returns the outermost loop, relative to LOOP, in which EXPR is
obviously invariant, i.e. such that all its operands are defined
outside of the returned loop. Returns NULL if EXPR is not even
obviously invariant in LOOP. */
1768 | |
1769 | struct loop * |
1770 | outermost_invariant_loop_for_expr (struct loop *loop, tree expr) |
1771 | { |
1772 | basic_block def_bb; |
1773 | unsigned i, len; |
1774 | |
1775 | if (is_gimple_min_invariant (expr)) |
1776 | return current_loops->tree_root; |
1777 | |
1778 | if (TREE_CODE (expr) == SSA_NAME) |
1779 | { |
1780 | def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); |
1781 | if (def_bb) |
1782 | { |
1783 | if (flow_bb_inside_loop_p (loop, def_bb)) |
1784 | return NULL; |
1785 | return superloop_at_depth (loop, |
1786 | loop_depth (def_bb->loop_father) + 1); |
1787 | } |
1788 | |
1789 | return current_loops->tree_root; |
1790 | } |
1791 | |
1792 | if (!EXPR_P (expr)) |
1793 | return NULL; |
1794 | |
1795 | unsigned maxdepth = 0; |
1796 | len = TREE_OPERAND_LENGTH (expr); |
1797 | for (i = 0; i < len; i++) |
1798 | { |
1799 | struct loop *ivloop; |
1800 | if (!TREE_OPERAND (expr, i)) |
1801 | continue; |
1802 | |
1803 | ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); |
1804 | if (!ivloop) |
1805 | return NULL; |
1806 | maxdepth = MAX (maxdepth, loop_depth (ivloop)); |
1807 | } |
1808 | |
1809 | return superloop_at_depth (loop, maxdepth); |
1810 | } |
1811 | |
1812 | /* Returns true if expression EXPR is obviously invariant in LOOP, |
1813 | i.e. if all its operands are defined outside of the LOOP. LOOP |
1814 | should not be the function body. */ |
1815 | |
1816 | bool |
1817 | expr_invariant_in_loop_p (struct loop *loop, tree expr) |
1818 | { |
1819 | basic_block def_bb; |
1820 | unsigned i, len; |
1821 | |
1822 | gcc_assert (loop_depth (loop) > 0); |
1823 | |
1824 | if (is_gimple_min_invariant (expr)) |
1825 | return true; |
1826 | |
1827 | if (TREE_CODE (expr) == SSA_NAME) |
1828 | { |
1829 | def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); |
1830 | if (def_bb |
1831 | && flow_bb_inside_loop_p (loop, def_bb)) |
1832 | return false; |
1833 | |
1834 | return true; |
1835 | } |
1836 | |
1837 | if (!EXPR_P (expr)) |
1838 | return false; |
1839 | |
1840 | len = TREE_OPERAND_LENGTH (expr); |
1841 | for (i = 0; i < len; i++) |
1842 | if (TREE_OPERAND (expr, i) |
1843 | && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i))) |
1844 | return false; |
1845 | |
1846 | return true; |
1847 | } |
1848 | |
/* Given expression EXPR which computes inductive values with respect
to the loop recorded in DATA, this function returns the biv from which
EXPR is derived, by tracing the definition chains of the SSA names in
EXPR. */
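/* For instance (hypothetical SSA names): if EXPR is q_9, defined by
"q_9 = i_4 * 4" where i_4 is a biv of the loop, the iv descriptor of
i_4 is returned. */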
1852 | |
1853 | static struct iv* |
1854 | find_deriving_biv_for_expr (struct ivopts_data *data, tree expr) |
1855 | { |
1856 | struct iv *iv; |
1857 | unsigned i, n; |
1858 | tree e2, e1; |
1859 | enum tree_code code; |
1860 | gimple *stmt; |
1861 | |
1862 | if (expr == NULL_TREE) |
1863 | return NULL; |
1864 | |
1865 | if (is_gimple_min_invariant (expr)) |
1866 | return NULL; |
1867 | |
1868 | code = TREE_CODE (expr); |
1869 | if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) |
1870 | { |
1871 | n = TREE_OPERAND_LENGTH (expr); |
1872 | for (i = 0; i < n; i++) |
1873 | { |
1874 | iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i)); |
1875 | if (iv) |
1876 | return iv; |
1877 | } |
1878 | } |
1879 | |
/* Stop if it's not an SSA name. */
1881 | if (code != SSA_NAME) |
1882 | return NULL; |
1883 | |
1884 | iv = get_iv (data, expr); |
1885 | if (!iv || integer_zerop (iv->step)) |
1886 | return NULL; |
1887 | else if (iv->biv_p) |
1888 | return iv; |
1889 | |
1890 | stmt = SSA_NAME_DEF_STMT (expr); |
1891 | if (gphi *phi = dyn_cast <gphi *> (stmt)) |
1892 | { |
1893 | ssa_op_iter iter; |
1894 | use_operand_p use_p; |
1895 | basic_block phi_bb = gimple_bb (phi); |
1896 | |
/* Give up on PHI nodes in the current loop that do not define a biv
(bivs were handled above). */
1898 | if (phi_bb->loop_father == data->current_loop) |
1899 | return NULL; |
1900 | |
1901 | if (virtual_operand_p (gimple_phi_result (phi))) |
1902 | return NULL; |
1903 | |
1904 | FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) |
1905 | { |
1906 | tree use = USE_FROM_PTR (use_p); |
1907 | iv = find_deriving_biv_for_expr (data, use); |
1908 | if (iv) |
1909 | return iv; |
1910 | } |
1911 | return NULL; |
1912 | } |
1913 | if (gimple_code (stmt) != GIMPLE_ASSIGN) |
1914 | return NULL; |
1915 | |
1916 | e1 = gimple_assign_rhs1 (stmt); |
1917 | code = gimple_assign_rhs_code (stmt); |
1918 | if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) |
1919 | return find_deriving_biv_for_expr (data, e1); |
1920 | |
1921 | switch (code) |
1922 | { |
1923 | case MULT_EXPR: |
1924 | case PLUS_EXPR: |
1925 | case MINUS_EXPR: |
1926 | case POINTER_PLUS_EXPR: |
1927 | /* Increments, decrements and multiplications by a constant |
1928 | are simple. */ |
1929 | e2 = gimple_assign_rhs2 (stmt); |
1930 | iv = find_deriving_biv_for_expr (data, e2); |
1931 | if (iv) |
1932 | return iv; |
1933 | gcc_fallthrough (); |
1934 | |
1935 | CASE_CONVERT: |
1936 | /* Casts are simple. */ |
1937 | return find_deriving_biv_for_expr (data, e1); |
1938 | |
1939 | default: |
1940 | break; |
1941 | } |
1942 | |
1943 | return NULL; |
1944 | } |
1945 | |
/* Record that BIV is used in an address type use, and do the same for
the bivs that are its predecessor and successor, i.e. those whose
bases differ from BIV's base by exactly one step. */
1948 | |
1949 | static void |
1950 | record_biv_for_address_use (struct ivopts_data *data, struct iv *biv) |
1951 | { |
1952 | unsigned i; |
1953 | tree type, base_1, base_2; |
1954 | bitmap_iterator bi; |
1955 | |
1956 | if (!biv || !biv->biv_p || integer_zerop (biv->step) |
1957 | || biv->have_address_use || !biv->no_overflow) |
1958 | return; |
1959 | |
1960 | type = TREE_TYPE (biv->base); |
1961 | if (!INTEGRAL_TYPE_P (type)) |
1962 | return; |
1963 | |
1964 | biv->have_address_use = true; |
1965 | data->bivs_not_used_in_addr--; |
1966 | base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step); |
1967 | EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) |
1968 | { |
1969 | struct iv *iv = ver_info (data, i)->iv; |
1970 | |
1971 | if (!iv || !iv->biv_p || integer_zerop (iv->step) |
1972 | || iv->have_address_use || !iv->no_overflow) |
1973 | continue; |
1974 | |
1975 | if (type != TREE_TYPE (iv->base) |
1976 | || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base))) |
1977 | continue; |
1978 | |
1979 | if (!operand_equal_p (biv->step, iv->step, 0)) |
1980 | continue; |
1981 | |
1982 | base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step); |
1983 | if (operand_equal_p (base_1, iv->base, 0) |
1984 | || operand_equal_p (base_2, biv->base, 0)) |
1985 | { |
1986 | iv->have_address_use = true; |
1987 | data->bivs_not_used_in_addr--; |
1988 | } |
1989 | } |
1990 | } |
1991 | |
/* Accumulates the steps of indices into DATA and replaces their values
with the initial ones. Returns false when the value of the index
cannot be determined. Callback for for_each_index. */
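/* For instance (illustrative): for a reference a[i_1] where i_1 is an
iv with step 1 and 4-byte array elements, the index is replaced by the
initial value of i_1 and 4 is added to the step accumulated in DATA
(assuming the conversion of the iv to sizetype succeeds). */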
1995 | |
1996 | struct ifs_ivopts_data |
1997 | { |
1998 | struct ivopts_data *ivopts_data; |
1999 | gimple *stmt; |
2000 | tree step; |
2001 | }; |
2002 | |
2003 | static bool |
2004 | idx_find_step (tree base, tree *idx, void *data) |
2005 | { |
2006 | struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; |
2007 | struct iv *iv; |
2008 | bool use_overflow_semantics = false; |
2009 | tree step, iv_base, iv_step, lbound, off; |
2010 | struct loop *loop = dta->ivopts_data->current_loop; |
2011 | |
2012 | /* If base is a component ref, require that the offset of the reference |
2013 | be invariant. */ |
2014 | if (TREE_CODE (base) == COMPONENT_REF) |
2015 | { |
2016 | off = component_ref_field_offset (base); |
2017 | return expr_invariant_in_loop_p (loop, off); |
2018 | } |
2019 | |
2020 | /* If base is array, first check whether we will be able to move the |
2021 | reference out of the loop (in order to take its address in strength |
2022 | reduction). In order for this to work we need both lower bound |
2023 | and step to be loop invariants. */ |
2024 | if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) |
2025 | { |
2026 | /* Moreover, for a range, the size needs to be invariant as well. */ |
2027 | if (TREE_CODE (base) == ARRAY_RANGE_REF |
2028 | && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) |
2029 | return false; |
2030 | |
2031 | step = array_ref_element_size (base); |
2032 | lbound = array_ref_low_bound (base); |
2033 | |
2034 | if (!expr_invariant_in_loop_p (loop, step) |
2035 | || !expr_invariant_in_loop_p (loop, lbound)) |
2036 | return false; |
2037 | } |
2038 | |
2039 | if (TREE_CODE (*idx) != SSA_NAME) |
2040 | return true; |
2041 | |
2042 | iv = get_iv (dta->ivopts_data, *idx); |
2043 | if (!iv) |
2044 | return false; |
2045 | |
/* XXX For a base of *D42 with iv->base being &x[0], we produce
*&x[0], which is not folded and does not trigger the
ARRAY_REF path below. */
2049 | *idx = iv->base; |
2050 | |
2051 | if (integer_zerop (iv->step)) |
2052 | return true; |
2053 | |
2054 | if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) |
2055 | { |
2056 | step = array_ref_element_size (base); |
2057 | |
2058 | /* We only handle addresses whose step is an integer constant. */ |
2059 | if (TREE_CODE (step) != INTEGER_CST) |
2060 | return false; |
2061 | } |
2062 | else |
/* The step for pointer arithmetic is already 1 byte. */
2064 | step = size_one_node; |
2065 | |
2066 | iv_base = iv->base; |
2067 | iv_step = iv->step; |
2068 | if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) |
2069 | use_overflow_semantics = true; |
2070 | |
2071 | if (!convert_affine_scev (dta->ivopts_data->current_loop, |
2072 | sizetype, &iv_base, &iv_step, dta->stmt, |
2073 | use_overflow_semantics)) |
2074 | { |
2075 | /* The index might wrap. */ |
2076 | return false; |
2077 | } |
2078 | |
2079 | step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); |
2080 | dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); |
2081 | |
2082 | if (dta->ivopts_data->bivs_not_used_in_addr) |
2083 | { |
2084 | if (!iv->biv_p) |
2085 | iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name); |
2086 | |
2087 | record_biv_for_address_use (dta->ivopts_data, iv); |
2088 | } |
2089 | return true; |
2090 | } |
2091 | |
2092 | /* Records use in index IDX. Callback for for_each_index. Ivopts data |
2093 | object is passed to it in DATA. */ |
2094 | |
2095 | static bool |
2096 | idx_record_use (tree base, tree *idx, |
2097 | void *vdata) |
2098 | { |
2099 | struct ivopts_data *data = (struct ivopts_data *) vdata; |
2100 | find_interesting_uses_op (data, *idx); |
2101 | if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) |
2102 | { |
2103 | find_interesting_uses_op (data, array_ref_element_size (base)); |
2104 | find_interesting_uses_op (data, array_ref_low_bound (base)); |
2105 | } |
2106 | return true; |
2107 | } |
2108 | |
2109 | /* If we can prove that TOP = cst * BOT for some constant cst, |
2110 | store cst to MUL and return true. Otherwise return false. |
2111 | The returned value is always sign-extended, regardless of the |
2112 | signedness of TOP and BOT. */ |
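/* For instance (hypothetical SSA names): for TOP = "i_1 * 8" and
BOT = "i_1" the function stores 8 to *MUL and returns true; for
TOP = 12 and BOT = 4 it stores 3; for TOP = "i_1 + 1" and BOT = "i_1"
it returns false. */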
2113 | |
2114 | static bool |
2115 | constant_multiple_of (tree top, tree bot, widest_int *mul) |
2116 | { |
2117 | tree mby; |
2118 | enum tree_code code; |
2119 | unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); |
2120 | widest_int res, p0, p1; |
2121 | |
2122 | STRIP_NOPS (top); |
2123 | STRIP_NOPS (bot); |
2124 | |
2125 | if (operand_equal_p (top, bot, 0)) |
2126 | { |
2127 | *mul = 1; |
2128 | return true; |
2129 | } |
2130 | |
2131 | code = TREE_CODE (top); |
2132 | switch (code) |
2133 | { |
2134 | case MULT_EXPR: |
2135 | mby = TREE_OPERAND (top, 1); |
2136 | if (TREE_CODE (mby) != INTEGER_CST) |
2137 | return false; |
2138 | |
2139 | if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) |
2140 | return false; |
2141 | |
2142 | *mul = wi::sext (res * wi::to_widest (mby), precision); |
2143 | return true; |
2144 | |
2145 | case PLUS_EXPR: |
2146 | case MINUS_EXPR: |
2147 | if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) |
2148 | || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) |
2149 | return false; |
2150 | |
2151 | if (code == MINUS_EXPR) |
2152 | p1 = -p1; |
2153 | *mul = wi::sext (p0 + p1, precision); |
2154 | return true; |
2155 | |
2156 | case INTEGER_CST: |
2157 | if (TREE_CODE (bot) != INTEGER_CST) |
2158 | return false; |
2159 | |
2160 | p0 = widest_int::from (wi::to_wide (top), SIGNED); |
2161 | p1 = widest_int::from (wi::to_wide (bot), SIGNED); |
2162 | if (p1 == 0) |
2163 | return false; |
2164 | *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); |
2165 | return res == 0; |
2166 | |
2167 | default: |
2168 | return false; |
2169 | } |
2170 | } |
2171 | |
2172 | /* Return true if memory reference REF with step STEP may be unaligned. */ |
2173 | |
2174 | static bool |
2175 | may_be_unaligned_p (tree ref, tree step) |
2176 | { |
2177 | /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, |
2178 | thus they are not misaligned. */ |
2179 | if (TREE_CODE (ref) == TARGET_MEM_REF) |
2180 | return false; |
2181 | |
2182 | unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); |
2183 | if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) |
2184 | align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); |
2185 | |
2186 | unsigned HOST_WIDE_INT bitpos; |
2187 | unsigned int ref_align; |
2188 | get_object_alignment_1 (ref, &ref_align, &bitpos); |
2189 | if (ref_align < align |
2190 | || (bitpos % align) != 0 |
2191 | || (bitpos % BITS_PER_UNIT) != 0) |
2192 | return true; |
2193 | |
2194 | unsigned int trailing_zeros = tree_ctz (step); |
2195 | if (trailing_zeros < HOST_BITS_PER_INT |
2196 | && (1U << trailing_zeros) * BITS_PER_UNIT < align) |
2197 | return true; |
2198 | |
2199 | return false; |
2200 | } |
2201 | |
2202 | /* Return true if EXPR may be non-addressable. */ |
2203 | |
2204 | bool |
2205 | may_be_nonaddressable_p (tree expr) |
2206 | { |
2207 | switch (TREE_CODE (expr)) |
2208 | { |
2209 | case TARGET_MEM_REF: |
2210 | /* TARGET_MEM_REFs are translated directly to valid MEMs on the |
2211 | target, thus they are always addressable. */ |
2212 | return false; |
2213 | |
2214 | case MEM_REF: |
2215 | /* Likewise for MEM_REFs, modulo the storage order. */ |
2216 | return REF_REVERSE_STORAGE_ORDER (expr); |
2217 | |
2218 | case BIT_FIELD_REF: |
2219 | if (REF_REVERSE_STORAGE_ORDER (expr)) |
2220 | return true; |
2221 | return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); |
2222 | |
2223 | case COMPONENT_REF: |
2224 | if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) |
2225 | return true; |
2226 | return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) |
2227 | || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); |
2228 | |
2229 | case ARRAY_REF: |
2230 | case ARRAY_RANGE_REF: |
2231 | if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) |
2232 | return true; |
2233 | return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); |
2234 | |
2235 | case VIEW_CONVERT_EXPR: |
2236 | /* This kind of view-conversions may wrap non-addressable objects |
2237 | and make them look addressable. After some processing the |
2238 | non-addressability may be uncovered again, causing ADDR_EXPRs |
2239 | of inappropriate objects to be built. */ |
2240 | if (is_gimple_reg (TREE_OPERAND (expr, 0)) |
2241 | || !is_gimple_addressable (TREE_OPERAND (expr, 0))) |
2242 | return true; |
2243 | return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); |
2244 | |
2245 | CASE_CONVERT: |
2246 | return true; |
2247 | |
2248 | default: |
2249 | break; |
2250 | } |
2251 | |
2252 | return false; |
2253 | } |
2254 | |
2255 | /* Finds addresses in *OP_P inside STMT. */ |
2256 | |
2257 | static void |
2258 | find_interesting_uses_address (struct ivopts_data *data, gimple *stmt, |
2259 | tree *op_p) |
2260 | { |
2261 | tree base = *op_p, step = size_zero_node; |
2262 | struct iv *civ; |
2263 | struct ifs_ivopts_data ifs_ivopts_data; |
2264 | |
2265 | /* Do not play with volatile memory references. A bit too conservative, |
2266 | perhaps, but safe. */ |
2267 | if (gimple_has_volatile_ops (stmt)) |
2268 | goto fail; |
2269 | |
2270 | /* Ignore bitfields for now. Not really something terribly complicated |
2271 | to handle. TODO. */ |
2272 | if (TREE_CODE (base) == BIT_FIELD_REF) |
2273 | goto fail; |
2274 | |
2275 | base = unshare_expr (base); |
2276 | |
2277 | if (TREE_CODE (base) == TARGET_MEM_REF) |
2278 | { |
2279 | tree type = build_pointer_type (TREE_TYPE (base)); |
2280 | tree astep; |
2281 | |
2282 | if (TMR_BASE (base) |
2283 | && TREE_CODE (TMR_BASE (base)) == SSA_NAME) |
2284 | { |
2285 | civ = get_iv (data, TMR_BASE (base)); |
2286 | if (!civ) |
2287 | goto fail; |
2288 | |
2289 | TMR_BASE (base) = civ->base; |
2290 | step = civ->step; |
2291 | } |
2292 | if (TMR_INDEX2 (base) |
2293 | && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) |
2294 | { |
2295 | civ = get_iv (data, TMR_INDEX2 (base)); |
2296 | if (!civ) |
2297 | goto fail; |
2298 | |
2299 | TMR_INDEX2 (base) = civ->base; |
2300 | step = civ->step; |
2301 | } |
2302 | if (TMR_INDEX (base) |
2303 | && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) |
2304 | { |
2305 | civ = get_iv (data, TMR_INDEX (base)); |
2306 | if (!civ) |
2307 | goto fail; |
2308 | |
2309 | TMR_INDEX (base) = civ->base; |
2310 | astep = civ->step; |
2311 | |
2312 | if (astep) |
2313 | { |
2314 | if (TMR_STEP (base)) |
2315 | astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); |
2316 | |
2317 | step = fold_build2 (PLUS_EXPR, type, step, astep); |
2318 | } |
2319 | } |
2320 | |
2321 | if (integer_zerop (step)) |
2322 | goto fail; |
2323 | base = tree_mem_ref_addr (type, base); |
2324 | } |
2325 | else |
2326 | { |
2327 | ifs_ivopts_data.ivopts_data = data; |
2328 | ifs_ivopts_data.stmt = stmt; |
2329 | ifs_ivopts_data.step = size_zero_node; |
2330 | if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) |
2331 | || integer_zerop (ifs_ivopts_data.step)) |
2332 | goto fail; |
2333 | step = ifs_ivopts_data.step; |
2334 | |
2335 | /* Check that the base expression is addressable. This needs |
2336 | to be done after substituting bases of IVs into it. */ |
2337 | if (may_be_nonaddressable_p (base)) |
2338 | goto fail; |
2339 | |
2340 | /* Moreover, on strict alignment platforms, check that it is |
2341 | sufficiently aligned. */ |
2342 | if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) |
2343 | goto fail; |
2344 | |
2345 | base = build_fold_addr_expr (base); |
2346 | |
2347 | /* Substituting bases of IVs into the base expression might |
2348 | have caused folding opportunities. */ |
2349 | if (TREE_CODE (base) == ADDR_EXPR) |
2350 | { |
2351 | tree *ref = &TREE_OPERAND (base, 0); |
2352 | while (handled_component_p (*ref)) |
2353 | ref = &TREE_OPERAND (*ref, 0); |
2354 | if (TREE_CODE (*ref) == MEM_REF) |
2355 | { |
2356 | tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), |
2357 | TREE_OPERAND (*ref, 0), |
2358 | TREE_OPERAND (*ref, 1)); |
2359 | if (tem) |
2360 | *ref = tem; |
2361 | } |
2362 | } |
2363 | } |
2364 | |
2365 | civ = alloc_iv (data, base, step); |
2366 | /* Fail if base object of this memory reference is unknown. */ |
2367 | if (civ->base_object == NULL_TREE) |
2368 | goto fail; |
2369 | |
2370 | record_group_use (data, op_p, civ, stmt, USE_ADDRESS); |
2371 | return; |
2372 | |
2373 | fail: |
2374 | for_each_index (op_p, idx_record_use, data); |
2375 | } |
2376 | |
2377 | /* Finds and records invariants used in STMT. */ |
2378 | |
2379 | static void |
2380 | find_invariants_stmt (struct ivopts_data *data, gimple *stmt) |
2381 | { |
2382 | ssa_op_iter iter; |
2383 | use_operand_p use_p; |
2384 | tree op; |
2385 | |
2386 | FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) |
2387 | { |
2388 | op = USE_FROM_PTR (use_p); |
2389 | record_invariant (data, op, false); |
2390 | } |
2391 | } |
2392 | |
2393 | /* Finds interesting uses of induction variables in the statement STMT. */ |
2394 | |
2395 | static void |
2396 | find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt) |
2397 | { |
2398 | struct iv *iv; |
2399 | tree op, *lhs, *rhs; |
2400 | ssa_op_iter iter; |
2401 | use_operand_p use_p; |
2402 | enum tree_code code; |
2403 | |
2404 | find_invariants_stmt (data, stmt); |
2405 | |
2406 | if (gimple_code (stmt) == GIMPLE_COND) |
2407 | { |
2408 | find_interesting_uses_cond (data, stmt); |
2409 | return; |
2410 | } |
2411 | |
2412 | if (is_gimple_assign (stmt)) |
2413 | { |
2414 | lhs = gimple_assign_lhs_ptr (stmt); |
2415 | rhs = gimple_assign_rhs1_ptr (stmt); |
2416 | |
2417 | if (TREE_CODE (*lhs) == SSA_NAME) |
2418 | { |
2419 | /* If the statement defines an induction variable, the uses are not |
2420 | interesting by themselves. */ |
2421 | |
2422 | iv = get_iv (data, *lhs); |
2423 | |
2424 | if (iv && !integer_zerop (iv->step)) |
2425 | return; |
2426 | } |
2427 | |
2428 | code = gimple_assign_rhs_code (stmt); |
2429 | if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS |
2430 | && (REFERENCE_CLASS_P (*rhs) |
2431 | || is_gimple_val (*rhs))) |
2432 | { |
2433 | if (REFERENCE_CLASS_P (*rhs)) |
2434 | find_interesting_uses_address (data, stmt, rhs); |
2435 | else |
2436 | find_interesting_uses_op (data, *rhs); |
2437 | |
2438 | if (REFERENCE_CLASS_P (*lhs)) |
2439 | find_interesting_uses_address (data, stmt, lhs); |
2440 | return; |
2441 | } |
2442 | else if (TREE_CODE_CLASS (code) == tcc_comparison) |
2443 | { |
2444 | find_interesting_uses_cond (data, stmt); |
2445 | return; |
2446 | } |
2447 | |
2448 | /* TODO -- we should also handle address uses of type |
2449 | |
2450 | memory = call (whatever); |
2451 | |
2452 | and |
2453 | |
2454 | call (memory). */ |
2455 | } |
2456 | |
2457 | if (gimple_code (stmt) == GIMPLE_PHI |
2458 | && gimple_bb (stmt) == data->current_loop->header) |
2459 | { |
2460 | iv = get_iv (data, PHI_RESULT (stmt)); |
2461 | |
2462 | if (iv && !integer_zerop (iv->step)) |
2463 | return; |
2464 | } |
2465 | |
2466 | FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) |
2467 | { |
2468 | op = USE_FROM_PTR (use_p); |
2469 | |
2470 | if (TREE_CODE (op) != SSA_NAME) |
2471 | continue; |
2472 | |
2473 | iv = get_iv (data, op); |
2474 | if (!iv) |
2475 | continue; |
2476 | |
2477 | find_interesting_uses_op (data, op); |
2478 | } |
2479 | } |
2480 | |
2481 | /* Finds interesting uses of induction variables outside of loops |
2482 | on loop exit edge EXIT. */ |
2483 | |
2484 | static void |
2485 | find_interesting_uses_outside (struct ivopts_data *data, edge exit) |
2486 | { |
2487 | gphi *phi; |
2488 | gphi_iterator psi; |
2489 | tree def; |
2490 | |
2491 | for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) |
2492 | { |
2493 | phi = psi.phi (); |
2494 | def = PHI_ARG_DEF_FROM_EDGE (phi, exit); |
2495 | if (!virtual_operand_p (def)) |
2496 | find_interesting_uses_op (data, def); |
2497 | } |
2498 | } |
2499 | |
2500 | /* Return TRUE if OFFSET is within the range of [base + offset] addressing |
2501 | mode for memory reference represented by USE. */ |
2502 | |
2503 | static GTY (()) vec<rtx, va_gc> *addr_list; |
2504 | |
2505 | static bool |
2506 | addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset) |
2507 | { |
2508 | rtx reg, addr; |
2509 | unsigned list_index; |
2510 | addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); |
2511 | machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); |
2512 | |
2513 | list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; |
2514 | if (list_index >= vec_safe_length (addr_list)) |
2515 | vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE); |
2516 | |
2517 | addr = (*addr_list)[list_index]; |
2518 | if (!addr) |
2519 | { |
2520 | addr_mode = targetm.addr_space.address_mode (as); |
2521 | reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); |
2522 | addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX); |
2523 | (*addr_list)[list_index] = addr; |
2524 | } |
2525 | else |
2526 | addr_mode = GET_MODE (addr); |
2527 | |
2528 | XEXP (addr, 1) = gen_int_mode (offset, addr_mode); |
2529 | return (memory_address_addr_space_p (mem_mode, addr, as)); |
2530 | } |
2531 | |
/* Comparison function to sort a group's uses in ascending order of
addr_offset. */
2533 | |
2534 | static int |
2535 | group_compare_offset (const void *a, const void *b) |
2536 | { |
2537 | const struct iv_use *const *u1 = (const struct iv_use *const *) a; |
2538 | const struct iv_use *const *u2 = (const struct iv_use *const *) b; |
2539 | |
2540 | if ((*u1)->addr_offset != (*u2)->addr_offset) |
2541 | return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1; |
2542 | else |
2543 | return 0; |
2544 | } |
2545 | |
/* Check if small groups should be split. Return true if no group
contains more than two uses with distinct addr_offsets. Return
false otherwise. We want to split such groups because:

1) Small groups don't have much benefit and may interfere with
general candidate selection.
2) The size of a problem with only small groups is usually small and
the general algorithm can handle it well.

TODO -- The above claim may not hold when we want to merge memory
accesses with consecutive addresses. */
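/* For instance, if every address group only contains uses like a[i]
and a[i + 1] (at most two distinct offsets each), this returns true
and the groups are split; the resulting problem is still small and the
general algorithm handles it well. (Illustrative example.) */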
2557 | |
2558 | static bool |
2559 | split_small_address_groups_p (struct ivopts_data *data) |
2560 | { |
2561 | unsigned int i, j, distinct = 1; |
2562 | struct iv_use *pre; |
2563 | struct iv_group *group; |
2564 | |
2565 | for (i = 0; i < data->vgroups.length (); i++) |
2566 | { |
2567 | group = data->vgroups[i]; |
2568 | if (group->vuses.length () == 1) |
2569 | continue; |
2570 | |
2571 | gcc_assert (group->type == USE_ADDRESS); |
2572 | if (group->vuses.length () == 2) |
2573 | { |
2574 | if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset) |
2575 | std::swap (group->vuses[0], group->vuses[1]); |
2576 | } |
2577 | else |
2578 | group->vuses.qsort (group_compare_offset); |
2579 | |
2580 | if (distinct > 2) |
2581 | continue; |
2582 | |
2583 | distinct = 1; |
2584 | for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++) |
2585 | { |
2586 | if (group->vuses[j]->addr_offset != pre->addr_offset) |
2587 | { |
2588 | pre = group->vuses[j]; |
2589 | distinct++; |
2590 | } |
2591 | |
2592 | if (distinct > 2) |
2593 | break; |
2594 | } |
2595 | } |
2596 | |
2597 | return (distinct <= 2); |
2598 | } |
2599 | |
/* For each group of address type uses, this function further splits
the group according to the maximum offset supported by the target's
[base + offset] addressing mode. */
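/* For instance (illustrative, target dependent): if a group contains
uses at offsets 0, 4 and 400000 from the same base and the target's
[base + offset] form only accepts 16-bit signed displacements, the
group is split into {0, 4} and {400000}. */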
2603 | |
2604 | static void |
2605 | split_address_groups (struct ivopts_data *data) |
2606 | { |
2607 | unsigned int i, j; |
/* Whether the address groups should always be split. */
2609 | bool split_p = split_small_address_groups_p (data); |
2610 | |
2611 | for (i = 0; i < data->vgroups.length (); i++) |
2612 | { |
2613 | struct iv_group *new_group = NULL; |
2614 | struct iv_group *group = data->vgroups[i]; |
2615 | struct iv_use *use = group->vuses[0]; |
2616 | |
2617 | use->id = 0; |
2618 | use->group_id = group->id; |
2619 | if (group->vuses.length () == 1) |
2620 | continue; |
2621 | |
2622 | gcc_assert (group->type == USE_ADDRESS); |
2623 | |
2624 | for (j = 1; j < group->vuses.length ();) |
2625 | { |
2626 | struct iv_use *next = group->vuses[j]; |
2627 | HOST_WIDE_INT offset = next->addr_offset - use->addr_offset; |
2628 | |
/* Split the group if asked to, or if the offset against the first
use can't fit in the offset part of the addressing mode. IV uses
having the same offset are still kept in one group. */
2632 | if (offset != 0 && |
2633 | (split_p || !addr_offset_valid_p (use, offset))) |
2634 | { |
2635 | if (!new_group) |
2636 | new_group = record_group (data, group->type); |
2637 | group->vuses.ordered_remove (j); |
2638 | new_group->vuses.safe_push (next); |
2639 | continue; |
2640 | } |
2641 | |
2642 | next->id = j; |
2643 | next->group_id = group->id; |
2644 | j++; |
2645 | } |
2646 | } |
2647 | } |
2648 | |
2649 | /* Finds uses of the induction variables that are interesting. */ |
2650 | |
2651 | static void |
2652 | find_interesting_uses (struct ivopts_data *data) |
2653 | { |
2654 | basic_block bb; |
2655 | gimple_stmt_iterator bsi; |
2656 | basic_block *body = get_loop_body (data->current_loop); |
2657 | unsigned i; |
2658 | edge e; |
2659 | |
2660 | for (i = 0; i < data->current_loop->num_nodes; i++) |
2661 | { |
2662 | edge_iterator ei; |
2663 | bb = body[i]; |
2664 | |
2665 | FOR_EACH_EDGE (e, ei, bb->succs) |
2666 | if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) |
2667 | && !flow_bb_inside_loop_p (data->current_loop, e->dest)) |
2668 | find_interesting_uses_outside (data, e); |
2669 | |
2670 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
2671 | find_interesting_uses_stmt (data, gsi_stmt (bsi)); |
2672 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
2673 | if (!is_gimple_debug (gsi_stmt (bsi))) |
2674 | find_interesting_uses_stmt (data, gsi_stmt (bsi)); |
2675 | } |
2676 | free (body); |
2677 | |
2678 | split_address_groups (data); |
2679 | |
2680 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2681 | { |
fprintf (dump_file, "\n<IV Groups>:\n");
dump_groups (dump_file, data);
fprintf (dump_file, "\n");
2685 | } |
2686 | } |
2687 | |
2688 | /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR |
2689 | is true, assume we are inside an address. If TOP_COMPREF is true, assume |
2690 | we are at the top-level of the processed address. */ |
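/* For example (hypothetical names): strip_offset_1 on "&a[i_1 + 3]"
with 4-byte array elements returns "&a[i_1]" and sets *OFFSET to 12;
on "ptr_2 + 16" it returns "ptr_2" and sets *OFFSET to 16. */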
2691 | |
2692 | static tree |
2693 | strip_offset_1 (tree expr, bool inside_addr, bool top_compref, |
2694 | HOST_WIDE_INT *offset) |
2695 | { |
2696 | tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; |
2697 | enum tree_code code; |
2698 | tree type, orig_type = TREE_TYPE (expr); |
2699 | HOST_WIDE_INT off0, off1, st; |
2700 | tree orig_expr = expr; |
2701 | |
2702 | STRIP_NOPS (expr); |
2703 | |
2704 | type = TREE_TYPE (expr); |
2705 | code = TREE_CODE (expr); |
2706 | *offset = 0; |
2707 | |
2708 | switch (code) |
2709 | { |
2710 | case INTEGER_CST: |
2711 | if (!cst_and_fits_in_hwi (expr) |
2712 | || integer_zerop (expr)) |
2713 | return orig_expr; |
2714 | |
2715 | *offset = int_cst_value (expr); |
2716 | return build_int_cst (orig_type, 0); |
2717 | |
2718 | case POINTER_PLUS_EXPR: |
2719 | case PLUS_EXPR: |
2720 | case MINUS_EXPR: |
2721 | op0 = TREE_OPERAND (expr, 0); |
2722 | op1 = TREE_OPERAND (expr, 1); |
2723 | |
2724 | op0 = strip_offset_1 (op0, false, false, &off0); |
2725 | op1 = strip_offset_1 (op1, false, false, &off1); |
2726 | |
2727 | *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1); |
2728 | if (op0 == TREE_OPERAND (expr, 0) |
2729 | && op1 == TREE_OPERAND (expr, 1)) |
2730 | return orig_expr; |
2731 | |
2732 | if (integer_zerop (op1)) |
2733 | expr = op0; |
2734 | else if (integer_zerop (op0)) |
2735 | { |
2736 | if (code == MINUS_EXPR) |
2737 | expr = fold_build1 (NEGATE_EXPR, type, op1); |
2738 | else |
2739 | expr = op1; |
2740 | } |
2741 | else |
2742 | expr = fold_build2 (code, type, op0, op1); |
2743 | |
2744 | return fold_convert (orig_type, expr); |
2745 | |
2746 | case MULT_EXPR: |
2747 | op1 = TREE_OPERAND (expr, 1); |
2748 | if (!cst_and_fits_in_hwi (op1)) |
2749 | return orig_expr; |
2750 | |
2751 | op0 = TREE_OPERAND (expr, 0); |
2752 | op0 = strip_offset_1 (op0, false, false, &off0); |
2753 | if (op0 == TREE_OPERAND (expr, 0)) |
2754 | return orig_expr; |
2755 | |
2756 | *offset = off0 * int_cst_value (op1); |
2757 | if (integer_zerop (op0)) |
2758 | expr = op0; |
2759 | else |
2760 | expr = fold_build2 (MULT_EXPR, type, op0, op1); |
2761 | |
2762 | return fold_convert (orig_type, expr); |
2763 | |
2764 | case ARRAY_REF: |
2765 | case ARRAY_RANGE_REF: |
2766 | if (!inside_addr) |
2767 | return orig_expr; |
2768 | |
2769 | step = array_ref_element_size (expr); |
2770 | if (!cst_and_fits_in_hwi (step)) |
2771 | break; |
2772 | |
2773 | st = int_cst_value (step); |
2774 | op1 = TREE_OPERAND (expr, 1); |
2775 | op1 = strip_offset_1 (op1, false, false, &off1); |
2776 | *offset = off1 * st; |
2777 | |
2778 | if (top_compref |
2779 | && integer_zerop (op1)) |
2780 | { |
2781 | /* Strip the component reference completely. */ |
2782 | op0 = TREE_OPERAND (expr, 0); |
2783 | op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); |
2784 | *offset += off0; |
2785 | return op0; |
2786 | } |
2787 | break; |
2788 | |
2789 | case COMPONENT_REF: |
2790 | { |
2791 | tree field; |
2792 | |
2793 | if (!inside_addr) |
2794 | return orig_expr; |
2795 | |
2796 | tmp = component_ref_field_offset (expr); |
2797 | field = TREE_OPERAND (expr, 1); |
2798 | if (top_compref |
2799 | && cst_and_fits_in_hwi (tmp) |
2800 | && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) |
2801 | { |
2802 | HOST_WIDE_INT boffset, abs_off; |
2803 | |
2804 | /* Strip the component reference completely. */ |
2805 | op0 = TREE_OPERAND (expr, 0); |
2806 | op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); |
2807 | boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); |
2808 | abs_off = abs_hwi (boffset) / BITS_PER_UNIT; |
2809 | if (boffset < 0) |
2810 | abs_off = -abs_off; |
2811 | |
2812 | *offset = off0 + int_cst_value (tmp) + abs_off; |
2813 | return op0; |
2814 | } |
2815 | } |
2816 | break; |
2817 | |
2818 | case ADDR_EXPR: |
2819 | op0 = TREE_OPERAND (expr, 0); |
2820 | op0 = strip_offset_1 (op0, true, true, &off0); |
2821 | *offset += off0; |
2822 | |
2823 | if (op0 == TREE_OPERAND (expr, 0)) |
2824 | return orig_expr; |
2825 | |
2826 | expr = build_fold_addr_expr (op0); |
2827 | return fold_convert (orig_type, expr); |
2828 | |
2829 | case MEM_REF: |
2830 | /* ??? Offset operand? */ |
2831 | inside_addr = false; |
2832 | break; |
2833 | |
2834 | default: |
2835 | return orig_expr; |
2836 | } |
2837 | |
/* Default handling of expressions for which we want to recurse into
the first operand. */
2840 | op0 = TREE_OPERAND (expr, 0); |
2841 | op0 = strip_offset_1 (op0, inside_addr, false, &off0); |
2842 | *offset += off0; |
2843 | |
2844 | if (op0 == TREE_OPERAND (expr, 0) |
2845 | && (!op1 || op1 == TREE_OPERAND (expr, 1))) |
2846 | return orig_expr; |
2847 | |
2848 | expr = copy_node (expr); |
2849 | TREE_OPERAND (expr, 0) = op0; |
2850 | if (op1) |
2851 | TREE_OPERAND (expr, 1) = op1; |
2852 | |
2853 | /* Inside address, we might strip the top level component references, |
2854 | thus changing type of the expression. Handling of ADDR_EXPR |
2855 | will fix that. */ |
2856 | expr = fold_convert (orig_type, expr); |
2857 | |
2858 | return expr; |
2859 | } |
2860 | |
2861 | /* Strips constant offsets from EXPR and stores them to OFFSET. */ |
2862 | |
2863 | tree |
2864 | strip_offset (tree expr, unsigned HOST_WIDE_INT *offset) |
2865 | { |
2866 | HOST_WIDE_INT off; |
2867 | tree core = strip_offset_1 (expr, false, false, &off); |
2868 | *offset = off; |
2869 | return core; |
2870 | } |
2871 | |
/* Returns a variant of TYPE that can be used as the base type for
different uses. We return an unsigned type with the same precision,
which avoids problems with overflows. */
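/* E.g. "int" is mapped to "unsigned int", and a pointer type to the
unsigned integer type of the same precision (illustrative). */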
2875 | |
2876 | static tree |
2877 | generic_type_for (tree type) |
2878 | { |
2879 | if (POINTER_TYPE_P (type)) |
2880 | return unsigned_type_for (type); |
2881 | |
2882 | if (TYPE_UNSIGNED (type)) |
2883 | return type; |
2884 | |
2885 | return unsigned_type_for (type); |
2886 | } |
2887 | |
2888 | /* Private data for walk_tree. */ |
2889 | |
2890 | struct walk_tree_data |
2891 | { |
2892 | bitmap *inv_vars; |
2893 | struct ivopts_data *idata; |
2894 | }; |
2895 | |
/* Callback function for walk_tree; it records invariants and symbol
references in *EXPR_P. DATA is the structure storing result info. */
2898 | |
2899 | static tree |
2900 | find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) |
2901 | { |
2902 | tree op = *expr_p; |
2903 | struct version_info *info; |
2904 | struct walk_tree_data *wdata = (struct walk_tree_data*) data; |
2905 | |
2906 | if (TREE_CODE (op) != SSA_NAME) |
2907 | return NULL_TREE; |
2908 | |
2909 | info = name_info (wdata->idata, op); |
/* Because we expand simple operations when finding IVs, a loop
invariant variable that isn't referred to by the original loop may
now be used. Record such invariant variables here. */
2913 | if (!info->iv) |
2914 | { |
2915 | struct ivopts_data *idata = wdata->idata; |
2916 | basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op)); |
2917 | |
2918 | if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb)) |
2919 | { |
2920 | set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true); |
2921 | record_invariant (idata, op, false); |
2922 | } |
2923 | } |
2924 | if (!info->inv_id || info->has_nonlin_use) |
2925 | return NULL_TREE; |
2926 | |
2927 | if (!*wdata->inv_vars) |
2928 | *wdata->inv_vars = BITMAP_ALLOC (NULL); |
2929 | bitmap_set_bit (*wdata->inv_vars, info->inv_id); |
2930 | |
2931 | return NULL_TREE; |
2932 | } |
2933 | |
/* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
should store them. */
2936 | |
2937 | static inline void |
2938 | find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars) |
2939 | { |
2940 | struct walk_tree_data wdata; |
2941 | |
2942 | if (!inv_vars) |
2943 | return; |
2944 | |
2945 | wdata.idata = data; |
2946 | wdata.inv_vars = inv_vars; |
2947 | walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL); |
2948 | } |
2949 | |
/* Get the entry from the invariant expr hash table for INV_EXPR. A new
entry will be recorded if one doesn't exist yet. Given the two exprs:
inv_expr + cst1, inv_expr + cst2
it's hard to decide whether the constant part should be stripped or
not. We choose not to strip it, based on the following facts:
1) We need to count the ADD cost for the constant part if it's
stripped, which isn't always trivial where this function is called.
2) Stripping the constant away may conflict with the subsequent loop
invariant hoisting pass.
3) Not stripping the constant results in more invariant exprs, which
usually leads to a decision preferring lower register pressure. */
2961 | |
2962 | static iv_inv_expr_ent * |
2963 | get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr) |
2964 | { |
2965 | STRIP_NOPS (inv_expr); |
2966 | |
2967 | if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME) |
2968 | return NULL; |
2969 | |
2970 | /* Don't strip constant part away as we used to. */ |
2971 | |
/* Store EXPR in DATA->inv_expr_tab and return a pointer to its
iv_inv_expr_ent. */
2973 | struct iv_inv_expr_ent ent; |
2974 | ent.expr = inv_expr; |
2975 | ent.hash = iterative_hash_expr (inv_expr, 0); |
2976 | struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT); |
2977 | |
2978 | if (!*slot) |
2979 | { |
2980 | *slot = XNEW (struct iv_inv_expr_ent); |
2981 | (*slot)->expr = inv_expr; |
2982 | (*slot)->hash = ent.hash; |
2983 | (*slot)->id = ++data->max_inv_expr_id; |
2984 | } |
2985 | |
2986 | return *slot; |
2987 | } |
2988 | |
/* Adds a candidate BASE + STEP * i. The important field is set to
IMPORTANT and the position to POS. If USE is not NULL, the candidate
is set as related to it. BASE and STEP must both be non-NULL. */
2993 | |
2994 | static struct iv_cand * |
2995 | add_candidate_1 (struct ivopts_data *data, |
2996 | tree base, tree step, bool important, enum iv_position pos, |
2997 | struct iv_use *use, gimple *incremented_at, |
2998 | struct iv *orig_iv = NULL) |
2999 | { |
3000 | unsigned i; |
3001 | struct iv_cand *cand = NULL; |
3002 | tree type, orig_type; |
3003 | |
3004 | gcc_assert (base && step); |
3005 | |
3006 | /* -fkeep-gc-roots-live means that we have to keep a real pointer |
3007 | live, but the ivopts code may replace a real pointer with one |
3008 | pointing before or after the memory block that is then adjusted |
3009 | into the memory block during the loop. FIXME: It would likely be |
3010 | better to actually force the pointer live and still use ivopts; |
3011 | for example, it would be enough to write the pointer into memory |
3012 | and keep it there until after the loop. */ |
3013 | if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base))) |
3014 | return NULL; |
3015 | |
3016 | /* For non-original variables, make sure their values are computed in a type |
3017 | that does not invoke undefined behavior on overflows (since in general, |
3018 | we cannot prove that these induction variables are non-wrapping). */ |
3019 | if (pos != IP_ORIGINAL) |
3020 | { |
3021 | orig_type = TREE_TYPE (base); |
3022 | type = generic_type_for (orig_type); |
3023 | if (type != orig_type) |
3024 | { |
3025 | base = fold_convert (type, base); |
3026 | step = fold_convert (type, step); |
3027 | } |
3028 | } |
3029 | |
3030 | for (i = 0; i < data->vcands.length (); i++) |
3031 | { |
3032 | cand = data->vcands[i]; |
3033 | |
3034 | if (cand->pos != pos) |
3035 | continue; |
3036 | |
3037 | if (cand->incremented_at != incremented_at |
3038 | || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) |
3039 | && cand->ainc_use != use)) |
3040 | continue; |
3041 | |
3042 | if (operand_equal_p (base, cand->iv->base, 0) |
3043 | && operand_equal_p (step, cand->iv->step, 0) |
3044 | && (TYPE_PRECISION (TREE_TYPE (base)) |
3045 | == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) |
3046 | break; |
3047 | } |
3048 | |
3049 | if (i == data->vcands.length ()) |
3050 | { |
3051 | cand = XCNEW (struct iv_cand); |
3052 | cand->id = i; |
3053 | cand->iv = alloc_iv (data, base, step); |
3054 | cand->pos = pos; |
3055 | if (pos != IP_ORIGINAL) |
3056 | { |
cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3058 | cand->var_after = cand->var_before; |
3059 | } |
3060 | cand->important = important; |
3061 | cand->incremented_at = incremented_at; |
3062 | data->vcands.safe_push (cand); |
3063 | |
3064 | if (TREE_CODE (step) != INTEGER_CST) |
3065 | { |
3066 | find_inv_vars (data, &step, &cand->inv_vars); |
3067 | |
3068 | iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step); |
3069 | /* Share bitmap between inv_vars and inv_exprs for cand. */ |
3070 | if (inv_expr != NULL) |
3071 | { |
3072 | cand->inv_exprs = cand->inv_vars; |
3073 | cand->inv_vars = NULL; |
3074 | if (cand->inv_exprs) |
3075 | bitmap_clear (cand->inv_exprs); |
3076 | else |
3077 | cand->inv_exprs = BITMAP_ALLOC (NULL); |
3078 | |
3079 | bitmap_set_bit (cand->inv_exprs, inv_expr->id); |
3080 | } |
3081 | } |
3082 | |
3083 | if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) |
3084 | cand->ainc_use = use; |
3085 | else |
3086 | cand->ainc_use = NULL; |
3087 | |
3088 | cand->orig_iv = orig_iv; |
3089 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3090 | dump_cand (dump_file, cand); |
3091 | } |
3092 | |
3093 | cand->important |= important; |
3094 | |
3095 | /* Relate candidate to the group for which it is added. */ |
3096 | if (use) |
3097 | bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i); |
3098 | |
3099 | return cand; |
3100 | } |
3101 | |
3102 | /* Returns true if incrementing the induction variable at the end of the LOOP |
3103 | is allowed. |
3104 | |
The purpose is to avoid splitting the latch edge with a biv increment,
thus creating a jump, possibly confusing other optimization passes and
leaving less freedom to the scheduler. So we allow IP_END only if
IP_NORMAL is not available (so we do not have a better alternative),
or if the latch edge is already nonempty. */
3110 | |
3111 | static bool |
3112 | allow_ip_end_pos_p (struct loop *loop) |
3113 | { |
3114 | if (!ip_normal_pos (loop)) |
3115 | return true; |
3116 | |
3117 | if (!empty_block_p (ip_end_pos (loop))) |
3118 | return true; |
3119 | |
3120 | return false; |
3121 | } |
3122 | |
3123 | /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. |
3124 | Important field is set to IMPORTANT. */ |
3125 | |
3126 | static void |
3127 | add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, |
3128 | bool important, struct iv_use *use) |
3129 | { |
3130 | basic_block use_bb = gimple_bb (use->stmt); |
3131 | machine_mode mem_mode; |
3132 | unsigned HOST_WIDE_INT cstepi; |
3133 | |
3134 | /* If we insert the increment in any position other than the standard |
3135 | ones, we must ensure that it is incremented once per iteration. |
3136 | It must not be in an inner nested loop, or one side of an if |
3137 | statement. */ |
3138 | if (use_bb->loop_father != data->current_loop |
3139 | || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) |
3140 | || stmt_can_throw_internal (use->stmt) |
3141 | || !cst_and_fits_in_hwi (step)) |
3142 | return; |
3143 | |
3144 | cstepi = int_cst_value (step); |
3145 | |
3146 | mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); |
3147 | if (((USE_LOAD_PRE_INCREMENT (mem_mode) |
3148 | || USE_STORE_PRE_INCREMENT (mem_mode)) |
3149 | && GET_MODE_SIZE (mem_mode) == cstepi) |
3150 | || ((USE_LOAD_PRE_DECREMENT (mem_mode) |
3151 | || USE_STORE_PRE_DECREMENT (mem_mode)) |
3152 | && GET_MODE_SIZE (mem_mode) == -cstepi)) |
3153 | { |
3154 | enum tree_code code = MINUS_EXPR; |
3155 | tree new_base; |
3156 | tree new_step = step; |
3157 | |
3158 | if (POINTER_TYPE_P (TREE_TYPE (base))) |
3159 | { |
3160 | new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); |
3161 | code = POINTER_PLUS_EXPR; |
3162 | } |
3163 | else |
3164 | new_step = fold_convert (TREE_TYPE (base), new_step); |
3165 | new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); |
3166 | add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, |
3167 | use->stmt); |
3168 | } |
3169 | if (((USE_LOAD_POST_INCREMENT (mem_mode) |
3170 | || USE_STORE_POST_INCREMENT (mem_mode)) |
3171 | && GET_MODE_SIZE (mem_mode) == cstepi) |
3172 | || ((USE_LOAD_POST_DECREMENT (mem_mode) |
3173 | || USE_STORE_POST_DECREMENT (mem_mode)) |
3174 | && GET_MODE_SIZE (mem_mode) == -cstepi)) |
3175 | { |
3176 | add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, |
3177 | use->stmt); |
3178 | } |
3179 | } |
3180 | |
/* Adds a candidate BASE + STEP * i. The important field is set to
IMPORTANT. If USE is not NULL, the candidate is set as related to it.
The candidate computation is scheduled before the exit condition and
at the end of the loop. */
3185 | |
3186 | static void |
3187 | add_candidate (struct ivopts_data *data, |
3188 | tree base, tree step, bool important, struct iv_use *use, |
3189 | struct iv *orig_iv = NULL) |
3190 | { |
3191 | if (ip_normal_pos (data->current_loop)) |
3192 | add_candidate_1 (data, base, step, important, |
3193 | IP_NORMAL, use, NULL, orig_iv); |
3194 | if (ip_end_pos (data->current_loop) |
3195 | && allow_ip_end_pos_p (data->current_loop)) |
3196 | add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv); |
3197 | } |
3198 | |
3199 | /* Adds standard iv candidates. */ |
3200 | |
3201 | static void |
3202 | add_standard_iv_candidates (struct ivopts_data *data) |
3203 | { |
3204 | add_candidate (data, integer_zero_node, integer_one_node, true, NULL); |
3205 | |
/* The same for the wider 'long' type if it is still fast enough. */
3207 | if (TYPE_PRECISION |
3208 | (long_integer_type_node) > TYPE_PRECISION (integer_type_node) |
3209 | && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) |
3210 | add_candidate (data, build_int_cst (long_integer_type_node, 0), |
3211 | build_int_cst (long_integer_type_node, 1), true, NULL); |
3212 | |
/* And likewise for the 'long long' type if it is still fast enough. */
3214 | if (TYPE_PRECISION |
3215 | (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) |
3216 | && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) |
3217 | add_candidate (data, build_int_cst (long_long_integer_type_node, 0), |
3218 | build_int_cst (long_long_integer_type_node, 1), true, NULL); |
3219 | } |
3220 | |
3221 | |
/* Adds candidates based on the old induction variable IV. */
3223 | |
3224 | static void |
3225 | add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv) |
3226 | { |
3227 | gimple *phi; |
3228 | tree def; |
3229 | struct iv_cand *cand; |
3230 | |
/* Check if this biv is used in an address type use. */
3232 | if (iv->no_overflow && iv->have_address_use |
3233 | && INTEGRAL_TYPE_P (TREE_TYPE (iv->base)) |
3234 | && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype)) |
3235 | { |
3236 | tree base = fold_convert (sizetype, iv->base); |
3237 | tree step = fold_convert (sizetype, iv->step); |
3238 | |
3239 | /* Add iv cand of same precision as index part in TARGET_MEM_REF. */ |
3240 | add_candidate (data, base, step, true, NULL, iv); |
3241 | /* Add iv cand of the original type only if it has nonlinear use. */ |
3242 | if (iv->nonlin_use) |
3243 | add_candidate (data, iv->base, iv->step, true, NULL); |
3244 | } |
3245 | else |
3246 | add_candidate (data, iv->base, iv->step, true, NULL); |
3247 | |
3248 | /* The same, but with initial value zero. */ |
3249 | if (POINTER_TYPE_P (TREE_TYPE (iv->base))) |
3250 | add_candidate (data, size_int (0), iv->step, true, NULL); |
3251 | else |
3252 | add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), |
3253 | iv->step, true, NULL); |
3254 | |
3255 | phi = SSA_NAME_DEF_STMT (iv->ssa_name); |
3256 | if (gimple_code (phi) == GIMPLE_PHI) |
3257 | { |
3258 | /* Additionally record the possibility of leaving the original iv |
3259 | untouched. */ |
3260 | def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); |
3261 | /* Don't add candidate if it's from another PHI node because |
3262 | it's an affine iv appearing in the form of PEELED_CHREC. */ |
3263 | phi = SSA_NAME_DEF_STMT (def); |
3264 | if (gimple_code (phi) != GIMPLE_PHI) |
3265 | { |
3266 | cand = add_candidate_1 (data, |
3267 | iv->base, iv->step, true, IP_ORIGINAL, NULL, |
3268 | SSA_NAME_DEF_STMT (def)); |
3269 | if (cand) |
3270 | { |
3271 | cand->var_before = iv->ssa_name; |
3272 | cand->var_after = def; |
3273 | } |
3274 | } |
3275 | else |
3276 | gcc_assert (gimple_bb (phi) == data->current_loop->header); |
3277 | } |
3278 | } |
3279 | |
3280 | /* Adds candidates based on the old induction variables. */ |
3281 | |
3282 | static void |
3283 | add_iv_candidate_for_bivs (struct ivopts_data *data) |
3284 | { |
3285 | unsigned i; |
3286 | struct iv *iv; |
3287 | bitmap_iterator bi; |
3288 | |
3289 | EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) |
3290 | { |
3291 | iv = ver_info (data, i)->iv; |
3292 | if (iv && iv->biv_p && !integer_zerop (iv->step)) |
3293 | add_iv_candidate_for_biv (data, iv); |
3294 | } |
3295 | } |
3296 | |
3297 | /* Record common candidate {BASE, STEP} derived from USE in hashtable. */ |
3298 | |
3299 | static void |
3300 | record_common_cand (struct ivopts_data *data, tree base, |
3301 | tree step, struct iv_use *use) |
3302 | { |
3303 | struct iv_common_cand ent; |
3304 | struct iv_common_cand **slot; |
3305 | |
3306 | ent.base = base; |
3307 | ent.step = step; |
3308 | ent.hash = iterative_hash_expr (base, 0); |
3309 | ent.hash = iterative_hash_expr (step, ent.hash); |
3310 | |
3311 | slot = data->iv_common_cand_tab->find_slot (&ent, INSERT); |
3312 | if (*slot == NULL) |
3313 | { |
3314 | *slot = new iv_common_cand (); |
3315 | (*slot)->base = base; |
3316 | (*slot)->step = step; |
3317 | (*slot)->uses.create (8); |
3318 | (*slot)->hash = ent.hash; |
3319 | data->iv_common_cands.safe_push ((*slot)); |
3320 | } |
3321 | |
3322 | gcc_assert (use != NULL); |
3323 | (*slot)->uses.safe_push (use); |
3324 | return; |
3325 | } |
3326 | |
3327 | /* Comparison function used to sort common candidates. */ |
3328 | |
3329 | static int |
3330 | common_cand_cmp (const void *p1, const void *p2) |
3331 | { |
3332 | unsigned n1, n2; |
3333 | const struct iv_common_cand *const *const ccand1 |
3334 | = (const struct iv_common_cand *const *)p1; |
3335 | const struct iv_common_cand *const *const ccand2 |
3336 | = (const struct iv_common_cand *const *)p2; |
3337 | |
3338 | n1 = (*ccand1)->uses.length (); |
3339 | n2 = (*ccand2)->uses.length (); |
3340 | return n2 - n1; |
3341 | } |
3342 | |
/* Adds IV candidates based on the common candidates recorded. */
3344 | |
3345 | static void |
3346 | add_iv_candidate_derived_from_uses (struct ivopts_data *data) |
3347 | { |
3348 | unsigned i, j; |
3349 | struct iv_cand *cand_1, *cand_2; |
3350 | |
3351 | data->iv_common_cands.qsort (common_cand_cmp); |
3352 | for (i = 0; i < data->iv_common_cands.length (); i++) |
3353 | { |
3354 | struct iv_common_cand *ptr = data->iv_common_cands[i]; |
3355 | |
3356 | /* Only add IV candidate if it's derived from multiple uses. */ |
3357 | if (ptr->uses.length () <= 1) |
3358 | break; |
3359 | |
3360 | cand_1 = NULL; |
3361 | cand_2 = NULL; |
3362 | if (ip_normal_pos (data->current_loop)) |
3363 | cand_1 = add_candidate_1 (data, ptr->base, ptr->step, |
3364 | false, IP_NORMAL, NULL, NULL); |
3365 | |
3366 | if (ip_end_pos (data->current_loop) |
3367 | && allow_ip_end_pos_p (data->current_loop)) |
3368 | cand_2 = add_candidate_1 (data, ptr->base, ptr->step, |
3369 | false, IP_END, NULL, NULL); |
3370 | |
3371 | /* Bind deriving uses and the new candidates. */ |
3372 | for (j = 0; j < ptr->uses.length (); j++) |
3373 | { |
3374 | struct iv_group *group = data->vgroups[ptr->uses[j]->group_id]; |
3375 | if (cand_1) |
3376 | bitmap_set_bit (group->related_cands, cand_1->id); |
3377 | if (cand_2) |
3378 | bitmap_set_bit (group->related_cands, cand_2->id); |
3379 | } |
3380 | } |
3381 | |
3382 | /* Release data since it is useless from this point. */ |
3383 | data->iv_common_cand_tab->empty (); |
3384 | data->iv_common_cands.truncate (0); |
3385 | } |
3386 | |
3387 | /* Adds candidates based on the value of USE's iv. */ |
3388 | |
3389 | static void |
3390 | add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) |
3391 | { |
3392 | unsigned HOST_WIDE_INT offset; |
3393 | tree base; |
3394 | tree basetype; |
3395 | struct iv *iv = use->iv; |
3396 | |
3397 | add_candidate (data, iv->base, iv->step, false, use); |
3398 | |
3399 | /* Record common candidate for use in case it can be shared by others. */ |
3400 | record_common_cand (data, iv->base, iv->step, use); |
3401 | |
3402 | /* Record common candidate with initial value zero. */ |
3403 | basetype = TREE_TYPE (iv->base); |
3404 | if (POINTER_TYPE_P (basetype)) |
3405 | basetype = sizetype; |
3406 | record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); |
3407 | |
/* Record a common candidate with the constant offset stripped from the
base. As for the use itself, also add a candidate directly for it. */
3410 | base = strip_offset (iv->base, &offset); |
3411 | if (offset || base != iv->base) |
3412 | { |
3413 | record_common_cand (data, base, iv->step, use); |
3414 | add_candidate (data, base, iv->step, false, use); |
3415 | } |
3416 | |
/* Record a common candidate with the base_object removed from the base. */
3418 | base = iv->base; |
3419 | STRIP_NOPS (base); |
3420 | if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR) |
3421 | { |
3422 | tree step = iv->step; |
3423 | |
3424 | STRIP_NOPS (step); |
3425 | base = TREE_OPERAND (base, 1); |
3426 | step = fold_convert (sizetype, step); |
3427 | record_common_cand (data, base, step, use); |
3428 | /* Also record common candidate with offset stripped. */ |
3429 | base = strip_offset (base, &offset); |
3430 | if (offset) |
3431 | record_common_cand (data, base, step, use); |
3432 | } |
3433 | |
  /* Finally, add auto-increment candidates.  Make such variables
     important since other iv uses with the same base object may be
     based on them.  */
3437 | if (use != NULL && use->type == USE_ADDRESS) |
3438 | add_autoinc_candidates (data, iv->base, iv->step, true, use); |
3439 | } |
3440 | |
3441 | /* Adds candidates based on the uses. */ |
3442 | |
3443 | static void |
3444 | add_iv_candidate_for_groups (struct ivopts_data *data) |
3445 | { |
3446 | unsigned i; |
3447 | |
  /* Only add candidates for the first use in each group.  */
3449 | for (i = 0; i < data->vgroups.length (); i++) |
3450 | { |
3451 | struct iv_group *group = data->vgroups[i]; |
3452 | |
3453 | gcc_assert (group->vuses[0] != NULL); |
3454 | add_iv_candidate_for_use (data, group->vuses[0]); |
3455 | } |
3456 | add_iv_candidate_derived_from_uses (data); |
3457 | } |
3458 | |
3459 | /* Record important candidates and add them to related_cands bitmaps. */ |
3460 | |
3461 | static void |
3462 | record_important_candidates (struct ivopts_data *data) |
3463 | { |
3464 | unsigned i; |
3465 | struct iv_group *group; |
3466 | |
3467 | for (i = 0; i < data->vcands.length (); i++) |
3468 | { |
3469 | struct iv_cand *cand = data->vcands[i]; |
3470 | |
3471 | if (cand->important) |
3472 | bitmap_set_bit (data->important_candidates, i); |
3473 | } |
3474 | |
3475 | data->consider_all_candidates = (data->vcands.length () |
3476 | <= CONSIDER_ALL_CANDIDATES_BOUND); |
3477 | |
3478 | /* Add important candidates to groups' related_cands bitmaps. */ |
3479 | for (i = 0; i < data->vgroups.length (); i++) |
3480 | { |
3481 | group = data->vgroups[i]; |
3482 | bitmap_ior_into (group->related_cands, data->important_candidates); |
3483 | } |
3484 | } |
3485 | |
/* Allocates the data structure mapping the (group, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array indexed
   directly by candidate id; otherwise we allocate a small map for every
   group, sized by its related candidates.  */
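/* For example, a group related to 5 candidates gets a map of 8 entries,
   so that set_group_iv_cost can probe it with CAND->id & 7 instead of a
   division.  */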
3489 | |
3490 | static void |
3491 | alloc_use_cost_map (struct ivopts_data *data) |
3492 | { |
3493 | unsigned i, size, s; |
3494 | |
3495 | for (i = 0; i < data->vgroups.length (); i++) |
3496 | { |
3497 | struct iv_group *group = data->vgroups[i]; |
3498 | |
3499 | if (data->consider_all_candidates) |
3500 | size = data->vcands.length (); |
3501 | else |
3502 | { |
3503 | s = bitmap_count_bits (group->related_cands); |
3504 | |
	  /* Round up to the nearest power of two, so that computing the
	     modulo by it is fast.  */
3506 | size = s ? (1 << ceil_log2 (s)) : 1; |
3507 | } |
3508 | |
3509 | group->n_map_members = size; |
3510 | group->cost_map = XCNEWVEC (struct cost_pair, size); |
3511 | } |
3512 | } |
3513 | |
/* Sets the cost of the (GROUP, CAND) pair to COST, and records that it
   depends on the invariant variables INV_VARS and invariant expressions
   INV_EXPRS, that the value used in expressing it is VALUE, and, in case
   of iv elimination, that the comparison operator is COMP.  */
3517 | |
3518 | static void |
3519 | set_group_iv_cost (struct ivopts_data *data, |
3520 | struct iv_group *group, struct iv_cand *cand, |
3521 | comp_cost cost, bitmap inv_vars, tree value, |
3522 | enum tree_code comp, bitmap inv_exprs) |
3523 | { |
3524 | unsigned i, s; |
3525 | |
3526 | if (cost.infinite_cost_p ()) |
3527 | { |
3528 | BITMAP_FREE (inv_vars); |
3529 | BITMAP_FREE (inv_exprs); |
3530 | return; |
3531 | } |
3532 | |
3533 | if (data->consider_all_candidates) |
3534 | { |
3535 | group->cost_map[cand->id].cand = cand; |
3536 | group->cost_map[cand->id].cost = cost; |
3537 | group->cost_map[cand->id].inv_vars = inv_vars; |
3538 | group->cost_map[cand->id].inv_exprs = inv_exprs; |
3539 | group->cost_map[cand->id].value = value; |
3540 | group->cost_map[cand->id].comp = comp; |
3541 | return; |
3542 | } |
3543 | |
3544 | /* n_map_members is a power of two, so this computes modulo. */ |
3545 | s = cand->id & (group->n_map_members - 1); |
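  /* Linearly probe for a free slot, wrapping around to the beginning of
     the table if necessary.  */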
3546 | for (i = s; i < group->n_map_members; i++) |
3547 | if (!group->cost_map[i].cand) |
3548 | goto found; |
3549 | for (i = 0; i < s; i++) |
3550 | if (!group->cost_map[i].cand) |
3551 | goto found; |
3552 | |
3553 | gcc_unreachable (); |
3554 | |
3555 | found: |
3556 | group->cost_map[i].cand = cand; |
3557 | group->cost_map[i].cost = cost; |
3558 | group->cost_map[i].inv_vars = inv_vars; |
3559 | group->cost_map[i].inv_exprs = inv_exprs; |
3560 | group->cost_map[i].value = value; |
3561 | group->cost_map[i].comp = comp; |
3562 | } |
3563 | |
3564 | /* Gets cost of (GROUP, CAND) pair. */ |
3565 | |
3566 | static struct cost_pair * |
3567 | get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, |
3568 | struct iv_cand *cand) |
3569 | { |
3570 | unsigned i, s; |
3571 | struct cost_pair *ret; |
3572 | |
3573 | if (!cand) |
3574 | return NULL; |
3575 | |
3576 | if (data->consider_all_candidates) |
3577 | { |
3578 | ret = group->cost_map + cand->id; |
3579 | if (!ret->cand) |
3580 | return NULL; |
3581 | |
3582 | return ret; |
3583 | } |
3584 | |
3585 | /* n_map_members is a power of two, so this computes modulo. */ |
3586 | s = cand->id & (group->n_map_members - 1); |
3587 | for (i = s; i < group->n_map_members; i++) |
3588 | if (group->cost_map[i].cand == cand) |
3589 | return group->cost_map + i; |
3590 | else if (group->cost_map[i].cand == NULL) |
3591 | return NULL; |
3592 | for (i = 0; i < s; i++) |
3593 | if (group->cost_map[i].cand == cand) |
3594 | return group->cost_map + i; |
3595 | else if (group->cost_map[i].cand == NULL) |
3596 | return NULL; |
3597 | |
3598 | return NULL; |
3599 | } |
3600 | |
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3602 | static rtx |
3603 | produce_memory_decl_rtl (tree obj, int *regno) |
3604 | { |
3605 | addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); |
3606 | machine_mode address_mode = targetm.addr_space.address_mode (as); |
3607 | rtx x; |
3608 | |
3609 | gcc_assert (obj); |
3610 | if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) |
3611 | { |
3612 | const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); |
3613 | x = gen_rtx_SYMBOL_REF (address_mode, name); |
3614 | SET_SYMBOL_REF_DECL (x, obj); |
3615 | x = gen_rtx_MEM (DECL_MODE (obj), x); |
3616 | set_mem_addr_space (x, as); |
3617 | targetm.encode_section_info (obj, x, true); |
3618 | } |
3619 | else |
3620 | { |
3621 | x = gen_raw_REG (address_mode, (*regno)++); |
3622 | x = gen_rtx_MEM (DECL_MODE (obj), x); |
3623 | set_mem_addr_space (x, as); |
3624 | } |
3625 | |
3626 | return x; |
3627 | } |
3628 | |
/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA points to the fake register number to be used next.  */
3631 | |
3632 | static tree |
3633 | prepare_decl_rtl (tree *expr_p, int *ws, void *data) |
3634 | { |
3635 | tree obj = NULL_TREE; |
3636 | rtx x = NULL_RTX; |
3637 | int *regno = (int *) data; |
3638 | |
3639 | switch (TREE_CODE (*expr_p)) |
3640 | { |
3641 | case ADDR_EXPR: |
3642 | for (expr_p = &TREE_OPERAND (*expr_p, 0); |
3643 | handled_component_p (*expr_p); |
3644 | expr_p = &TREE_OPERAND (*expr_p, 0)) |
3645 | continue; |
3646 | obj = *expr_p; |
3647 | if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) |
3648 | x = produce_memory_decl_rtl (obj, regno); |
3649 | break; |
3650 | |
3651 | case SSA_NAME: |
3652 | *ws = 0; |
3653 | obj = SSA_NAME_VAR (*expr_p); |
3654 | /* Defer handling of anonymous SSA_NAMEs to the expander. */ |
3655 | if (!obj) |
3656 | return NULL_TREE; |
3657 | if (!DECL_RTL_SET_P (obj)) |
3658 | x = gen_raw_REG (DECL_MODE (obj), (*regno)++); |
3659 | break; |
3660 | |
3661 | case VAR_DECL: |
3662 | case PARM_DECL: |
3663 | case RESULT_DECL: |
3664 | *ws = 0; |
3665 | obj = *expr_p; |
3666 | |
3667 | if (DECL_RTL_SET_P (obj)) |
3668 | break; |
3669 | |
3670 | if (DECL_MODE (obj) == BLKmode) |
3671 | x = produce_memory_decl_rtl (obj, regno); |
3672 | else |
3673 | x = gen_raw_REG (DECL_MODE (obj), (*regno)++); |
3674 | |
3675 | break; |
3676 | |
3677 | default: |
3678 | break; |
3679 | } |
3680 | |
3681 | if (x) |
3682 | { |
3683 | decl_rtl_to_reset.safe_push (obj); |
3684 | SET_DECL_RTL (obj, x); |
3685 | } |
3686 | |
3687 | return NULL_TREE; |
3688 | } |
3689 | |
/* Determines the cost of computing EXPR, by expanding it into a scratch RTL
   sequence and summing the costs of the generated insns.  SPEED selects
   between speed and size costs.  */
3691 | |
3692 | static unsigned |
3693 | computation_cost (tree expr, bool speed) |
3694 | { |
3695 | rtx_insn *seq; |
3696 | rtx rslt; |
3697 | tree type = TREE_TYPE (expr); |
3698 | unsigned cost; |
3699 | /* Avoid using hard regs in ways which may be unsupported. */ |
3700 | int regno = LAST_VIRTUAL_REGISTER + 1; |
3701 | struct cgraph_node *node = cgraph_node::get (current_function_decl); |
3702 | enum node_frequency real_frequency = node->frequency; |
3703 | |
3704 | node->frequency = NODE_FREQUENCY_NORMAL; |
3705 | crtl->maybe_hot_insn_p = speed; |
3706 | walk_tree (&expr, prepare_decl_rtl, ®no, NULL); |
3707 | start_sequence (); |
3708 | rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); |
3709 | seq = get_insns (); |
3710 | end_sequence (); |
3711 | default_rtl_profile (); |
3712 | node->frequency = real_frequency; |
3713 | |
3714 | cost = seq_cost (seq, speed); |
3715 | if (MEM_P (rslt)) |
3716 | cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), |
3717 | TYPE_ADDR_SPACE (type), speed); |
3718 | else if (!REG_P (rslt)) |
3719 | cost += set_src_cost (rslt, TYPE_MODE (type), speed); |
3720 | |
3721 | return cost; |
3722 | } |
3723 | |
/* Returns the variable containing the value of candidate CAND at statement
   STMT.  */
3725 | |
3726 | static tree |
3727 | var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt) |
3728 | { |
3729 | if (stmt_after_increment (loop, cand, stmt)) |
3730 | return cand->var_after; |
3731 | else |
3732 | return cand->var_before; |
3733 | } |
3734 | |
3735 | /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the |
3736 | same precision that is at least as wide as the precision of TYPE, stores |
3737 | BA to A and BB to B, and returns the type of BA. Otherwise, returns the |
3738 | type of A and B. */ |
3739 | |
3740 | static tree |
3741 | determine_common_wider_type (tree *a, tree *b) |
3742 | { |
3743 | tree wider_type = NULL; |
3744 | tree suba, subb; |
3745 | tree atype = TREE_TYPE (*a); |
3746 | |
3747 | if (CONVERT_EXPR_P (*a)) |
3748 | { |
3749 | suba = TREE_OPERAND (*a, 0); |
3750 | wider_type = TREE_TYPE (suba); |
3751 | if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) |
3752 | return atype; |
3753 | } |
3754 | else |
3755 | return atype; |
3756 | |
3757 | if (CONVERT_EXPR_P (*b)) |
3758 | { |
3759 | subb = TREE_OPERAND (*b, 0); |
3760 | if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) |
3761 | return atype; |
3762 | } |
3763 | else |
3764 | return atype; |
3765 | |
3766 | *a = suba; |
3767 | *b = subb; |
3768 | return wider_type; |
3769 | } |
3770 | |
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in two
   parts in a decomposed form: the invariant part is stored in AFF_INV, the
   variant part in AFF_VAR.  Store the ratio of USE.step over CAND.step in
   PRAT if it is non-null.  Returns false if USE cannot be expressed using
   CAND.  */
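/* As an illustration with hypothetical values: if USE has
   {base = p + 16, step = 4} and CAND has {base = p, step = 4}, the ratio
   is 1, AFF_INV becomes the constant 16 and AFF_VAR the candidate
   variable itself, i.e. use = var + 16.  */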
3776 | |
3777 | static bool |
3778 | get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use, |
3779 | struct iv_cand *cand, struct aff_tree *aff_inv, |
3780 | struct aff_tree *aff_var, widest_int *prat = NULL) |
3781 | { |
3782 | tree ubase = use->iv->base, ustep = use->iv->step; |
3783 | tree cbase = cand->iv->base, cstep = cand->iv->step; |
3784 | tree common_type, uutype, var, cstep_common; |
3785 | tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); |
3786 | aff_tree aff_cbase; |
3787 | widest_int rat; |
3788 | |
  /* CAND's type must have at least the precision of USE's type, so that the
     values of the use can be expressed.  */
3790 | if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) |
3791 | return false; |
3792 | |
3793 | var = var_at_stmt (loop, cand, at); |
3794 | uutype = unsigned_type_for (utype); |
3795 | |
  /* If the conversion is not a noop, perform it.  */
3797 | if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) |
3798 | { |
3799 | if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase) |
3800 | && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST)) |
3801 | { |
3802 | tree inner_base, inner_step, inner_type; |
3803 | inner_base = TREE_OPERAND (cbase, 0); |
3804 | if (CONVERT_EXPR_P (cstep)) |
3805 | inner_step = TREE_OPERAND (cstep, 0); |
3806 | else |
3807 | inner_step = cstep; |
3808 | |
3809 | inner_type = TREE_TYPE (inner_base); |
	  /* If the candidate is added from a biv whose type is smaller than
	     ctype, we know that neither the candidate nor the biv will
	     overflow.  In this case, it's safe to skip the conversion in the
	     candidate.  As an example, (unsigned short)((unsigned long)A)
	     equals (unsigned short)A, if A has a type no larger than
	     short.  */
3815 | if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype)) |
3816 | { |
3817 | cbase = inner_base; |
3818 | cstep = inner_step; |
3819 | } |
3820 | } |
3821 | cbase = fold_convert (uutype, cbase); |
3822 | cstep = fold_convert (uutype, cstep); |
3823 | var = fold_convert (uutype, var); |
3824 | } |
3825 | |
3826 | /* Ratio is 1 when computing the value of biv cand by itself. |
3827 | We can't rely on constant_multiple_of in this case because the |
3828 | use is created after the original biv is selected. The call |
3829 | could fail because of inconsistent fold behavior. See PR68021 |
3830 | for more information. */ |
3831 | if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) |
3832 | { |
3833 | gcc_assert (is_gimple_assign (use->stmt)); |
3834 | gcc_assert (use->iv->ssa_name == cand->var_after); |
3835 | gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); |
3836 | rat = 1; |
3837 | } |
3838 | else if (!constant_multiple_of (ustep, cstep, &rat)) |
3839 | return false; |
3840 | |
3841 | if (prat) |
3842 | *prat = rat; |
3843 | |
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and casting the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
3849 | common_type = determine_common_wider_type (&ubase, &cbase); |
3850 | |
3851 | /* use = ubase - ratio * cbase + ratio * var. */ |
3852 | tree_to_aff_combination (ubase, common_type, aff_inv); |
3853 | tree_to_aff_combination (cbase, common_type, &aff_cbase); |
3854 | tree_to_aff_combination (var, uutype, aff_var); |
3855 | |
3856 | /* We need to shift the value if we are after the increment. */ |
3857 | if (stmt_after_increment (loop, cand, at)) |
3858 | { |
3859 | aff_tree cstep_aff; |
3860 | |
3861 | if (common_type != uutype) |
3862 | cstep_common = fold_convert (common_type, cstep); |
3863 | else |
3864 | cstep_common = cstep; |
3865 | |
3866 | tree_to_aff_combination (cstep_common, common_type, &cstep_aff); |
3867 | aff_combination_add (&aff_cbase, &cstep_aff); |
3868 | } |
3869 | |
3870 | aff_combination_scale (&aff_cbase, -rat); |
3871 | aff_combination_add (aff_inv, &aff_cbase); |
3872 | if (common_type != uutype) |
3873 | aff_combination_convert (aff_inv, uutype); |
3874 | |
3875 | aff_combination_scale (aff_var, rat); |
3876 | return true; |
3877 | } |
3878 | |
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in a
   decomposed form in AFF.  Returns false if USE cannot be expressed using
   CAND.  */
3882 | |
3883 | static bool |
3884 | get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use, |
3885 | struct iv_cand *cand, struct aff_tree *aff) |
3886 | { |
3887 | aff_tree aff_var; |
3888 | |
3889 | if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var)) |
3890 | return false; |
3891 | |
3892 | aff_combination_add (aff, &aff_var); |
3893 | return true; |
3894 | } |
3895 | |
3896 | /* Return the type of USE. */ |
3897 | |
3898 | static tree |
3899 | get_use_type (struct iv_use *use) |
3900 | { |
3901 | tree base_type = TREE_TYPE (use->iv->base); |
3902 | tree type; |
3903 | |
3904 | if (use->type == USE_ADDRESS) |
3905 | { |
3906 | /* The base_type may be a void pointer. Create a pointer type based on |
3907 | the mem_ref instead. */ |
3908 | type = build_pointer_type (TREE_TYPE (*use->op_p)); |
3909 | gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) |
3910 | == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); |
3911 | } |
3912 | else |
3913 | type = base_type; |
3914 | |
3915 | return type; |
3916 | } |
3917 | |
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The computation is unshared.  */
3920 | |
3921 | static tree |
3922 | get_computation_at (struct loop *loop, gimple *at, |
3923 | struct iv_use *use, struct iv_cand *cand) |
3924 | { |
3925 | aff_tree aff; |
3926 | tree type = get_use_type (use); |
3927 | |
3928 | if (!get_computation_aff (loop, at, use, cand, &aff)) |
3929 | return NULL_TREE; |
3930 | unshare_aff_combination (&aff); |
3931 | return fold_convert (type, aff_combination_to_tree (&aff)); |
3932 | } |
3933 | |
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the average number of
   iterations.  If ROUND_UP_P is true, the result is rounded up rather
   than truncated toward zero when optimizing for speed.  */
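/* For example, assuming avg_loop_niter returns 4, a setup cost of 10 is
   charged as 10 / 4 = 2 per iteration, or (10 + 3) / 4 = 3 when
   ROUND_UP_P is true.  */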
3939 | static unsigned |
3940 | adjust_setup_cost (struct ivopts_data *data, unsigned cost, |
3941 | bool round_up_p = false) |
3942 | { |
3943 | if (cost == INFTY) |
3944 | return cost; |
3945 | else if (optimize_loop_for_speed_p (data->current_loop)) |
3946 | { |
3947 | HOST_WIDE_INT niters = avg_loop_niter (data->current_loop); |
3948 | return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters; |
3949 | } |
3950 | else |
3951 | return cost; |
3952 | } |
3953 | |
/* Calculates the speed or size cost (depending on SPEED) of the shiftadd
   EXPR in MODE.  MULT is the EXPR operand holding the multiplication that
   is turned into the shift.  COST0 and COST1 are the costs for calculating
   the operands of EXPR.  Returns true if successful, and returns the cost
   in COST.  */
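/* For instance, a + b * 4 may be computed as (b << 2) + a on targets with
   a cheap shift-and-add instruction, and a - b * 4 as a shift-and-sub;
   this function compares that against a separate shift followed by an
   add.  */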
3958 | |
3959 | static bool |
3960 | get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0, |
3961 | comp_cost cost1, tree mult, bool speed, comp_cost *cost) |
3962 | { |
3963 | comp_cost res; |
3964 | tree op1 = TREE_OPERAND (expr, 1); |
3965 | tree cst = TREE_OPERAND (mult, 1); |
3966 | tree multop = TREE_OPERAND (mult, 0); |
3967 | int m = exact_log2 (int_cst_value (cst)); |
3968 | int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
3969 | int as_cost, sa_cost; |
3970 | bool mult_in_op1; |
3971 | |
3972 | if (!(m >= 0 && m < maxm)) |
3973 | return false; |
3974 | |
3975 | STRIP_NOPS (op1); |
3976 | mult_in_op1 = operand_equal_p (op1, mult, 0); |
3977 | |
3978 | as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
3979 | |
3980 | /* If the target has a cheap shift-and-add or shift-and-sub instruction, |
3981 | use that in preference to a shift insn followed by an add insn. */ |
3982 | sa_cost = (TREE_CODE (expr) != MINUS_EXPR |
3983 | ? shiftadd_cost (speed, mode, m) |
3984 | : (mult_in_op1 |
3985 | ? shiftsub1_cost (speed, mode, m) |
3986 | : shiftsub0_cost (speed, mode, m))); |
3987 | |
3988 | res = comp_cost (MIN (as_cost, sa_cost), 0); |
3989 | res += (mult_in_op1 ? cost0 : cost1); |
3990 | |
3991 | STRIP_NOPS (multop); |
3992 | if (!is_gimple_val (multop)) |
3993 | res += force_expr_to_var_cost (multop, speed); |
3994 | |
3995 | *cost = res; |
3996 | return true; |
3997 | } |
3998 | |
3999 | /* Estimates cost of forcing expression EXPR into a variable. */ |
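/* The costs of a few representative expressions -- an integer constant,
   the address of a static variable, and such an address plus a constant
   offset -- are measured once for both the size and the speed variants
   via computation_cost and cached in the static arrays below.  */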
4000 | |
4001 | static comp_cost |
4002 | force_expr_to_var_cost (tree expr, bool speed) |
4003 | { |
4004 | static bool costs_initialized = false; |
4005 | static unsigned integer_cost [2]; |
4006 | static unsigned symbol_cost [2]; |
4007 | static unsigned address_cost [2]; |
4008 | tree op0, op1; |
4009 | comp_cost cost0, cost1, cost; |
4010 | machine_mode mode; |
4011 | scalar_int_mode int_mode; |
4012 | |
4013 | if (!costs_initialized) |
4014 | { |
4015 | tree type = build_pointer_type (integer_type_node); |
4016 | tree var, addr; |
4017 | rtx x; |
4018 | int i; |
4019 | |
      var = create_tmp_var_raw (integer_type_node, "test_var");
4021 | TREE_STATIC (var) = 1; |
4022 | x = produce_memory_decl_rtl (var, NULL); |
4023 | SET_DECL_RTL (var, x); |
4024 | |
4025 | addr = build1 (ADDR_EXPR, type, var); |
4026 | |
4027 | |
4028 | for (i = 0; i < 2; i++) |
4029 | { |
4030 | integer_cost[i] = computation_cost (build_int_cst (integer_type_node, |
4031 | 2000), i); |
4032 | |
4033 | symbol_cost[i] = computation_cost (addr, i) + 1; |
4034 | |
4035 | address_cost[i] |
4036 | = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; |
4037 | if (dump_file && (dump_flags & TDF_DETAILS)) |
4038 | { |
4039 | fprintf (dump_file, "force_expr_to_var_cost %s costs:\n" , i ? "speed" : "size" ); |
4040 | fprintf (dump_file, " integer %d\n" , (int) integer_cost[i]); |
4041 | fprintf (dump_file, " symbol %d\n" , (int) symbol_cost[i]); |
4042 | fprintf (dump_file, " address %d\n" , (int) address_cost[i]); |
4043 | fprintf (dump_file, " other %d\n" , (int) target_spill_cost[i]); |
4044 | fprintf (dump_file, "\n" ); |
4045 | } |
4046 | } |
4047 | |
4048 | costs_initialized = true; |
4049 | } |
4050 | |
4051 | STRIP_NOPS (expr); |
4052 | |
4053 | if (SSA_VAR_P (expr)) |
4054 | return no_cost; |
4055 | |
4056 | if (is_gimple_min_invariant (expr)) |
4057 | { |
4058 | if (TREE_CODE (expr) == INTEGER_CST) |
4059 | return comp_cost (integer_cost [speed], 0); |
4060 | |
4061 | if (TREE_CODE (expr) == ADDR_EXPR) |
4062 | { |
4063 | tree obj = TREE_OPERAND (expr, 0); |
4064 | |
4065 | if (VAR_P (obj) |
4066 | || TREE_CODE (obj) == PARM_DECL |
4067 | || TREE_CODE (obj) == RESULT_DECL) |
4068 | return comp_cost (symbol_cost [speed], 0); |
4069 | } |
4070 | |
4071 | return comp_cost (address_cost [speed], 0); |
4072 | } |
4073 | |
4074 | switch (TREE_CODE (expr)) |
4075 | { |
4076 | case POINTER_PLUS_EXPR: |
4077 | case PLUS_EXPR: |
4078 | case MINUS_EXPR: |
4079 | case MULT_EXPR: |
4080 | case TRUNC_DIV_EXPR: |
4081 | case BIT_AND_EXPR: |
4082 | case BIT_IOR_EXPR: |
4083 | case LSHIFT_EXPR: |
4084 | case RSHIFT_EXPR: |
4085 | op0 = TREE_OPERAND (expr, 0); |
4086 | op1 = TREE_OPERAND (expr, 1); |
4087 | STRIP_NOPS (op0); |
4088 | STRIP_NOPS (op1); |
4089 | break; |
4090 | |
4091 | CASE_CONVERT: |
4092 | case NEGATE_EXPR: |
4093 | case BIT_NOT_EXPR: |
4094 | op0 = TREE_OPERAND (expr, 0); |
4095 | STRIP_NOPS (op0); |
4096 | op1 = NULL_TREE; |
4097 | break; |
4098 | |
4099 | default: |
4100 | /* Just an arbitrary value, FIXME. */ |
4101 | return comp_cost (target_spill_cost[speed], 0); |
4102 | } |
4103 | |
4104 | if (op0 == NULL_TREE |
4105 | || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) |
4106 | cost0 = no_cost; |
4107 | else |
4108 | cost0 = force_expr_to_var_cost (op0, speed); |
4109 | |
4110 | if (op1 == NULL_TREE |
4111 | || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) |
4112 | cost1 = no_cost; |
4113 | else |
4114 | cost1 = force_expr_to_var_cost (op1, speed); |
4115 | |
4116 | mode = TYPE_MODE (TREE_TYPE (expr)); |
4117 | switch (TREE_CODE (expr)) |
4118 | { |
4119 | case POINTER_PLUS_EXPR: |
4120 | case PLUS_EXPR: |
4121 | case MINUS_EXPR: |
4122 | case NEGATE_EXPR: |
4123 | cost = comp_cost (add_cost (speed, mode), 0); |
4124 | if (TREE_CODE (expr) != NEGATE_EXPR) |
4125 | { |
4126 | tree mult = NULL_TREE; |
4127 | comp_cost sa_cost; |
4128 | if (TREE_CODE (op1) == MULT_EXPR) |
4129 | mult = op1; |
4130 | else if (TREE_CODE (op0) == MULT_EXPR) |
4131 | mult = op0; |
4132 | |
4133 | if (mult != NULL_TREE |
4134 | && is_a <scalar_int_mode> (mode, &int_mode) |
4135 | && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) |
4136 | && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult, |
4137 | speed, &sa_cost)) |
4138 | return sa_cost; |
4139 | } |
4140 | break; |
4141 | |
4142 | CASE_CONVERT: |
4143 | { |
4144 | tree inner_mode, outer_mode; |
4145 | outer_mode = TREE_TYPE (expr); |
4146 | inner_mode = TREE_TYPE (op0); |
4147 | cost = comp_cost (convert_cost (TYPE_MODE (outer_mode), |
4148 | TYPE_MODE (inner_mode), speed), 0); |
4149 | } |
4150 | break; |
4151 | |
4152 | case MULT_EXPR: |
4153 | if (cst_and_fits_in_hwi (op0)) |
4154 | cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0), |
4155 | mode, speed), 0); |
4156 | else if (cst_and_fits_in_hwi (op1)) |
4157 | cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1), |
4158 | mode, speed), 0); |
4159 | else |
4160 | return comp_cost (target_spill_cost [speed], 0); |
4161 | break; |
4162 | |
4163 | case TRUNC_DIV_EXPR: |
      /* Division by a power of two is usually cheap, so we allow it.
	 Forbid anything else.  */
4166 | if (integer_pow2p (TREE_OPERAND (expr, 1))) |
4167 | cost = comp_cost (add_cost (speed, mode), 0); |
4168 | else |
4169 | cost = comp_cost (target_spill_cost[speed], 0); |
4170 | break; |
4171 | |
4172 | case BIT_AND_EXPR: |
4173 | case BIT_IOR_EXPR: |
4174 | case BIT_NOT_EXPR: |
4175 | case LSHIFT_EXPR: |
4176 | case RSHIFT_EXPR: |
4177 | cost = comp_cost (add_cost (speed, mode), 0); |
4178 | break; |
4179 | |
4180 | default: |
4181 | gcc_unreachable (); |
4182 | } |
4183 | |
4184 | cost += cost0; |
4185 | cost += cost1; |
4186 | return cost; |
4187 | } |
4188 | |
4189 | /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the |
4190 | invariants the computation depends on. */ |
4191 | |
4192 | static comp_cost |
4193 | force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars) |
4194 | { |
4195 | if (!expr) |
4196 | return no_cost; |
4197 | |
4198 | find_inv_vars (data, &expr, inv_vars); |
4199 | return force_expr_to_var_cost (expr, data->speed); |
4200 | } |
4201 | |
/* Returns the cost of an auto-modifying address expression of the form
   BASE + OFFSET.  AINC_STEP is the step size of the address IV.
   AINC_OFFSET is the offset of the address expression.  The address
   expression has ADDR_MODE in address space AS.  The memory access has
   MEM_MODE.  SPEED selects between speed and size costs.  */
4207 | |
4208 | enum ainc_type |
4209 | { |
4210 | AINC_PRE_INC, /* Pre increment. */ |
4211 | AINC_PRE_DEC, /* Pre decrement. */ |
4212 | AINC_POST_INC, /* Post increment. */ |
4213 | AINC_POST_DEC, /* Post decrement. */ |
4214 | AINC_NONE /* Also the number of auto increment types. */ |
4215 | }; |
4216 | |
4217 | struct ainc_cost_data |
4218 | { |
4219 | unsigned costs[AINC_NONE]; |
4220 | }; |
4221 | |
4222 | static comp_cost |
4223 | get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset, |
4224 | machine_mode addr_mode, machine_mode mem_mode, |
4225 | addr_space_t as, bool speed) |
4226 | { |
4227 | if (!USE_LOAD_PRE_DECREMENT (mem_mode) |
4228 | && !USE_STORE_PRE_DECREMENT (mem_mode) |
4229 | && !USE_LOAD_POST_DECREMENT (mem_mode) |
4230 | && !USE_STORE_POST_DECREMENT (mem_mode) |
4231 | && !USE_LOAD_PRE_INCREMENT (mem_mode) |
4232 | && !USE_STORE_PRE_INCREMENT (mem_mode) |
4233 | && !USE_LOAD_POST_INCREMENT (mem_mode) |
4234 | && !USE_STORE_POST_INCREMENT (mem_mode)) |
4235 | return infinite_cost; |
4236 | |
4237 | static vec<ainc_cost_data *> ainc_cost_data_list; |
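  /* Costs are cached per (address space, memory mode) pair; the vector is
     indexed by AS * MAX_MACHINE_MODE + MEM_MODE and grown lazily.  */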
4238 | unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; |
4239 | if (idx >= ainc_cost_data_list.length ()) |
4240 | { |
      unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4242 | |
4243 | gcc_assert (nsize > idx); |
4244 | ainc_cost_data_list.safe_grow_cleared (nsize); |
4245 | } |
4246 | |
4247 | ainc_cost_data *data = ainc_cost_data_list[idx]; |
4248 | if (data == NULL) |
4249 | { |
4250 | rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); |
4251 | |
4252 | data = (ainc_cost_data *) xcalloc (1, sizeof (*data)); |
4253 | data->costs[AINC_PRE_DEC] = INFTY; |
4254 | data->costs[AINC_POST_DEC] = INFTY; |
4255 | data->costs[AINC_PRE_INC] = INFTY; |
4256 | data->costs[AINC_POST_INC] = INFTY; |
4257 | if (USE_LOAD_PRE_DECREMENT (mem_mode) |
4258 | || USE_STORE_PRE_DECREMENT (mem_mode)) |
4259 | { |
4260 | rtx addr = gen_rtx_PRE_DEC (addr_mode, reg); |
4261 | |
4262 | if (memory_address_addr_space_p (mem_mode, addr, as)) |
4263 | data->costs[AINC_PRE_DEC] |
4264 | = address_cost (addr, mem_mode, as, speed); |
4265 | } |
4266 | if (USE_LOAD_POST_DECREMENT (mem_mode) |
4267 | || USE_STORE_POST_DECREMENT (mem_mode)) |
4268 | { |
4269 | rtx addr = gen_rtx_POST_DEC (addr_mode, reg); |
4270 | |
4271 | if (memory_address_addr_space_p (mem_mode, addr, as)) |
4272 | data->costs[AINC_POST_DEC] |
4273 | = address_cost (addr, mem_mode, as, speed); |
4274 | } |
4275 | if (USE_LOAD_PRE_INCREMENT (mem_mode) |
4276 | || USE_STORE_PRE_INCREMENT (mem_mode)) |
4277 | { |
4278 | rtx addr = gen_rtx_PRE_INC (addr_mode, reg); |
4279 | |
4280 | if (memory_address_addr_space_p (mem_mode, addr, as)) |
4281 | data->costs[AINC_PRE_INC] |
4282 | = address_cost (addr, mem_mode, as, speed); |
4283 | } |
4284 | if (USE_LOAD_POST_INCREMENT (mem_mode) |
4285 | || USE_STORE_POST_INCREMENT (mem_mode)) |
4286 | { |
4287 | rtx addr = gen_rtx_POST_INC (addr_mode, reg); |
4288 | |
4289 | if (memory_address_addr_space_p (mem_mode, addr, as)) |
4290 | data->costs[AINC_POST_INC] |
4291 | = address_cost (addr, mem_mode, as, speed); |
4292 | } |
4293 | ainc_cost_data_list[idx] = |
---|