1 | /* Functions to determine/estimate number of iterations of a loop. |
2 | Copyright (C) 2004-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "rtl.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "tree-pass.h" |
28 | #include "ssa.h" |
29 | #include "gimple-pretty-print.h" |
30 | #include "diagnostic-core.h" |
31 | #include "stor-layout.h" |
32 | #include "fold-const.h" |
33 | #include "calls.h" |
34 | #include "intl.h" |
35 | #include "gimplify.h" |
36 | #include "gimple-iterator.h" |
37 | #include "tree-cfg.h" |
38 | #include "tree-ssa-loop-ivopts.h" |
39 | #include "tree-ssa-loop-niter.h" |
40 | #include "tree-ssa-loop.h" |
41 | #include "cfgloop.h" |
42 | #include "tree-chrec.h" |
43 | #include "tree-scalar-evolution.h" |
44 | #include "tree-dfa.h" |
45 | #include "internal-fn.h" |
46 | #include "gimple-range.h" |
47 | #include "sreal.h" |
48 | |
49 | |
50 | /* The maximum number of dominator BBs we search for conditions |
51 | of loop header copies we use for simplifying a conditional |
52 | expression. */ |
53 | #define MAX_DOMINATORS_TO_WALK 8 |
54 | |
55 | /* |
56 | |
57 | Analysis of number of iterations of an affine exit test. |
58 | |
59 | */ |
60 | |
61 | /* Bounds on some value, BELOW <= X <= UP. */ |
62 | |
struct bounds
{
  /* Inclusive bounds on an abstract value X: BELOW <= X <= UP.  Both are
     arbitrary-precision GMP integers so the range of a wrapped computation
     can be represented without overflow.  */
  mpz_t below, up;
};
67 | |
68 | /* Splits expression EXPR to a variable part VAR and constant OFFSET. */ |
69 | |
70 | static void |
71 | split_to_var_and_offset (tree expr, tree *var, mpz_t offset) |
72 | { |
73 | tree type = TREE_TYPE (expr); |
74 | tree op0, op1; |
75 | bool negate = false; |
76 | |
77 | *var = expr; |
78 | mpz_set_ui (offset, 0); |
79 | |
80 | switch (TREE_CODE (expr)) |
81 | { |
82 | case MINUS_EXPR: |
83 | negate = true; |
84 | /* Fallthru. */ |
85 | |
86 | case PLUS_EXPR: |
87 | case POINTER_PLUS_EXPR: |
88 | op0 = TREE_OPERAND (expr, 0); |
89 | op1 = TREE_OPERAND (expr, 1); |
90 | |
91 | if (TREE_CODE (op1) != INTEGER_CST) |
92 | break; |
93 | |
94 | *var = op0; |
95 | /* Always sign extend the offset. */ |
96 | wi::to_mpz (wi::to_wide (t: op1), offset, SIGNED); |
97 | if (negate) |
98 | mpz_neg (gmp_w: offset, gmp_u: offset); |
99 | break; |
100 | |
101 | case INTEGER_CST: |
102 | *var = build_int_cst_type (type, 0); |
103 | wi::to_mpz (wi::to_wide (t: expr), offset, TYPE_SIGN (type)); |
104 | break; |
105 | |
106 | default: |
107 | break; |
108 | } |
109 | } |
110 | |
111 | /* From condition C0 CMP C1 derives information regarding the value range |
112 | of VAR, which is of TYPE. Results are stored in to BELOW and UP. */ |
113 | |
/* From the fact that condition C0 CMP C1 holds, derive information
   regarding the value range of VAR, which is of TYPE.  The current
   bounds BELOW <= VAR <= UP are narrowed in place; they are never
   widened.  */

static void
refine_value_range_using_guard (tree type, tree var,
				tree c0, enum tree_code cmp, tree c1,
				mpz_t below, mpz_t up)
{
  tree varc0, varc1, ctype;
  mpz_t offc0, offc1;
  mpz_t mint, maxt, minc1, maxc1;
  bool no_wrap = nowrap_type_p (type);
  bool c0_ok, c1_ok;
  signop sgn = TYPE_SIGN (type);

  /* First canonicalize the comparison: only ordered comparisons in the
     same type as VAR are useful; NE_EXPR is converted into LT/GT where
     possible, everything else is given up on.  */
  switch (cmp)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      STRIP_SIGN_NOPS (c0);
      STRIP_SIGN_NOPS (c1);
      ctype = TREE_TYPE (c0);
      if (!useless_type_conversion_p (ctype, type))
	return;

      break;

    case EQ_EXPR:
      /* We could derive quite precise information from EQ_EXPR, however,
	 such a guard is unlikely to appear, so we do not bother with
	 handling it.  */
      return;

    case NE_EXPR:
      /* NE_EXPR comparisons do not contain much of useful information,
	 except for cases of comparing with bounds.  */
      if (TREE_CODE (c1) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (type))
	return;

      /* Ensure that the condition speaks about an expression in the same
	 type as X and Y.  */
      ctype = TREE_TYPE (c0);
      if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
	return;
      c0 = fold_convert (type, c0);
      c1 = fold_convert (type, c1);

      if (operand_equal_p (var, c0, flags: 0))
	{
	  /* Case of comparing VAR with its below/up bounds.  VAR != BELOW
	     strengthens to VAR > BELOW; VAR != UP to VAR < UP.  */
	  auto_mpz valc1;
	  wi::to_mpz (wi::to_wide (t: c1), valc1, TYPE_SIGN (type));
	  if (mpz_cmp (valc1, below) == 0)
	    cmp = GT_EXPR;
	  if (mpz_cmp (valc1, up) == 0)
	    cmp = LT_EXPR;
	}
      else
	{
	  /* Case of comparing with the bounds of the type.  */
	  wide_int min = wi::min_value (type);
	  wide_int max = wi::max_value (type);

	  if (wi::to_wide (t: c1) == min)
	    cmp = GT_EXPR;
	  if (wi::to_wide (t: c1) == max)
	    cmp = LT_EXPR;
	}

      /* Quick return if no useful information.  */
      if (cmp == NE_EXPR)
	return;

      break;

    default:
      return;
    }

  /* Split both sides of the comparison into a variable part and a
     constant offset.  */
  mpz_init (offc0);
  mpz_init (offc1);
  split_to_var_and_offset (expr: expand_simple_operations (c0), var: &varc0, offset: offc0);
  split_to_var_and_offset (expr: expand_simple_operations (c1), var: &varc1, offset: offc1);

  /* We are only interested in comparisons of expressions based on VAR.
     Swap sides if necessary so that VAR appears on the left.  */
  if (operand_equal_p (var, varc1, flags: 0))
    {
      std::swap (a&: varc0, b&: varc1);
      mpz_swap (offc0, offc1);
      cmp = swap_tree_comparison (cmp);
    }
  else if (!operand_equal_p (var, varc0, flags: 0))
    {
      mpz_clear (offc0);
      mpz_clear (offc1);
      return;
    }

  mpz_init (mint);
  mpz_init (maxt);
  get_type_static_bounds (type, mint, maxt);
  mpz_init (minc1);
  mpz_init (maxc1);
  Value_Range r (TREE_TYPE (varc1));
  /* Setup range information for varc1.  Fall back to the full range of
     TYPE when no more precise information is available.  */
  if (integer_zerop (varc1))
    {
      wi::to_mpz (0, minc1, TYPE_SIGN (type));
      wi::to_mpz (0, maxc1, TYPE_SIGN (type));
    }
  else if (TREE_CODE (varc1) == SSA_NAME
	   && INTEGRAL_TYPE_P (type)
	   && get_range_query (cfun)->range_of_expr (r, expr: varc1)
	   && !r.undefined_p ()
	   && !r.varying_p ())
    {
      gcc_assert (wi::le_p (r.lower_bound (), r.upper_bound (), sgn));
      wi::to_mpz (r.lower_bound (), minc1, sgn);
      wi::to_mpz (r.upper_bound (), maxc1, sgn);
    }
  else
    {
      mpz_set (minc1, mint);
      mpz_set (maxc1, maxt);
    }

  /* Compute valid range information for varc1 + offc1.  Note nothing
     useful can be derived if it overflows or underflows.  Overflow or
     underflow could happen when:

       offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
       offc1 < 0 && varc1 + offc1 < MIN_VAL (type).  */
  mpz_add (minc1, minc1, offc1);
  mpz_add (maxc1, maxc1, offc1);
  c1_ok = (no_wrap
	   || mpz_sgn (offc1) == 0
	   || (mpz_sgn (offc1) < 0 && mpz_cmp (minc1, mint) >= 0)
	   || (mpz_sgn (offc1) > 0 && mpz_cmp (maxc1, maxt) <= 0));
  if (!c1_ok)
    goto end;

  /* Clamp the derived range back to the representable range of TYPE.  */
  if (mpz_cmp (minc1, mint) < 0)
    mpz_set (minc1, mint);
  if (mpz_cmp (maxc1, maxt) > 0)
    mpz_set (maxc1, maxt);

  /* Turn the strict comparisons into non-strict ones by adjusting the
     constant bound by one; this is safe after the clamping above.  */
  if (cmp == LT_EXPR)
    {
      cmp = LE_EXPR;
      mpz_sub_ui (maxc1, maxc1, 1);
    }
  if (cmp == GT_EXPR)
    {
      cmp = GE_EXPR;
      mpz_add_ui (minc1, minc1, 1);
    }

  /* Compute range information for varc0.  If there is no overflow,
     the condition implied that

       (varc0) cmp (varc1 + offc1 - offc0)

     We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
     or the below bound if cmp is GE_EXPR.

     To prove there is no overflow/underflow, we need to check below
     four cases:
       1) cmp == LE_EXPR && offc0 > 0

	  (varc0 + offc0) doesn't overflow
	  && (varc1 + offc1 - offc0) doesn't underflow

       2) cmp == LE_EXPR && offc0 < 0

	  (varc0 + offc0) doesn't underflow
	  && (varc1 + offc1 - offc0) doesn't overflow

	  In this case, (varc0 + offc0) will never underflow if we can
	  prove (varc1 + offc1 - offc0) doesn't overflow.

       3) cmp == GE_EXPR && offc0 < 0

	  (varc0 + offc0) doesn't underflow
	  && (varc1 + offc1 - offc0) doesn't overflow

       4) cmp == GE_EXPR && offc0 > 0

	  (varc0 + offc0) doesn't overflow
	  && (varc1 + offc1 - offc0) doesn't underflow

	  In this case, (varc0 + offc0) will never overflow if we can
	  prove (varc1 + offc1 - offc0) doesn't underflow.

     Note we only handle case 2 and 4 in below code.  */

  mpz_sub (minc1, minc1, offc0);
  mpz_sub (maxc1, maxc1, offc0);
  c0_ok = (no_wrap
	   || mpz_sgn (offc0) == 0
	   || (cmp == LE_EXPR
	       && mpz_sgn (offc0) < 0 && mpz_cmp (maxc1, maxt) <= 0)
	   || (cmp == GE_EXPR
	       && mpz_sgn (offc0) > 0 && mpz_cmp (minc1, mint) >= 0));
  if (!c0_ok)
    goto end;

  /* Finally narrow the caller's bounds; only ever tighten, never widen.  */
  if (cmp == LE_EXPR)
    {
      if (mpz_cmp (up, maxc1) > 0)
	mpz_set (up, maxc1);
    }
  else
    {
      if (mpz_cmp (below, minc1) < 0)
	mpz_set (below, minc1);
    }

end:
  mpz_clear (mint);
  mpz_clear (maxt);
  mpz_clear (minc1);
  mpz_clear (maxc1);
  mpz_clear (offc0);
  mpz_clear (offc1);
}
339 | |
340 | /* Stores estimate on the minimum/maximum value of the expression VAR + OFF |
341 | in TYPE to MIN and MAX. */ |
342 | |
/* Stores estimate on the minimum/maximum value of the expression VAR + OFF
   in TYPE to MIN and MAX.  The estimate combines static type bounds,
   range information from the range query (for VAR itself and for PHI
   results fed by VAR on the preheader edge of LOOP), and conditions on
   dominators of the loop header.  */

static void
determine_value_range (class loop *loop, tree type, tree var, mpz_t off,
		       mpz_t min, mpz_t max)
{
  int cnt = 0;
  mpz_t minm, maxm;
  basic_block bb;
  wide_int minv, maxv;
  enum value_range_kind rtype = VR_VARYING;

  /* If the expression is a constant, we know its value exactly.  */
  if (integer_zerop (var))
    {
      mpz_set (min, off);
      mpz_set (max, off);
      return;
    }

  /* Start from the full representable range of TYPE.  */
  get_type_static_bounds (type, min, max);

  /* See if we have some range info from VRP.  */
  if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
    {
      edge e = loop_preheader_edge (loop);
      signop sgn = TYPE_SIGN (type);
      gphi_iterator gsi;

      /* Either for VAR itself...  */
      Value_Range var_range (TREE_TYPE (var));
      get_range_query (cfun)->range_of_expr (r&: var_range, expr: var);
      if (var_range.varying_p () || var_range.undefined_p ())
	rtype = VR_VARYING;
      else
	rtype = VR_RANGE;
      if (!var_range.undefined_p ())
	{
	  minv = var_range.lower_bound ();
	  maxv = var_range.upper_bound ();
	}

      /* Or for PHI results in loop->header where VAR is used as
	 PHI argument from the loop preheader edge.  Intersect the PHI
	 ranges with the range derived for VAR above.  */
      Value_Range phi_range (TREE_TYPE (var));
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
	      && get_range_query (cfun)->range_of_expr (r&: phi_range,
							expr: gimple_phi_result (gs: phi))
	      && !phi_range.varying_p ()
	      && !phi_range.undefined_p ())
	    {
	      if (rtype != VR_RANGE)
		{
		  rtype = VR_RANGE;
		  minv = phi_range.lower_bound ();
		  maxv = phi_range.upper_bound ();
		}
	      else
		{
		  minv = wi::max (x: minv, y: phi_range.lower_bound (), sgn);
		  maxv = wi::min (x: maxv, y: phi_range.upper_bound (), sgn);
		  /* If the PHI result range are inconsistent with
		     the VAR range, give up on looking at the PHI
		     results.  This can happen if VR_UNDEFINED is
		     involved.  */
		  if (wi::gt_p (x: minv, y: maxv, sgn))
		    {
		      /* Fall back to the range of VAR alone.  */
		      Value_Range vr (TREE_TYPE (var));
		      get_range_query (cfun)->range_of_expr (r&: vr, expr: var);
		      if (vr.varying_p () || vr.undefined_p ())
			rtype = VR_VARYING;
		      else
			rtype = VR_RANGE;
		      if (!vr.undefined_p ())
			{
			  minv = vr.lower_bound ();
			  maxv = vr.upper_bound ();
			}
		      break;
		    }
		}
	    }
	}
      mpz_init (minm);
      mpz_init (maxm);
      if (rtype != VR_RANGE)
	{
	  mpz_set (minm, min);
	  mpz_set (maxm, max);
	}
      else
	{
	  gcc_assert (wi::le_p (minv, maxv, sgn));
	  wi::to_mpz (minv, minm, sgn);
	  wi::to_mpz (maxv, maxm, sgn);
	}
      /* Now walk the dominators of the loop header and use the entry
	 guards to refine the estimates.  The walk is capped at
	 MAX_DOMINATORS_TO_WALK blocks to bound compile time.  */
      for (bb = loop->header;
	   bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	{
	  edge e;
	  tree c0, c1;
	  enum tree_code cmp;

	  if (!single_pred_p (bb))
	    continue;
	  e = single_pred_edge (bb);

	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    continue;

	  gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: e->src));
	  c0 = gimple_cond_lhs (gs: cond);
	  cmp = gimple_cond_code (gs: cond);
	  c1 = gimple_cond_rhs (gs: cond);

	  /* On the false edge the negated condition holds.  */
	  if (e->flags & EDGE_FALSE_VALUE)
	    cmp = invert_tree_comparison (cmp, false);

	  refine_value_range_using_guard (type, var, c0, cmp, c1, below: minm, up: maxm);
	  ++cnt;
	}

      mpz_add (minm, minm, off);
      mpz_add (maxm, maxm, off);
      /* If the computation may not wrap or off is zero, then this
	 is always fine.  If off is negative and minv + off isn't
	 smaller than type's minimum, or off is positive and
	 maxv + off isn't bigger than type's maximum, use the more
	 precise range too.  */
      if (nowrap_type_p (type)
	  || mpz_sgn (off) == 0
	  || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
	  || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
	{
	  mpz_set (min, minm);
	  mpz_set (max, maxm);
	  mpz_clear (minm);
	  mpz_clear (maxm);
	  return;
	}
      mpz_clear (minm);
      mpz_clear (maxm);
    }

  /* If the computation may wrap, we know nothing about the value, except for
     the range of the type.  */
  if (!nowrap_type_p (type))
    return;

  /* Since the addition of OFF does not wrap, if OFF is positive, then we may
     add it to MIN, otherwise to MAX.  */
  if (mpz_sgn (off) < 0)
    mpz_add (max, max, off);
  else
    mpz_add (min, min, off);
}
503 | |
504 | /* Stores the bounds on the difference of the values of the expressions |
505 | (var + X) and (var + Y), computed in TYPE, to BNDS. */ |
506 | |
507 | static void |
508 | bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y, |
509 | bounds *bnds) |
510 | { |
511 | int rel = mpz_cmp (x, y); |
512 | bool may_wrap = !nowrap_type_p (type); |
513 | |
514 | /* If X == Y, then the expressions are always equal. |
515 | If X > Y, there are the following possibilities: |
516 | a) neither of var + X and var + Y overflow or underflow, or both of |
517 | them do. Then their difference is X - Y. |
518 | b) var + X overflows, and var + Y does not. Then the values of the |
519 | expressions are var + X - M and var + Y, where M is the range of |
520 | the type, and their difference is X - Y - M. |
521 | c) var + Y underflows and var + X does not. Their difference again |
522 | is M - X + Y. |
523 | Therefore, if the arithmetics in type does not overflow, then the |
524 | bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y) |
525 | Similarly, if X < Y, the bounds are either (X - Y, X - Y) or |
526 | (X - Y, X - Y + M). */ |
527 | |
528 | if (rel == 0) |
529 | { |
530 | mpz_set_ui (bnds->below, 0); |
531 | mpz_set_ui (bnds->up, 0); |
532 | return; |
533 | } |
534 | |
535 | auto_mpz m; |
536 | wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED); |
537 | mpz_add_ui (m, m, 1); |
538 | mpz_sub (bnds->up, x, y); |
539 | mpz_set (bnds->below, bnds->up); |
540 | |
541 | if (may_wrap) |
542 | { |
543 | if (rel > 0) |
544 | mpz_sub (bnds->below, bnds->below, m); |
545 | else |
546 | mpz_add (bnds->up, bnds->up, m); |
547 | } |
548 | } |
549 | |
550 | /* From condition C0 CMP C1 derives information regarding the |
551 | difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE, |
552 | and stores it to BNDS. */ |
553 | |
/* From the fact that condition C0 CMP C1 holds, derive information
   regarding the difference of values of VARX + OFFX and VARY + OFFY,
   computed in TYPE, and narrow the interval in BNDS accordingly.  */

static void
refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
			   tree vary, mpz_t offy,
			   tree c0, enum tree_code cmp, tree c1,
			   bounds *bnds)
{
  tree varc0, varc1, ctype;
  mpz_t offc0, offc1, loffx, loffy, bnd;
  bool lbound = false;
  bool no_wrap = nowrap_type_p (type);
  bool x_ok, y_ok;

  /* Canonicalize the comparison, as in refine_value_range_using_guard:
     keep only ordered comparisons in TYPE; NE against a type bound
     becomes a strict ordered comparison.  */
  switch (cmp)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      STRIP_SIGN_NOPS (c0);
      STRIP_SIGN_NOPS (c1);
      ctype = TREE_TYPE (c0);
      if (!useless_type_conversion_p (ctype, type))
	return;

      break;

    case EQ_EXPR:
      /* We could derive quite precise information from EQ_EXPR, however, such
	 a guard is unlikely to appear, so we do not bother with handling
	 it.  */
      return;

    case NE_EXPR:
      /* NE_EXPR comparisons do not contain much of useful information, except for
	 special case of comparing with the bounds of the type.  */
      if (TREE_CODE (c1) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (type))
	return;

      /* Ensure that the condition speaks about an expression in the same type
	 as X and Y.  */
      ctype = TREE_TYPE (c0);
      if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
	return;
      c0 = fold_convert (type, c0);
      c1 = fold_convert (type, c1);

      if (TYPE_MIN_VALUE (type)
	  && operand_equal_p (c1, TYPE_MIN_VALUE (type), flags: 0))
	{
	  cmp = GT_EXPR;
	  break;
	}
      if (TYPE_MAX_VALUE (type)
	  && operand_equal_p (c1, TYPE_MAX_VALUE (type), flags: 0))
	{
	  cmp = LT_EXPR;
	  break;
	}

      return;
    default:
      return;
    }

  /* Split both sides of the guard into a variable part and an offset.  */
  mpz_init (offc0);
  mpz_init (offc1);
  split_to_var_and_offset (expr: expand_simple_operations (c0), var: &varc0, offset: offc0);
  split_to_var_and_offset (expr: expand_simple_operations (c1), var: &varc1, offset: offc1);

  /* We are only interested in comparisons of expressions based on VARX and
     VARY.  TODO -- we might also be able to derive some bounds from
     expressions containing just one of the variables.  */

  if (operand_equal_p (varx, varc1, flags: 0))
    {
      std::swap (a&: varc0, b&: varc1);
      mpz_swap (offc0, offc1);
      cmp = swap_tree_comparison (cmp);
    }

  if (!operand_equal_p (varx, varc0, flags: 0)
      || !operand_equal_p (vary, varc1, flags: 0))
    goto end;

  /* Local copies of the offsets, so the swap below does not clobber the
     caller's values.  */
  mpz_init_set (loffx, offx);
  mpz_init_set (loffy, offy);

  /* Normalize to a LT/LE comparison; when we swapped, the derived bound
     constrains the difference from below instead of from above.  */
  if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      std::swap (a&: varx, b&: vary);
      mpz_swap (offc0, offc1);
      mpz_swap (loffx, loffy);
      cmp = swap_tree_comparison (cmp);
      lbound = true;
    }

  /* If there is no overflow, the condition implies that

     (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).

     The overflows and underflows may complicate things a bit; each
     overflow decreases the appropriate offset by M, and underflow
     increases it by M.  The above inequality would not necessarily be
     true if

     -- VARX + OFFX underflows and VARX + OFFC0 does not, or
	VARX + OFFC0 overflows, but VARX + OFFX does not.
	This may only happen if OFFX < OFFC0.
     -- VARY + OFFY overflows and VARY + OFFC1 does not, or
	VARY + OFFC1 underflows and VARY + OFFY does not.
	This may only happen if OFFY > OFFC1.  */

  if (no_wrap)
    {
      x_ok = true;
      y_ok = true;
    }
  else
    {
      x_ok = (integer_zerop (varx)
	      || mpz_cmp (loffx, offc0) >= 0);
      y_ok = (integer_zerop (vary)
	      || mpz_cmp (loffy, offc1) <= 0);
    }

  if (x_ok && y_ok)
    {
      /* BND = OFFX - OFFY + OFFC1 - OFFC0, the derived bound on the
	 difference; decremented by one for a strict comparison.  */
      mpz_init (bnd);
      mpz_sub (bnd, loffx, loffy);
      mpz_add (bnd, bnd, offc1);
      mpz_sub (bnd, bnd, offc0);

      if (cmp == LT_EXPR)
	mpz_sub_ui (bnd, bnd, 1);

      if (lbound)
	{
	  mpz_neg (gmp_w: bnd, gmp_u: bnd);
	  if (mpz_cmp (bnds->below, bnd) < 0)
	    mpz_set (bnds->below, bnd);
	}
      else
	{
	  if (mpz_cmp (bnd, bnds->up) < 0)
	    mpz_set (bnds->up, bnd);
	}
      mpz_clear (bnd);
    }

  mpz_clear (loffx);
  mpz_clear (loffy);
end:
  mpz_clear (offc0);
  mpz_clear (offc1);
}
710 | |
711 | /* Stores the bounds on the value of the expression X - Y in LOOP to BNDS. |
712 | The subtraction is considered to be performed in arbitrary precision, |
713 | without overflows. |
714 | |
715 | We do not attempt to be too clever regarding the value ranges of X and |
716 | Y; most of the time, they are just integers or ssa names offsetted by |
717 | integer. However, we try to use the information contained in the |
718 | comparisons before the loop (usually created by loop header copying). */ |
719 | |
/* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
   The subtraction is considered to be performed in arbitrary precision,
   without overflows.  Initializes BNDS->below and BNDS->up; the caller is
   responsible for clearing them.

   We do not attempt to be too clever regarding the value ranges of X and
   Y; most of the time, they are just integers or ssa names offsetted by
   integer.  However, we try to use the information contained in the
   comparisons before the loop (usually created by loop header copying).  */

static void
bound_difference (class loop *loop, tree x, tree y, bounds *bnds)
{
  tree type = TREE_TYPE (x);
  tree varx, vary;
  mpz_t offx, offy;
  int cnt = 0;
  edge e;
  basic_block bb;
  tree c0, c1;
  enum tree_code cmp;

  /* Get rid of unnecessary casts, but preserve the value of
     the expressions.  */
  STRIP_SIGN_NOPS (x);
  STRIP_SIGN_NOPS (y);

  mpz_init (bnds->below);
  mpz_init (bnds->up);
  mpz_init (offx);
  mpz_init (offy);
  split_to_var_and_offset (expr: x, var: &varx, offset: offx);
  split_to_var_and_offset (expr: y, var: &vary, offset: offy);

  if (!integer_zerop (varx)
      && operand_equal_p (varx, vary, flags: 0))
    {
      /* Special case VARX == VARY -- we just need to compare the
	 offsets.  The matters are a bit more complicated in the
	 case addition of offsets may wrap.  */
      bound_difference_of_offsetted_base (type, x: offx, y: offy, bnds);
    }
  else
    {
      /* Otherwise, use the value ranges to determine the initial
	 estimates on below and up.  */
      auto_mpz minx, maxx, miny, maxy;
      determine_value_range (loop, type, var: varx, off: offx, min: minx, max: maxx);
      determine_value_range (loop, type, var: vary, off: offy, min: miny, max: maxy);

      /* X - Y is at least min(X) - max(Y) and at most max(X) - min(Y).  */
      mpz_sub (bnds->below, minx, maxy);
      mpz_sub (bnds->up, maxx, miny);
    }

  /* If both X and Y are constants, we cannot get any more precise.  */
  if (integer_zerop (varx) && integer_zerop (vary))
    goto end;

  /* Now walk the dominators of the loop header and use the entry
     guards to refine the estimates.  Capped at MAX_DOMINATORS_TO_WALK
     blocks to bound compile time.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: e->src));
      c0 = gimple_cond_lhs (gs: cond);
      cmp = gimple_cond_code (gs: cond);
      c1 = gimple_cond_rhs (gs: cond);

      /* On the false edge the negated condition holds.  */
      if (e->flags & EDGE_FALSE_VALUE)
	cmp = invert_tree_comparison (cmp, false);

      refine_bounds_using_guard (type, varx, offx, vary, offy,
				 c0, cmp, c1, bnds);
      ++cnt;
    }

end:
  mpz_clear (offx);
  mpz_clear (offy);
}
798 | |
799 | /* Update the bounds in BNDS that restrict the value of X to the bounds |
800 | that restrict the value of X + DELTA. X can be obtained as a |
801 | difference of two values in TYPE. */ |
802 | |
803 | static void |
804 | bounds_add (bounds *bnds, const widest_int &delta, tree type) |
805 | { |
806 | mpz_t mdelta, max; |
807 | |
808 | mpz_init (mdelta); |
809 | wi::to_mpz (delta, mdelta, SIGNED); |
810 | |
811 | mpz_init (max); |
812 | wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED); |
813 | |
814 | mpz_add (bnds->up, bnds->up, mdelta); |
815 | mpz_add (bnds->below, bnds->below, mdelta); |
816 | |
817 | if (mpz_cmp (bnds->up, max) > 0) |
818 | mpz_set (bnds->up, max); |
819 | |
820 | mpz_neg (gmp_w: max, gmp_u: max); |
821 | if (mpz_cmp (bnds->below, max) < 0) |
822 | mpz_set (bnds->below, max); |
823 | |
824 | mpz_clear (mdelta); |
825 | mpz_clear (max); |
826 | } |
827 | |
828 | /* Update the bounds in BNDS that restrict the value of X to the bounds |
829 | that restrict the value of -X. */ |
830 | |
831 | static void |
832 | bounds_negate (bounds *bnds) |
833 | { |
834 | mpz_t tmp; |
835 | |
836 | mpz_init_set (tmp, bnds->up); |
837 | mpz_neg (gmp_w: bnds->up, gmp_u: bnds->below); |
838 | mpz_neg (gmp_w: bnds->below, gmp_u: tmp); |
839 | mpz_clear (tmp); |
840 | } |
841 | |
842 | /* Returns inverse of X modulo 2^s, where MASK = 2^s-1. */ |
843 | |
844 | static tree |
845 | inverse (tree x, tree mask) |
846 | { |
847 | tree type = TREE_TYPE (x); |
848 | tree rslt; |
849 | unsigned ctr = tree_floor_log2 (mask); |
850 | |
851 | if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT) |
852 | { |
853 | unsigned HOST_WIDE_INT ix; |
854 | unsigned HOST_WIDE_INT imask; |
855 | unsigned HOST_WIDE_INT irslt = 1; |
856 | |
857 | gcc_assert (cst_and_fits_in_hwi (x)); |
858 | gcc_assert (cst_and_fits_in_hwi (mask)); |
859 | |
860 | ix = int_cst_value (x); |
861 | imask = int_cst_value (mask); |
862 | |
863 | for (; ctr; ctr--) |
864 | { |
865 | irslt *= ix; |
866 | ix *= ix; |
867 | } |
868 | irslt &= imask; |
869 | |
870 | rslt = build_int_cst_type (type, irslt); |
871 | } |
872 | else |
873 | { |
874 | rslt = build_int_cst (type, 1); |
875 | for (; ctr; ctr--) |
876 | { |
877 | rslt = int_const_binop (MULT_EXPR, rslt, x); |
878 | x = int_const_binop (MULT_EXPR, x, x); |
879 | } |
880 | rslt = int_const_binop (BIT_AND_EXPR, rslt, mask); |
881 | } |
882 | |
883 | return rslt; |
884 | } |
885 | |
886 | /* Derives the upper bound BND on the number of executions of loop with exit |
887 | condition S * i <> C. If NO_OVERFLOW is true, then the control variable of |
888 | the loop does not overflow. EXIT_MUST_BE_TAKEN is true if we are guaranteed |
889 | that the loop ends through this exit, i.e., the induction variable ever |
890 | reaches the value of C. |
891 | |
892 | The value C is equal to final - base, where final and base are the final and |
893 | initial value of the actual induction variable in the analysed loop. BNDS |
894 | bounds the value of this difference when computed in signed type with |
895 | unbounded range, while the computation of C is performed in an unsigned |
896 | type with the range matching the range of the type of the induction variable. |
897 | In particular, BNDS.up contains an upper bound on C in the following cases: |
898 | -- if the iv must reach its final value without overflow, i.e., if |
899 | NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or |
900 | -- if final >= base, which we know to hold when BNDS.below >= 0. */ |
901 | |
/* Derives the upper bound BND on the number of executions of loop with exit
   condition S * i <> C.  If NO_OVERFLOW is true, then the control variable of
   the loop does not overflow.  EXIT_MUST_BE_TAKEN is true if we are guaranteed
   that the loop ends through this exit, i.e., the induction variable ever
   reaches the value of C.

   The value C is equal to final - base, where final and base are the final and
   initial value of the actual induction variable in the analysed loop.  BNDS
   bounds the value of this difference when computed in signed type with
   unbounded range, while the computation of C is performed in an unsigned
   type with the range matching the range of the type of the induction variable.
   In particular, BNDS.up contains an upper bound on C in the following cases:
   -- if the iv must reach its final value without overflow, i.e., if
      NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
   -- if final >= base, which we know to hold when BNDS.below >= 0.  */

static void
number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
			     bounds *bnds, bool exit_must_be_taken)
{
  widest_int max;
  mpz_t d;
  tree type = TREE_TYPE (c);
  /* BNDS.up is usable as an upper bound on C in the two cases listed in
     the function comment; record that before possibly strengthening
     EXIT_MUST_BE_TAKEN below.  */
  bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
		       || mpz_sgn (bnds->below) >= 0);

  if (integer_onep (s)
      || (TREE_CODE (c) == INTEGER_CST
	  && TREE_CODE (s) == INTEGER_CST
	  && wi::mod_trunc (x: wi::to_wide (t: c), y: wi::to_wide (t: s),
			    TYPE_SIGN (type)) == 0)
      || (TYPE_OVERFLOW_UNDEFINED (type)
	  && multiple_of_p (type, c, s)))
    {
      /* If C is an exact multiple of S, then its value will be reached before
	 the induction variable overflows (unless the loop is exited in some
	 other way before).  Note that the actual induction variable in the
	 loop (which ranges from base to final instead of from 0 to C) may
	 overflow, in which case BNDS.up will not be giving a correct upper
	 bound on C; thus, BNDS_U_VALID had to be computed in advance.  */
      no_overflow = true;
      exit_must_be_taken = true;
    }

  /* If the induction variable can overflow, the number of iterations is at
     most the period of the control variable (or infinite, but in that case
     the whole # of iterations analysis will fail).  */
  if (!no_overflow)
    {
      /* The period of S * i modulo 2^precision is
	 2^(precision - ctz (S)).  */
      max = wi::mask <widest_int> (TYPE_PRECISION (type)
				   - wi::ctz (wi::to_wide (t: s)), negate_p: false);
      wi::to_mpz (max, bnd, UNSIGNED);
      return;
    }

  /* Now we know that the induction variable does not overflow, so the loop
     iterates at most (range of type / S) times.  */
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);

  /* If the induction variable is guaranteed to reach the value of C before
     overflow, ... */
  if (exit_must_be_taken)
    {
      /* ... then we can strengthen this to C / S, and possibly we can use
	 the upper bound on C given by BNDS.  */
      if (TREE_CODE (c) == INTEGER_CST)
	wi::to_mpz (wi::to_wide (t: c), bnd, UNSIGNED);
      else if (bnds_u_valid)
	mpz_set (bnd, bnds->up);
    }

  /* Divide the bound on C by S, rounding down.  */
  mpz_init (d);
  wi::to_mpz (wi::to_wide (t: s), d, UNSIGNED);
  mpz_fdiv_q (bnd, bnd, d);
  mpz_clear (d);
}
962 | |
/* Determines number of iterations of loop whose ending condition
   is IV <> FINAL.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that the exit must be taken eventually, i.e., that the IV
   ever reaches the value FINAL (we derived this earlier, and possibly set
   NITER->assumptions to make sure this is the case).  BNDS contains the
   bounds on the difference FINAL - IV->base.  Returns true iff the
   number of iterations was determined (possibly under the assumptions
   recorded in NITER).  */

static bool
number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv,
			 tree final, class tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  /* All arithmetic below is performed in the unsigned variant of TYPE
     so that wrap-around is well-defined.  */
  tree niter_type = unsigned_type_for (type);
  tree s, c, d, bits, assumption, tmp, bound;

  niter->control = *iv;
  niter->bound = final;
  niter->cmp = NE_EXPR;

  /* Rearrange the terms so that we get inequality S * i <> C, with S
     positive.  Also cast everything to the unsigned type.  If IV does
     not overflow, BNDS bounds the value of C.  Also, this is the
     case if the computation |FINAL - IV->base| does not overflow, i.e.,
     if BNDS->below in the result is nonnegative.  */
  if (tree_int_cst_sign_bit (iv->step))
    {
      /* Negative step: negate both the step and the difference, and
	 mirror BNDS accordingly.  */
      s = fold_convert (niter_type,
			fold_build1 (NEGATE_EXPR, type, iv->step));
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv->base),
		       fold_convert (niter_type, final));
      bounds_negate (bnds);
    }
  else
    {
      s = fold_convert (niter_type, iv->step);
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, final),
		       fold_convert (niter_type, iv->base));
    }

  /* Derive an upper bound on the number of iterations.  */
  auto_mpz max;
  number_of_iterations_ne_max (bnd: max, no_overflow: iv->no_overflow, c, s, bnds,
			       exit_must_be_taken);
  niter->max = widest_int::from (x: wi::from_mpz (niter_type, max, false),
				 TYPE_SIGN (niter_type));

  /* Compute no-overflow information for the control iv.  This can be
     proven when below two conditions are satisfied:

       1) IV evaluates toward FINAL at beginning, i.e:
	    base <= FINAL ; step > 0
	    base >= FINAL ; step < 0

       2) |FINAL - base| is an exact multiple of step.

     Unfortunately, it's hard to prove above conditions after pass loop-ch
     because loop with exit condition (IV != FINAL) usually will be guarded
     by initial-condition (IV.base - IV.step != FINAL).  In this case, we
     can alternatively try to prove below conditions:

       1') IV evaluates toward FINAL at beginning, i.e:
	    new_base = base - step < FINAL ; step > 0
					     && base - step doesn't underflow
	    new_base = base - step > FINAL ; step < 0
					     && base - step doesn't overflow

     Please refer to PR34114 as an example of loop-ch's impact.

     Note, for NE_EXPR, base equals to FINAL is a special case, in
     which the loop exits immediately, and the iv does not overflow.

     Also note, we prove condition 2) by checking base and final separately
     along with condition 1) or 1').  Since we ensure the difference
     computation of c does not wrap with cond below and the adjusted s
     will fit a signed type as well as an unsigned we can safely do
     this using the type of the IV if it is not pointer typed.  */
  tree mtype = type;
  if (POINTER_TYPE_P (type))
    mtype = niter_type;
  if (!niter->control.no_overflow
      && (integer_onep (s)
	  || (multiple_of_p (mtype, fold_convert (mtype, iv->base),
			     fold_convert (mtype, s), false)
	      && multiple_of_p (mtype, fold_convert (mtype, final),
				fold_convert (mtype, s), false))))
    {
      tree t, cond, relaxed_cond = boolean_false_node;

      if (tree_int_cst_sign_bit (iv->step))
	{
	  /* Decreasing iv: condition 1) is base >= FINAL.  */
	  cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
	  if (TREE_CODE (type) == INTEGER_TYPE)
	    {
	      /* Only when base - step doesn't overflow.  */
	      t = TYPE_MAX_VALUE (type);
	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
	      t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base);
	      if (integer_nonzerop (t))
		{
		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
		  relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node, t,
					      final);
		}
	    }
	}
      else
	{
	  /* Increasing iv: condition 1) is base <= FINAL.  */
	  cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
	  if (TREE_CODE (type) == INTEGER_TYPE)
	    {
	      /* Only when base - step doesn't underflow.  */
	      t = TYPE_MIN_VALUE (type);
	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
	      t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base);
	      if (integer_nonzerop (t))
		{
		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
		  relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node, t,
					      final);
		}
	    }
	}

      /* Try the strict condition first; if it cannot be proven from the
	 initial conditions, fall back to the relaxed variant derived
	 above for loop-header-copied loops.  */
      t = simplify_using_initial_conditions (loop, cond);
      if (!t || !integer_onep (t))
	t = simplify_using_initial_conditions (loop, relaxed_cond);

      if (t && integer_onep (t))
	{
	  /* C is an exact multiple of S, so the count is simply C / S.  */
	  niter->control.no_overflow = true;
	  niter->niter = fold_build2 (EXACT_DIV_EXPR, niter_type, c, s);
	  return true;
	}
    }

  /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
     is infinite.  Otherwise, the number of iterations is
     (inverse(s/d) * (c/d)) mod (size of mode/d).  */
  bits = num_ending_zeros (s);
  bound = build_low_bits_mask (niter_type,
			       (TYPE_PRECISION (niter_type)
				- tree_to_uhwi (bits)));

  /* D = 2^(trailing zeros of S); S becomes the odd part of the step.  */
  d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
			       build_int_cst (niter_type, 1), bits);
  s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);

  if (!exit_must_be_taken)
    {
      /* If we cannot assume that the exit is taken eventually, record the
	 assumptions for divisibility of c.  */
      assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
      assumption = fold_build2 (EQ_EXPR, boolean_type_node,
				assumption, build_int_cst (niter_type, 0));
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
  if (integer_onep (s))
    {
      niter->niter = c;
    }
  else
    {
      /* S is odd here, hence invertible modulo the masked precision;
	 multiply by its modular inverse and reduce by BOUND.  */
      tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
      niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
    }
  return true;
}
1136 | |
/* Checks whether we can determine the final value of the control variable
   of the loop with ending condition IV0 < IV1 (computed in TYPE).
   DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
   of the step.  The assumptions necessary to ensure that the computation
   of the final value does not overflow are recorded in NITER.  If we
   find the final value, we adjust DELTA and return TRUE.  Otherwise
   we return false.  BNDS bounds the value of IV1->base - IV0->base,
   and will be updated by the same amount as DELTA.  EXIT_MUST_BE_TAKEN is
   true if we know that the exit must be taken eventually.  */

static bool
number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
			       class tree_niter_desc *niter,
			       tree *delta, tree step,
			       bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = TREE_TYPE (step);
  /* MOD = how far DELTA is from the next exact multiple of STEP.  */
  tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
  tree tmod;
  tree assumption = boolean_true_node, bound, noloop;
  bool fv_comp_no_overflow;
  /* Pointer arithmetic is done in sizetype.  */
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* We can only round the final value up when MOD is a constant.  */
  if (TREE_CODE (mod) != INTEGER_CST)
    return false;
  if (integer_nonzerop (mod))
    mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
  tmod = fold_convert (type1, mod);

  /* MMOD = -MOD, used to compare the adjustment against BNDS->below.  */
  auto_mpz mmod;
  wi::to_mpz (wi::to_wide (t: mod), mmod, UNSIGNED);
  mpz_neg (gmp_w: mmod, gmp_u: mmod);

  /* If the induction variable does not overflow and the exit is taken,
     then the computation of the final value does not overflow.  This is
     also obviously the case if the new final value is equal to the
     current one.  Finally, we postulate this for pointer type variables,
     as the code cannot rely on the object to that the pointer points being
     placed at the end of the address space (and more pragmatically,
     TYPE_{MIN,MAX}_VALUE is not defined for pointers).  */
  if (integer_zerop (mod) || POINTER_TYPE_P (type))
    fv_comp_no_overflow = true;
  else if (!exit_must_be_taken)
    fv_comp_no_overflow = false;
  else
    fv_comp_no_overflow =
	    (iv0->no_overflow && integer_nonzerop (iv0->step))
	    || (iv1->no_overflow && integer_nonzerop (iv1->step));

  if (integer_nonzerop (iv0->step))
    {
      /* The final value of the iv is iv1->base + MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base <= iv1->base + MOD.  */
      if (!fv_comp_no_overflow)
	{
	  /* Record the assumption iv1->base <= MAX - MOD so that
	     iv1->base + MOD cannot overflow.  */
	  bound = fold_build2 (MINUS_EXPR, type1,
			       TYPE_MAX_VALUE (type1), tmod);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	  if (integer_zerop (assumption))
	    return false;
	}
      /* If BNDS proves DELTA > -MOD, the loop body runs at least once.  */
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build_pointer_plus (iv1->base, tmod));
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build2 (PLUS_EXPR, type1,
					   iv1->base, tmod));
    }
  else
    {
      /* The final value of the iv is iv0->base - MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base - MOD <= iv1->base.  */
      if (!fv_comp_no_overflow)
	{
	  /* Record the assumption iv0->base >= MIN + MOD so that
	     iv0->base - MOD cannot underflow.  */
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type1), tmod);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	  if (integer_zerop (assumption))
	    return false;
	}
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build_pointer_plus (iv0->base,
						       fold_build1 (NEGATE_EXPR,
								    type1, tmod)),
			      iv1->base);
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build2 (MINUS_EXPR, type1,
					   iv0->base, tmod),
			      iv1->base);
    }

  /* Accumulate the non-trivial assumptions and may-be-zero conditions
     computed above into NITER.  */
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions,
				      assumption);
  if (!integer_zerop (noloop))
    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				      niter->may_be_zero,
				      noloop);
  /* Keep BNDS and DELTA in sync: both grow by MOD.  */
  bounds_add (bnds, delta: wi::to_widest (t: mod), type);
  *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);

  return true;
}
1256 | |
/* Add assertions to NITER that ensure that the control variable of the loop
   with ending condition IV0 < IV1 does not overflow.  Types of IV0 and IV1
   are TYPE.  Returns false if we can prove that there is an overflow, true
   otherwise.  STEP is the absolute value of the step.  On success, marks
   both ivs as no_overflow.  */

static bool
assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		       class tree_niter_desc *niter, tree step)
{
  tree bound, d, assumption, diff;
  tree niter_type = TREE_TYPE (step);

  if (integer_nonzerop (iv0->step))
    {
      /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
      if (iv0->no_overflow)
	return true;

      /* If iv0->base is a constant, we can determine the last value before
	 overflow precisely; otherwise we conservatively assume
	 MAX - STEP + 1.  */

      if (TREE_CODE (iv0->base) == INTEGER_CST)
	{
	  /* DIFF = (MAX - base) mod STEP: distance from the last value the
	     iv attains to MAX.  */
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, TYPE_MAX_VALUE (type)),
			   fold_convert (niter_type, iv0->base));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      /* The loop is overflow-free iff iv1->base <= MAX - DIFF.  */
      bound = fold_build2 (MINUS_EXPR, type,
			   TYPE_MAX_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (LE_EXPR, boolean_type_node,
				iv1->base, bound);
    }
  else
    {
      /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
      if (iv1->no_overflow)
	return true;

      if (TREE_CODE (iv1->base) == INTEGER_CST)
	{
	  /* DIFF = (base - MIN) mod STEP, mirroring the case above.  */
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, iv1->base),
			   fold_convert (niter_type, TYPE_MIN_VALUE (type)));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      /* The loop is overflow-free iff iv0->base >= MIN + DIFF.  */
      bound = fold_build2 (PLUS_EXPR, type,
			   TYPE_MIN_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (GE_EXPR, boolean_type_node,
				iv0->base, bound);
    }

  /* A provably false assumption means the iv provably overflows.  */
  if (integer_zerop (assumption))
    return false;
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);

  iv0->no_overflow = true;
  iv1->no_overflow = true;
  return true;
}
1326 | |
/* Add an assumption to NITER that a loop whose ending condition
   is IV0 < IV1 rolls.  TYPE is the type of the control iv.  BNDS
   bounds the value of IV1->base - IV0->base.  If the bounds already
   prove that the loop rolls and that the niter computation cannot
   overflow, nothing is recorded.  */

static void
assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		      class tree_niter_desc *niter, bounds *bnds)
{
  tree assumption = boolean_true_node, bound, diff;
  tree mbz, mbzl, mbzr, type1;
  bool rolls_p, no_overflow_p;
  widest_int dstep;
  mpz_t mstep, max;

  /* We are going to compute the number of iterations as
     (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
     variant of TYPE.  This formula only works if

     -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1

     (where MAX is the maximum value of the unsigned variant of TYPE, and
     the computations in this formula are performed in full precision,
     i.e., without overflows).

     Usually, for loops with exit condition iv0->base + step * i < iv1->base,
     we have a condition of the form iv0->base - step < iv1->base before the loop,
     and for loops iv0->base < iv1->base - step * i the condition
     iv0->base < iv1->base + step, due to loop header copying, which enable us
     to prove the lower bound.

     The upper bound is more complicated.  Unless the expressions for initial
     and final value themselves contain enough information, we usually cannot
     derive it from the context.  */

  /* First check whether the answer does not follow from the bounds we gathered
     before.  */
  if (integer_nonzerop (iv0->step))
    dstep = wi::to_widest (t: iv0->step);
  else
    {
      /* IV1 steps downward; use the absolute value of its step.  */
      dstep = wi::sext (x: wi::to_widest (t: iv1->step), TYPE_PRECISION (type));
      dstep = -dstep;
    }

  /* MSTEP = -step + 1: the lower bound required by the formula above.  */
  mpz_init (mstep);
  wi::to_mpz (dstep, mstep, UNSIGNED);
  mpz_neg (gmp_w: mstep, gmp_u: mstep);
  mpz_add_ui (mstep, mstep, 1);

  rolls_p = mpz_cmp (mstep, bnds->below) <= 0;

  /* MAX = 2^precision - 1 - step + 1: the upper bound of the formula.  */
  mpz_init (max);
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
  mpz_add (max, max, mstep);
  no_overflow_p = (mpz_cmp (bnds->up, max) <= 0
		   /* For pointers, only values lying inside a single object
		      can be compared or manipulated by pointer arithmetics.
		      Gcc in general does not allow or handle objects larger
		      than half of the address space, hence the upper bound
		      is satisfied for pointers.  */
		   || POINTER_TYPE_P (type));
  mpz_clear (mstep);
  mpz_clear (max);

  /* Both conditions already follow from BNDS; nothing to record.  */
  if (rolls_p && no_overflow_p)
    return;

  type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Now the hard part; we must formulate the assumption(s) as expressions, and
     we must be careful not to introduce overflow.  */

  if (integer_nonzerop (iv0->step))
    {
      diff = fold_build2 (MINUS_EXPR, type1,
			  iv0->step, build_int_cst (type1, 1));

      /* We need to know that iv0->base >= MIN + iv0->step - 1.  Since
	 0 address never belongs to any object, we can assume this for
	 pointers.  */
      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type), diff);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	}

      /* And then we can compute iv0->base - diff, and compare it with
	 iv1->base.  */
      mbzl = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv0->base), diff);
      mbzr = fold_convert (type1, iv1->base);
    }
  else
    {
      /* IV1->step is negative, so adding it effectively subtracts
	 |step| - 1 from iv1->base below.  */
      diff = fold_build2 (PLUS_EXPR, type1,
			  iv1->step, build_int_cst (type1, 1));

      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MAX_VALUE (type), diff);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	}

      mbzl = fold_convert (type1, iv0->base);
      mbzr = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv1->base), diff);
    }

  /* Record the no-overflow assumption and, unless BNDS already proves the
     loop rolls, the may-be-zero condition MBZL > MBZR.  */
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);
  if (!rolls_p)
    {
      mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
      niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
					niter->may_be_zero, mbz);
    }
}
1451 | |
/* Determines number of iterations of loop whose ending condition
   is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
   The number of iterations is stored to NITER.  In both shapes the iv
   moves away from the invariant bound, so the loop only exits when the
   iv wraps around; the iteration count is the distance to the wrap
   point divided by the step.  Returns true on success.  */

static bool
number_of_iterations_until_wrap (class loop *loop, tree type, affine_iv *iv0,
				 affine_iv *iv1, class tree_niter_desc *niter)
{
  tree niter_type = unsigned_type_for (type);
  tree step, num, assumptions, may_be_zero, span;
  wide_int high, low, max, min;

  /* The loop body is skipped entirely when the condition fails on entry.  */
  may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
  if (integer_onep (may_be_zero))
    return false;

  int prec = TYPE_PRECISION (type);
  signop sgn = TYPE_SIGN (type);
  min = wi::min_value (prec, sgn);
  max = wi::max_value (prec, sgn);

  /* n < {base, C}.  */
  if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step))
    {
      step = iv1->step;
      /* MIN + C - 1 <= n.  */
      tree last = wide_int_to_tree (type, cst: min + wi::to_wide (t: step) - 1);
      assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base);
      if (integer_zerop (assumptions))
	return false;

      /* Distance from the iv's base to the wrap point (MAX).  */
      num = fold_build2 (MINUS_EXPR, niter_type,
			 wide_int_to_tree (niter_type, max),
			 fold_convert (niter_type, iv1->base));

      /* When base has the form iv + 1, if we know iv >= n, then iv + 1 < n
	 only when iv + 1 overflows, i.e. when iv == TYPE_VALUE_MAX.  */
      if (sgn == UNSIGNED
	  && integer_onep (step)
	  && TREE_CODE (iv1->base) == PLUS_EXPR
	  && integer_onep (TREE_OPERAND (iv1->base, 1)))
	{
	  tree cond = fold_build2 (GE_EXPR, boolean_type_node,
				   TREE_OPERAND (iv1->base, 0), iv0->base);
	  cond = simplify_using_initial_conditions (loop, cond);
	  if (integer_onep (cond))
	    may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node,
				       TREE_OPERAND (iv1->base, 0),
				       TYPE_MAX_VALUE (type));
	}

      /* LOW/HIGH bracket the distance travelled, for NITER->max below.  */
      high = max;
      if (TREE_CODE (iv1->base) == INTEGER_CST)
	low = wi::to_wide (t: iv1->base) - 1;
      else if (TREE_CODE (iv0->base) == INTEGER_CST)
	low = wi::to_wide (t: iv0->base);
      else
	low = min;
    }
  /* {base, -C} < n.  */
  else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
    {
      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
      /* MAX - C + 1 >= n.  */
      tree last = wide_int_to_tree (type, cst: max - wi::to_wide (t: step) + 1);
      assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base);
      if (integer_zerop (assumptions))
	return false;

      /* Distance from the iv's base down to the wrap point (MIN).  */
      num = fold_build2 (MINUS_EXPR, niter_type,
			 fold_convert (niter_type, iv0->base),
			 wide_int_to_tree (niter_type, min));
      low = min;
      if (TREE_CODE (iv0->base) == INTEGER_CST)
	high = wi::to_wide (t: iv0->base) + 1;
      else if (TREE_CODE (iv1->base) == INTEGER_CST)
	high = wi::to_wide (t: iv1->base);
      else
	high = max;
    }
  else
    return false;

  /* (delta + step - 1) / step */
  step = fold_convert (niter_type, step);
  num = fold_build2 (PLUS_EXPR, niter_type, num, step);
  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step);

  /* Upper bound on the iteration count from the HIGH/LOW bracket.  */
  widest_int delta, s;
  delta = widest_int::from (x: high, sgn) - widest_int::from (x: low, sgn);
  s = wi::to_widest (t: step);
  delta = delta + s - 1;
  niter->max = wi::udiv_floor (x: delta, y: s);

  niter->may_be_zero = may_be_zero;

  if (!integer_nonzerop (assumptions))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumptions);

  /* The control iv wraps by construction.  */
  niter->control.no_overflow = false;

  /* Update bound and exit condition as:
     bound = niter * STEP + (IVbase - STEP).
     { IVbase - STEP, +, STEP } != bound
     Here, biasing IVbase by 1 step makes 'bound' be the value before wrap.
   */
  tree base_type = TREE_TYPE (niter->control.base);
  if (POINTER_TYPE_P (base_type))
    {
      /* Do the subtraction in the unsigned variant; pointer MINUS_EXPR
	 is not valid GIMPLE.  */
      tree utype = unsigned_type_for (base_type);
      niter->control.base
	= fold_build2 (MINUS_EXPR, utype,
		       fold_convert (utype, niter->control.base),
		       fold_convert (utype, niter->control.step));
      niter->control.base = fold_convert (base_type, niter->control.base);
    }
  else
    niter->control.base
      = fold_build2 (MINUS_EXPR, base_type, niter->control.base,
		     niter->control.step);

  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
		      fold_convert (niter_type, niter->control.step));
  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
			      fold_convert (niter_type, niter->control.base));
  niter->bound = fold_convert (type, niter->bound);
  niter->cmp = NE_EXPR;

  return true;
}
1583 | |
/* Determines number of iterations of loop whose ending condition
   is IV0 < IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  BNDS bounds the difference
   IV1->base - IV0->base.  EXIT_MUST_BE_TAKEN is true if we know
   that the exit must be taken eventually.  Returns true on success.  */

static bool
number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
			 affine_iv *iv1, class tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = unsigned_type_for (type);
  tree delta, step, s;
  mpz_t mstep, tmp;

  /* Exactly one of the two ivs evolves; that one is the control iv.  */
  if (integer_nonzerop (iv0->step))
    {
      niter->control = *iv0;
      niter->cmp = LT_EXPR;
      niter->bound = iv1->base;
    }
  else
    {
      niter->control = *iv1;
      niter->cmp = GT_EXPR;
      niter->bound = iv0->base;
    }

  /* {base, -C} < n, or n < {base, C} */
  if (tree_int_cst_sign_bit (iv0->step)
      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
    return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);

  delta = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv1->base),
		       fold_convert (niter_type, iv0->base));

  /* First handle the special case that the step is +-1.  */
  if ((integer_onep (iv0->step) && integer_zerop (iv1->step))
      || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step)))
    {
      /* for (i = iv0->base; i < iv1->base; i++)

	 or

	 for (i = iv1->base; i > iv0->base; i--).

	 In both cases # of iterations is iv1->base - iv0->base, assuming that
	 iv1->base >= iv0->base.

	 First try to derive a lower bound on the value of
	 iv1->base - iv0->base, computed in full precision.  If the difference
	 is nonnegative, we are done, otherwise we must record the
	 condition.  */

      if (mpz_sgn (bnds->below) < 0)
	niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
					  iv1->base, iv0->base);
      niter->niter = delta;
      niter->max = widest_int::from (x: wi::from_mpz (niter_type, bnds->up, false),
				     TYPE_SIGN (niter_type));
      niter->control.no_overflow = true;
      return true;
    }

  /* STEP = the absolute value of the (single nonzero) step.  */
  if (integer_nonzerop (iv0->step))
    step = fold_convert (niter_type, iv0->step);
  else
    step = fold_convert (niter_type,
			 fold_build1 (NEGATE_EXPR, type, iv1->step));

  /* If we can determine the final value of the control iv exactly, we can
     transform the condition to != comparison.  In particular, this will be
     the case if DELTA is constant.  */
  if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, delta: &delta, step,
				     exit_must_be_taken, bnds))
    {
      affine_iv zps;

      /* Model the count as {0, +, STEP} != DELTA.  */
      zps.base = build_int_cst (niter_type, 0);
      zps.step = step;
      /* number_of_iterations_lt_to_ne will add assumptions that ensure that
	 zps does not overflow.  */
      zps.no_overflow = true;

      return number_of_iterations_ne (loop, type, iv: &zps,
				      final: delta, niter, exit_must_be_taken: true, bnds);
    }

  /* Make sure that the control iv does not overflow.  */
  if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
    return false;

  /* We determine the number of iterations as (delta + step - 1) / step.  For
     this to work, we must know that iv1->base >= iv0->base - step + 1,
     otherwise the loop does not roll.  */
  assert_loop_rolls_lt (type, iv0, iv1, niter, bnds);

  s = fold_build2 (MINUS_EXPR, niter_type,
		   step, build_int_cst (niter_type, 1));
  delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);

  /* NITER->max = ceil (BNDS->up / step), computed in full precision.  */
  mpz_init (mstep);
  mpz_init (tmp);
  wi::to_mpz (wi::to_wide (t: step), mstep, UNSIGNED);
  mpz_add (tmp, bnds->up, mstep);
  mpz_sub_ui (tmp, tmp, 1);
  mpz_fdiv_q (tmp, tmp, mstep);
  niter->max = widest_int::from (x: wi::from_mpz (niter_type, tmp, false),
				 TYPE_SIGN (niter_type));
  mpz_clear (mstep);
  mpz_clear (tmp);

  return true;
}
1700 | |
/* Determines number of iterations of loop whose ending condition
   is IV0 <= IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that this condition must eventually become false (we derived this
   earlier, and possibly set NITER->assumptions to make sure this
   is the case).  BNDS bounds the difference IV1->base - IV0->base.
   Reduces IV0 <= IV1 to IV0 < IV1 + 1 (or IV0 - 1 < IV1) and delegates
   to number_of_iterations_lt; IV0/IV1 are adjusted in place.  */

static bool
number_of_iterations_le (class loop *loop, tree type, affine_iv *iv0,
			 affine_iv *iv1, class tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree assumption;
  /* Pointer adjustments are built in sizetype.  */
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Say that IV0 is the control variable.  Then IV0 <= IV1 iff
     IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
     value of the type.  This we must know anyway, since if it is
     equal to this value, the loop rolls forever.  We do not check
     this condition for pointer type ivs, as the code cannot rely on
     the object to that the pointer points being placed at the end of
     the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
     not defined for pointers).  */

  if (!exit_must_be_taken && !POINTER_TYPE_P (type))
    {
      if (integer_nonzerop (iv0->step))
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv1->base, TYPE_MAX_VALUE (type));
      else
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv0->base, TYPE_MIN_VALUE (type));

      if (integer_zerop (assumption))
	return false;
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  /* Widen the range by one on the non-control side: IV1 + 1 when IV0
     steps, otherwise IV0 - 1.  */
  if (integer_nonzerop (iv0->step))
    {
      if (POINTER_TYPE_P (type))
	iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1);
      else
	iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base,
				 build_int_cst (type1, 1));
    }
  else if (POINTER_TYPE_P (type))
    iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1);
  else
    iv0->base = fold_build2 (MINUS_EXPR, type1,
			     iv0->base, build_int_cst (type1, 1));

  /* Keep BNDS consistent with the widened difference.  */
  bounds_add (bnds, delta: 1, type: type1);

  return number_of_iterations_lt (loop, type, iv0, iv1, niter, exit_must_be_taken,
				  bnds);
}
1762 | |
1763 | /* Dumps description of affine induction variable IV to FILE. */ |
1764 | |
1765 | static void |
1766 | dump_affine_iv (FILE *file, affine_iv *iv) |
1767 | { |
1768 | if (!integer_zerop (iv->step)) |
1769 | fprintf (stream: file, format: "[" ); |
1770 | |
1771 | print_generic_expr (dump_file, iv->base, TDF_SLIM); |
1772 | |
1773 | if (!integer_zerop (iv->step)) |
1774 | { |
1775 | fprintf (stream: file, format: ", + , " ); |
1776 | print_generic_expr (dump_file, iv->step, TDF_SLIM); |
1777 | fprintf (stream: file, format: "]%s" , iv->no_overflow ? "(no_overflow)" : "" ); |
1778 | } |
1779 | } |
1780 | |
1781 | /* Determine the number of iterations according to condition (for staying |
1782 | inside loop) which compares two induction variables using comparison |
1783 | operator CODE. The induction variable on left side of the comparison |
1784 | is IV0, the right-hand side is IV1. Both induction variables must have |
1785 | type TYPE, which must be an integer or pointer type. The steps of the |
1786 | ivs must be constants (or NULL_TREE, which is interpreted as constant zero). |
1787 | |
1788 | LOOP is the loop whose number of iterations we are determining. |
1789 | |
1790 | ONLY_EXIT is true if we are sure this is the only way the loop could be |
1791 | exited (including possibly non-returning function calls, exceptions, etc.) |
1792 | -- in this case we can use the information whether the control induction |
1793 | variables can overflow or not in a more efficient way. |
1794 | |
1795 | if EVERY_ITERATION is true, we know the test is executed on every iteration. |
1796 | |
1797 | The results (number of iterations and assumptions as described in |
1798 | comments at class tree_niter_desc in tree-ssa-loop.h) are stored to NITER. |
1799 | Returns false if it fails to determine number of iterations, true if it |
1800 | was determined (possibly with some assumptions). */ |
1801 | |
static bool
number_of_iterations_cond (class loop *loop,
			   tree type, affine_iv *iv0, enum tree_code code,
			   affine_iv *iv1, class tree_niter_desc *niter,
			   bool only_exit, bool every_iteration)
{
  bool exit_must_be_taken = false, ret;
  bounds bnds;

  /* If the test is not executed every iteration, wrapping may make the test
     to pass again.
     TODO: the overflow case can be still used as unreliable estimate of upper
     bound.  But we have no API to pass it down to number of iterations code
     and, at present, it will not use it anyway.  */
  if (!every_iteration
      && (!iv0->no_overflow || !iv1->no_overflow
	  || code == NE_EXPR || code == EQ_EXPR))
    return false;

  /* The meaning of these assumptions is this:
     if !assumptions
       then the rest of information does not have to be valid
     if may_be_zero then the loop does not roll, even if
       niter != 0.  */
  niter->assumptions = boolean_true_node;
  niter->may_be_zero = boolean_false_node;
  niter->niter = NULL_TREE;
  niter->max = 0;
  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
     the control variable is on lhs.  */
  if (code == GE_EXPR || code == GT_EXPR
      || (code == NE_EXPR && integer_zerop (iv0->step)))
    {
      std::swap (a&: iv0, b&: iv1);
      code = swap_tree_comparison (code);
    }

  if (POINTER_TYPE_P (type))
    {
      /* Comparison of pointers is undefined unless both iv0 and iv1 point
	 to the same object.  If they do, the control variable cannot wrap
	 (as wrap around the bounds of memory will never return a pointer
	 that would be guaranteed to point to the same object, even if we
	 avoid undefined behavior by casting to size_t and back).  */
      iv0->no_overflow = true;
      iv1->no_overflow = true;
    }

  /* If the control induction variable does not overflow and the only exit
     from the loop is the one that we analyze, we know it must be taken
     eventually.  */
  if (only_exit)
    {
      if (!integer_zerop (iv0->step) && iv0->no_overflow)
	exit_must_be_taken = true;
      else if (!integer_zerop (iv1->step) && iv1->no_overflow)
	exit_must_be_taken = true;
    }

  /* We can handle cases which neither of the sides of the comparison is
     invariant:

       {iv0.base, iv0.step} cmp_code {iv1.base, iv1.step}
     as if:
       {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}

     provided that either below condition is satisfied:

       a) the test is NE_EXPR;
       b) iv0 and iv1 do not overflow and iv0.step - iv1.step is of
	  the same sign and of less or equal magnitude than iv0.step

     This rarely occurs in practice, but it is simple enough to manage.  */
  if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      /* Pointer steps are expressed in sizetype.  */
      tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
      tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
					   iv0->step, iv1->step);

      /* For code other than NE_EXPR we have to ensure moving the evolution
	 of IV1 to that of IV0 does not introduce overflow.  */
      if (TREE_CODE (step) != INTEGER_CST
	  || !iv0->no_overflow || !iv1->no_overflow)
	{
	  if (code != NE_EXPR)
	    return false;
	  iv0->no_overflow = false;
	}
      /* If the new step of IV0 has changed sign or is of greater
	 magnitude then we do not know whether IV0 does overflow
	 and thus the transform is not valid for code other than NE_EXPR.  */
      else if (tree_int_cst_sign_bit (step) != tree_int_cst_sign_bit (iv0->step)
	       || wi::gtu_p (x: wi::abs (x: wi::to_widest (t: step)),
			     y: wi::abs (x: wi::to_widest (t: iv0->step))))
	{
	  if (POINTER_TYPE_P (type) && code != NE_EXPR)
	    /* For relational pointer compares we have further guarantees
	       that the pointers always point to the same object (or one
	       after it) and that objects do not cross the zero page.  So
	       not only is the transform always valid for relational
	       pointer compares, we also know the resulting IV does not
	       overflow.  */
	    ;
	  else if (code != NE_EXPR)
	    return false;
	  else
	    iv0->no_overflow = false;
	}

      /* Fold the evolution of IV1 into IV0; IV1 becomes invariant.  */
      iv0->step = step;
      iv1->step = build_int_cst (step_type, 0);
      iv1->no_overflow = true;
    }

  /* If the result of the comparison is a constant, the loop is weird.  More
     precise handling would be possible, but the situation is not common enough
     to waste time on it.  */
  if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
    return false;

  /* If the loop exits immediately, there is nothing to do.  */
  tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
  if (tem && integer_zerop (tem))
    {
      if (!every_iteration)
	return false;
      niter->niter = build_int_cst (unsigned_type_for (type), 0);
      niter->max = 0;
      return true;
    }

  /* OK, now we know we have a senseful loop.  Handle several cases, depending
     on what comparison operator is used.  BNDS receives GMP bounds on
     iv1->base - iv0->base and must be cleared after the switch below.  */
  bound_difference (loop, x: iv1->base, y: iv0->base, bnds: &bnds);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file,
	       format: "Analyzing # of iterations of loop %d\n" , loop->num);

      fprintf (stream: dump_file, format: " exit condition " );
      dump_affine_iv (file: dump_file, iv: iv0);
      fprintf (stream: dump_file, format: " %s " ,
	       code == NE_EXPR ? "!="
	       : code == LT_EXPR ? "<"
	       : "<=" );
      dump_affine_iv (file: dump_file, iv: iv1);
      fprintf (stream: dump_file, format: "\n" );

      fprintf (stream: dump_file, format: " bounds on difference of bases: " );
      mpz_out_str (dump_file, 10, bnds.below);
      fprintf (stream: dump_file, format: " ... " );
      mpz_out_str (dump_file, 10, bnds.up);
      fprintf (stream: dump_file, format: "\n" );
    }

  /* CODE was canonicalized above to one of NE_EXPR, LT_EXPR or LE_EXPR;
     dispatch to the specialized solver for each.  */
  switch (code)
    {
    case NE_EXPR:
      gcc_assert (integer_zerop (iv1->step));
      ret = number_of_iterations_ne (loop, type, iv: iv0, final: iv1->base, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    case LT_EXPR:
      ret = number_of_iterations_lt (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    case LE_EXPR:
      ret = number_of_iterations_le (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    default:
      gcc_unreachable ();
    }

  /* Release the GMP values allocated by bound_difference.  */
  mpz_clear (bnds.up);
  mpz_clear (bnds.below);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (ret)
	{
	  fprintf (stream: dump_file, format: "  result:\n" );
	  if (!integer_nonzerop (niter->assumptions))
	    {
	      fprintf (stream: dump_file, format: " under assumptions " );
	      print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
	      fprintf (stream: dump_file, format: "\n" );
	    }

	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (stream: dump_file, format: " zero if " );
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	      fprintf (stream: dump_file, format: "\n" );
	    }

	  fprintf (stream: dump_file, format: " # of iterations " );
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (stream: dump_file, format: ", bounded by " );
	  print_decu (wi: niter->max, file: dump_file);
	  fprintf (stream: dump_file, format: "\n" );
	}
      else
	fprintf (stream: dump_file, format: " failed\n\n" );
    }
  return ret;
}
2016 | |
2017 | /* Return an expression that computes the popcount of src. */ |
2018 | |
static tree
build_popcount_expr (tree src)
{
  tree fn;
  bool use_ifn = false;
  /* Precision of SRC's type and of the standard integer types, used to
     pick the matching __builtin_popcount{,l,ll} variant.  */
  int prec = TYPE_PRECISION (TREE_TYPE (src));
  int i_prec = TYPE_PRECISION (integer_type_node);
  int li_prec = TYPE_PRECISION (long_integer_type_node);
  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);

  tree utype = unsigned_type_for (TREE_TYPE (src));
  src = fold_convert (utype, src);

  /* Prefer the internal function when the target supports it directly;
     otherwise fall back to a builtin of wide enough precision.  Fail for
     precisions we cannot map to any builtin.  */
  if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, OPTIMIZE_FOR_BOTH))
    use_ifn = true;
  else if (prec <= i_prec)
    fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNT);
  else if (prec == li_prec)
    fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNTL);
  else if (prec == lli_prec || prec == 2 * lli_prec)
    fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNTLL);
  else
    return NULL_TREE;

  tree call;
  if (use_ifn)
    call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT,
					 integer_type_node, 1, src);
  else if (prec == 2 * lli_prec)
    {
      /* Double-word case: popcount (src) is the sum of the popcounts of
	 the two long-long-sized halves.  */
      tree src1 = fold_convert (long_long_unsigned_type_node,
				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
					     unshare_expr (src),
					     build_int_cst (integer_type_node,
							    lli_prec)));
      tree src2 = fold_convert (long_long_unsigned_type_node, src);
      tree call1 = build_call_expr (fn, 1, src1);
      tree call2 = build_call_expr (fn, 1, src2);
      call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
    }
  else
    {
      /* Narrow arguments are widened to unsigned int; the extra zero bits
	 do not change the popcount.  */
      if (prec < i_prec)
	src = fold_convert (unsigned_type_node, src);

      call = build_call_expr (fn, 1, src);
    }

  return call;
}
2069 | |
2070 | /* Utility function to check if OP is defined by a stmt |
2071 | that is a val - 1. */ |
2072 | |
2073 | static bool |
2074 | ssa_defined_by_minus_one_stmt_p (tree op, tree val) |
2075 | { |
2076 | gimple *stmt; |
2077 | return (TREE_CODE (op) == SSA_NAME |
2078 | && (stmt = SSA_NAME_DEF_STMT (op)) |
2079 | && is_gimple_assign (gs: stmt) |
2080 | && (gimple_assign_rhs_code (gs: stmt) == PLUS_EXPR) |
2081 | && val == gimple_assign_rhs1 (gs: stmt) |
2082 | && integer_minus_onep (gimple_assign_rhs2 (gs: stmt))); |
2083 | } |
2084 | |
2085 | /* See comment below for number_of_iterations_bitcount. |
2086 | For popcount, we have: |
2087 | |
2088 | modify: |
2089 | _1 = iv_1 + -1 |
2090 | iv_2 = iv_1 & _1 |
2091 | |
2092 | test: |
2093 | if (iv != 0) |
2094 | |
2095 | modification count: |
2096 | popcount (src) |
2097 | |
2098 | */ |
2099 | |
static bool
number_of_iterations_popcount (loop_p loop, edge exit,
			       enum tree_code code,
			       class tree_niter_desc *niter)
{
  bool modify_before_test = true;
  HOST_WIDE_INT max;

  /* Check that condition for staying inside the loop is like
     if (iv != 0).  */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || code != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  tree iv_2 = gimple_cond_lhs (gs: cond_stmt);
  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node.  */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi.  */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1).  */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || gimple_assign_rhs_code (gs: iv_2_stmt) != BIT_AND_EXPR)
    return false;

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);
  tree _1 = gimple_assign_rhs2 (gs: iv_2_stmt);

  /* Check that _1 is defined by (_1 = iv_1 + -1).
     Also make sure that _1 is the same in and_stmt and _1 defining stmt.
     Also canonicalize if _1 and iv_1 are reversed.  */
  if (ssa_defined_by_minus_one_stmt_p (op: iv_1, val: _1))
    std::swap (a&: iv_1, b&: _1);
  else if (ssa_defined_by_minus_one_stmt_p (op: _1, val: iv_1))
    ;
  else
    return false;

  /* Check the recurrence: the phi in the loop header must feed iv_1 and
     receive iv_2 back over the latch edge.  */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the initial value entering from the
     preheader.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Get the corresponding popcount builtin.  */
  tree expr = build_popcount_expr (src);

  if (!expr)
    return false;

  /* Each iteration clears one set bit, so the count cannot exceed the
     precision of SRC.  */
  max = src_precision;

  tree may_be_zero = boolean_false_node;

  if (modify_before_test)
    {
      /* The test follows the modification, so the latch runs one time
	 fewer than popcount (src) -- and only if SRC is not already
	 zero.  */
      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
			  integer_one_node);
      max = max - 1;
      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
				 build_zero_cst (TREE_TYPE (src)));
    }

  expr = fold_convert (unsigned_type_node, expr);

  niter->assumptions = boolean_true_node;
  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
  niter->niter = simplify_using_initial_conditions(loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;
  return true;
}
2198 | |
2199 | /* Return an expression that counts the leading/trailing zeroes of src. |
2200 | |
2201 | If define_at_zero is true, then the built expression will be defined to |
2202 | return the precision of src when src == 0 (using either a conditional |
2203 | expression or a suitable internal function). |
2204 | Otherwise, we can elide the conditional expression and let src = 0 invoke |
2205 | undefined behaviour. */ |
2206 | |
static tree
build_cltz_expr (tree src, bool leading, bool define_at_zero)
{
  tree fn;
  internal_fn ifn = leading ? IFN_CLZ : IFN_CTZ;
  bool use_ifn = false;
  /* Precision of SRC's type and of the standard integer types, used to
     pick the matching __builtin_c[lt]z{,l,ll} variant.  */
  int prec = TYPE_PRECISION (TREE_TYPE (src));
  int i_prec = TYPE_PRECISION (integer_type_node);
  int li_prec = TYPE_PRECISION (long_integer_type_node);
  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);

  tree utype = unsigned_type_for (TREE_TYPE (src));
  src = fold_convert (utype, src);

  /* Prefer the internal function when the target supports it directly;
     otherwise select a builtin by precision, failing if none fits.  */
  if (direct_internal_fn_supported_p (ifn, utype, OPTIMIZE_FOR_BOTH))
    use_ifn = true;
  else if (prec <= i_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZ)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZ);
  else if (prec == li_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZL)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZL);
  else if (prec == lli_prec || prec == 2 * lli_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZLL)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZLL);
  else
    return NULL_TREE;

  tree call;
  if (use_ifn)
    {
      int val;
      int optab_defined_at_zero
	= (leading
	   ? CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val)
	   : CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val));
      tree arg2 = NULL_TREE;
      /* If the target's optab is defined at zero with exactly the value we
	 need (the precision), pass it as the IFN's second argument;
	 otherwise emit an explicit SRC != 0 guard below.  */
      if (define_at_zero && optab_defined_at_zero == 2 && val == prec)
	arg2 = build_int_cst (integer_type_node, val);
      call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
					   integer_type_node, arg2 ? 2 : 1,
					   src, arg2);
      if (define_at_zero && arg2 == NULL_TREE)
	{
	  tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
				      build_zero_cst (TREE_TYPE (src)));
	  call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
			      build_int_cst (integer_type_node, prec));
	}
    }
  else if (prec == 2 * lli_prec)
    {
      /* Double-word case: split SRC into two long-long halves.  */
      tree src1 = fold_convert (long_long_unsigned_type_node,
				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
					     unshare_expr (src),
					     build_int_cst (integer_type_node,
							    lli_prec)));
      tree src2 = fold_convert (long_long_unsigned_type_node, src);
      /* We count the zeroes in src1, and add the number in src2 when src1
	 is 0.  For trailing zeroes the roles of the halves are swapped.  */
      if (!leading)
	std::swap (a&: src1, b&: src2);
      tree call1 = build_call_expr (fn, 1, src1);
      tree call2 = build_call_expr (fn, 1, src2);
      if (define_at_zero)
	{
	  tree is_zero2 = fold_build2 (NE_EXPR, boolean_type_node, src2,
				       build_zero_cst (TREE_TYPE (src2)));
	  call2 = fold_build3 (COND_EXPR, integer_type_node, is_zero2, call2,
			       build_int_cst (integer_type_node, lli_prec));
	}
      tree is_zero1 = fold_build2 (NE_EXPR, boolean_type_node, src1,
				   build_zero_cst (TREE_TYPE (src1)));
      call = fold_build3 (COND_EXPR, integer_type_node, is_zero1, call1,
			  fold_build2 (PLUS_EXPR, integer_type_node, call2,
				       build_int_cst (integer_type_node,
						      lli_prec)));
    }
  else
    {
      if (prec < i_prec)
	src = fold_convert (unsigned_type_node, src);

      call = build_call_expr (fn, 1, src);
      /* Widening SRC added i_prec - prec leading zero bits; subtract them
	 from the CLZ result.  */
      if (leading && prec < i_prec)
	call = fold_build2 (MINUS_EXPR, integer_type_node, call,
			    build_int_cst (integer_type_node, i_prec - prec));
      if (define_at_zero)
	{
	  tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
				      build_zero_cst (TREE_TYPE (src)));
	  call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
			      build_int_cst (integer_type_node, prec));
	}
    }

  return call;
}
2305 | |
2306 | /* See comment below for number_of_iterations_bitcount. |
2307 | For c[lt]z, we have: |
2308 | |
2309 | modify: |
2310 | iv_2 = iv_1 << 1 OR iv_1 >> 1 |
2311 | |
2312 | test: |
2313 | if (iv & 1 << (prec-1)) OR (iv & 1) |
2314 | |
2315 | modification count: |
2316 | src precision - c[lt]z (src) |
2317 | |
2318 | */ |
2319 | |
static bool
number_of_iterations_cltz (loop_p loop, edge exit,
			   enum tree_code code,
			   class tree_niter_desc *niter)
{
  bool modify_before_test = true;
  HOST_WIDE_INT max;
  int checked_bit;
  tree iv_2;

  /* Check that condition for staying inside the loop is like
     if (iv == 0).  */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || (code != EQ_EXPR && code != GE_EXPR)
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  if (code == EQ_EXPR)
    {
      /* Make sure we check a bitwise and with a suitable constant.  */
      gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond_stmt));
      if (!is_gimple_assign (gs: and_stmt)
	  || gimple_assign_rhs_code (gs: and_stmt) != BIT_AND_EXPR
	  || !integer_pow2p (gimple_assign_rhs2 (gs: and_stmt))
	  || TREE_CODE (gimple_assign_rhs1 (and_stmt)) != SSA_NAME)
	return false;

      /* The bit position singled out by the power-of-two mask.  */
      checked_bit = tree_log2 (gimple_assign_rhs2 (gs: and_stmt));

      iv_2 = gimple_assign_rhs1 (gs: and_stmt);
    }
  else
    {
      /* We have a GE_EXPR - a signed comparison with zero is equivalent to
	 testing the leading bit, so check for this pattern too.  */

      iv_2 = gimple_cond_lhs (gs: cond_stmt);
      tree test_value_type = TREE_TYPE (iv_2);

      if (TYPE_UNSIGNED (test_value_type))
	return false;

      gimple *test_value_stmt = SSA_NAME_DEF_STMT (iv_2);

      if (is_gimple_assign (gs: test_value_stmt)
	  && gimple_assign_rhs_code (gs: test_value_stmt) == NOP_EXPR)
	{
	  /* If the test value comes from a NOP_EXPR, then we need to unwrap
	     this.  We conservatively require that both types have the same
	     precision.  */
	  iv_2 = gimple_assign_rhs1 (gs: test_value_stmt);
	  tree rhs_type = TREE_TYPE (iv_2);
	  if (TREE_CODE (iv_2) != SSA_NAME
	      || TREE_CODE (rhs_type) != INTEGER_TYPE
	      || (TYPE_PRECISION (rhs_type)
		  != TYPE_PRECISION (test_value_type)))
	    return false;
	}

      /* A signed >= 0 test checks the sign (leading) bit.  */
      checked_bit = TYPE_PRECISION (test_value_type) - 1;
    }

  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node.  */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi.  */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is a logical shift by one stmt:
     iv_2 = iv_1 {<<|>>} 1.  A right shift must be logical, i.e. on an
     unsigned type.  */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || (gimple_assign_rhs_code (gs: iv_2_stmt) != LSHIFT_EXPR
	  && (gimple_assign_rhs_code (gs: iv_2_stmt) != RSHIFT_EXPR
	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
      || !integer_onep (gimple_assign_rhs2 (gs: iv_2_stmt)))
    return false;

  bool left_shift = (gimple_assign_rhs_code (gs: iv_2_stmt) == LSHIFT_EXPR);

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);

  /* Check the recurrence: the header phi must feed iv_1 and receive iv_2
     back over the latch edge.  */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the initial value from the preheader.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Apply any needed preprocessing to src: shift out the bits that can
     never reach the checked bit, so the c[lt]z of the result matches the
     iteration count.  */
  int num_ignored_bits;
  if (left_shift)
    num_ignored_bits = src_precision - checked_bit - 1;
  else
    num_ignored_bits = checked_bit;

  /* With the modification ahead of the test, one extra shift has already
     happened by the time the bit is examined.  */
  if (modify_before_test)
    num_ignored_bits++;

  if (num_ignored_bits != 0)
    src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
		       TREE_TYPE (src), src,
		       build_int_cst (integer_type_node, num_ignored_bits));

  /* Get the corresponding c[lt]z builtin.  Not defined at zero; the
     SRC != 0 assumption below covers that case.  */
  tree expr = build_cltz_expr (src, leading: left_shift, define_at_zero: false);

  if (!expr)
    return false;

  max = src_precision - num_ignored_bits - 1;

  expr = fold_convert (unsigned_type_node, expr);

  /* The count is only meaningful when some relevant bit of SRC is set.  */
  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
				  build_zero_cst (TREE_TYPE (src)));

  niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
  niter->may_be_zero = boolean_false_node;
  niter->niter = simplify_using_initial_conditions (loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  return true;
}
2467 | |
2468 | /* See comment below for number_of_iterations_bitcount. |
2469 | For c[lt]z complement, we have: |
2470 | |
2471 | modify: |
2472 | iv_2 = iv_1 >> 1 OR iv_1 << 1 |
2473 | |
2474 | test: |
2475 | if (iv != 0) |
2476 | |
2477 | modification count: |
2478 | src precision - c[lt]z (src) |
2479 | |
2480 | */ |
2481 | |
static bool
number_of_iterations_cltz_complement (loop_p loop, edge exit,
				      enum tree_code code,
				      class tree_niter_desc *niter)
{
  bool modify_before_test = true;
  HOST_WIDE_INT max;

  /* Check that condition for staying inside the loop is like
     if (iv != 0).  */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || code != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  tree iv_2 = gimple_cond_lhs (gs: cond_stmt);
  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node.  */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi.  */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is a logical shift by one stmt:
     iv_2 = iv_1 {>>|<<} 1.  A right shift must be logical, i.e. on an
     unsigned type.  */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || (gimple_assign_rhs_code (gs: iv_2_stmt) != LSHIFT_EXPR
	  && (gimple_assign_rhs_code (gs: iv_2_stmt) != RSHIFT_EXPR
	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
      || !integer_onep (gimple_assign_rhs2 (gs: iv_2_stmt)))
    return false;

  bool left_shift = (gimple_assign_rhs_code (gs: iv_2_stmt) == LSHIFT_EXPR);

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);

  /* Check the recurrence: the header phi must feed iv_1 and receive iv_2
     back over the latch edge.  */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the initial value from the preheader.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Get the corresponding c[lt]z builtin.  The loop shifts away from the
     counted zeroes (hence "complement": count the opposite end from the
     shift direction), and SRC == 0 must yield the full precision, so
     request a builtin defined at zero.  */
  tree expr = build_cltz_expr (src, leading: !left_shift, define_at_zero: true);

  if (!expr)
    return false;

  /* The loop runs until all significant bits are shifted out:
     precision - c[lt]z (src) iterations.  */
  expr = fold_build2 (MINUS_EXPR, integer_type_node,
		      build_int_cst (integer_type_node, src_precision),
		      expr);

  max = src_precision;

  tree may_be_zero = boolean_false_node;

  if (modify_before_test)
    {
      /* The test follows the modification, so the latch runs one time
	 fewer -- and only when SRC is not already zero.  */
      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
			  integer_one_node);
      max = max - 1;
      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
				 build_zero_cst (TREE_TYPE (src)));
    }

  expr = fold_convert (unsigned_type_node, expr);

  niter->assumptions = boolean_true_node;
  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
  niter->niter = simplify_using_initial_conditions (loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;
  return true;
}
2579 | |
2580 | /* See if LOOP contains a bit counting idiom. The idiom consists of two parts: |
   1. A modification to the induction variable;
2582 | 2. A test to determine whether or not to exit the loop. |
2583 | |
2584 | These can come in either order - i.e.: |
2585 | |
2586 | <bb 3> |
2587 | iv_1 = PHI <src(2), iv_2(4)> |
2588 | if (test (iv_1)) |
2589 | goto <bb 4> |
2590 | else |
2591 | goto <bb 5> |
2592 | |
2593 | <bb 4> |
2594 | iv_2 = modify (iv_1) |
2595 | goto <bb 3> |
2596 | |
2597 | OR |
2598 | |
2599 | <bb 3> |
2600 | iv_1 = PHI <src(2), iv_2(4)> |
2601 | iv_2 = modify (iv_1) |
2602 | |
2603 | <bb 4> |
2604 | if (test (iv_2)) |
2605 | goto <bb 3> |
2606 | else |
2607 | goto <bb 5> |
2608 | |
2609 | The second form can be generated by copying the loop header out of the loop. |
2610 | |
2611 | In the first case, the number of latch executions will be equal to the |
2612 | number of induction variable modifications required before the test fails. |
2613 | |
2614 | In the second case (modify_before_test), if we assume that the number of |
2615 | modifications required before the test fails is nonzero, then the number of |
2616 | latch executions will be one less than this number. |
2617 | |
2618 | If we recognise the pattern, then we update niter accordingly, and return |
2619 | true. */ |
2620 | |
2621 | static bool |
2622 | number_of_iterations_bitcount (loop_p loop, edge exit, |
2623 | enum tree_code code, |
2624 | class tree_niter_desc *niter) |
2625 | { |
2626 | return (number_of_iterations_popcount (loop, exit, code, niter) |
2627 | || number_of_iterations_cltz (loop, exit, code, niter) |
2628 | || number_of_iterations_cltz_complement (loop, exit, code, niter)); |
2629 | } |
2630 | |
/* Substitute NEW_TREE for OLD in EXPR and fold the result.
   If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
   all SSA names are replaced with the result of calling the VALUEIZE
   function with the SSA name as argument.  If DO_FOLD is false the
   rebuilt expression is returned without folding.  */

tree
simplify_replace_tree (tree expr, tree old, tree new_tree,
		       tree (*valueize) (tree, void*), void *context,
		       bool do_fold)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, se;

  if (!expr)
    return NULL_TREE;

  /* Do not bother to replace constants.  */
  if (CONSTANT_CLASS_P (expr))
    return expr;

  if (valueize)
    {
      if (TREE_CODE (expr) == SSA_NAME)
	{
	  new_tree = valueize (expr, context);
	  if (new_tree != expr)
	    return new_tree;
	}
    }
  else if (expr == old
	   || operand_equal_p (expr, old, flags: 0))
    return unshare_expr (new_tree);

  /* Non-expressions have no operands to recurse into.  */
  if (!EXPR_P (expr))
    return expr;

  /* Recurse into the operands; copy EXPR lazily, only when the first
     operand actually changes, to preserve sharing otherwise.  */
  n = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < n; i++)
    {
      e = TREE_OPERAND (expr, i);
      se = simplify_replace_tree (expr: e, old, new_tree, valueize, context, do_fold);
      if (e == se)
	continue;

      if (!ret)
	ret = copy_node (expr);

      TREE_OPERAND (ret, i) = se;
    }

  /* RET is non-NULL iff some operand was replaced.  */
  return (ret ? (do_fold ? fold (ret) : ret) : expr);
}
2683 | |
/* Expand definitions of ssa names in EXPR as long as they are simple
   enough, and return the new expression.  If STOP is specified, stop
   expanding if EXPR equals to it.  CACHE maps already-visited subtrees
   to their expansions so shared subtrees are expanded only once.  */

static tree
expand_simple_operations (tree expr, tree stop, hash_map<tree, tree> &cache)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, ee, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return expr;

  if (is_gimple_min_invariant (expr))
    return expr;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      /* EXPR is a compound expression: expand each operand, copying
	 EXPR lazily only if some operand changed.  */
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  e = TREE_OPERAND (expr, i);
	  if (!e)
	    continue;
	  /* SCEV analysis feeds us with a proper expression
	     graph matching the SSA graph.  Avoid turning it
	     into a tree here, thus handle tree sharing
	     properly.
	     ??? The SSA walk below still turns the SSA graph
	     into a tree but until we find a testcase do not
	     introduce additional tree sharing here.  */
	  bool existed_p;
	  tree &cee = cache.get_or_insert (k: e, existed: &existed_p);
	  if (existed_p)
	    ee = cee;
	  else
	    {
	      /* Pre-insert E mapping to itself so a recursive visit of
		 E terminates; record the real expansion afterwards.  */
	      cee = e;
	      ee = expand_simple_operations (expr: e, stop, cache);
	      if (ee != e)
		*cache.get (k: e) = ee;
	    }
	  if (e == ee)
	    continue;

	  if (!ret)
	    ret = copy_node (expr);

	  TREE_OPERAND (ret, i) = ee;
	}

      if (!ret)
	return expr;

      /* Folding the rebuilt expression may emit spurious overflow
	 warnings; suppress them.  */
      fold_defer_overflow_warnings ();
      ret = fold (ret);
      fold_undefer_and_ignore_overflow_warnings ();
      return ret;
    }

  /* Stop if it's not ssa name or the one we don't want to expand.  */
  if (TREE_CODE (expr) != SSA_NAME || expr == stop)
    return expr;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      basic_block src, dest;

      /* Only look through degenerate (single-argument) phis.  */
      if (gimple_phi_num_args (gs: stmt) != 1)
	return expr;
      e = PHI_ARG_DEF (stmt, 0);

      /* Avoid propagating through loop exit phi nodes, which
	 could break loop-closed SSA form restrictions.  */
      dest = gimple_bb (g: stmt);
      src = single_pred (bb: dest);
      if (TREE_CODE (e) == SSA_NAME
	  && src->loop_father != dest->loop_father)
	return expr;

      return expand_simple_operations (expr: e, stop, cache);
    }
  if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
    return expr;

  /* Avoid expanding to expressions that contain SSA names that need
     to take part in abnormal coalescing.  */
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
    if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
      return expr;

  e = gimple_assign_rhs1 (gs: stmt);
  code = gimple_assign_rhs_code (gs: stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (is_gimple_min_invariant (e))
	return e;

      if (code == SSA_NAME)
	return expand_simple_operations (expr: e, stop, cache);
      else if (code == ADDR_EXPR)
	{
	  /* For &MEM[ptr + cst1] + cst2 expand PTR and fold the constant
	     offsets into a single POINTER_PLUS_EXPR.  */
	  poly_int64 offset;
	  tree base = get_addr_base_and_unit_offset (TREE_OPERAND (e, 0),
						     &offset);
	  if (base
	      && TREE_CODE (base) == MEM_REF)
	    {
	      ee = expand_simple_operations (TREE_OPERAND (base, 0), stop,
					     cache);
	      return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (expr), ee,
				  wide_int_to_tree (sizetype,
						    mem_ref_offset (base)
						    + offset));
	    }
	}

      return expr;
    }

  switch (code)
    {
    CASE_CONVERT:
      /* Casts are simple.  */
      ee = expand_simple_operations (expr: e, stop, cache);
      return fold_build1 (code, TREE_TYPE (expr), ee);

    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      /* Do not look through arithmetic that traps on overflow.  */
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
	  && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
	return expr;
      /* Fallthru.  */
    case POINTER_PLUS_EXPR:
      /* And increments and decrements by a constant are simple.  */
      e1 = gimple_assign_rhs2 (gs: stmt);
      if (!is_gimple_min_invariant (e1))
	return expr;

      ee = expand_simple_operations (expr: e, stop, cache);
      return fold_build2 (code, TREE_TYPE (expr), ee, e1);

    default:
      return expr;
    }
}
2836 | |
2837 | tree |
2838 | expand_simple_operations (tree expr, tree stop) |
2839 | { |
2840 | hash_map<tree, tree> cache; |
2841 | return expand_simple_operations (expr, stop, cache); |
2842 | } |
2843 | |
/* Tries to simplify EXPR using the condition COND.  Returns the simplified
   expression (or EXPR unchanged, if no simplification was possible).  */

static tree
tree_simplify_using_condition_1 (tree cond, tree expr)
{
  bool changed;
  tree e, e0, e1, e2, notcond;
  enum tree_code code = TREE_CODE (expr);

  if (code == INTEGER_CST)
    return expr;

  /* For logical connectives simplify each operand recursively and
     rebuild EXPR only if some operand changed.  */
  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      changed = false;

      e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* In case COND is equality, we may be able to simplify EXPR by copy/constant
     propagation, and vice versa.  Fold does not handle this, since it is
     considered too expensive.  */
  if (TREE_CODE (cond) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (cond, 0);
      e1 = TREE_OPERAND (cond, 1);

      /* We know that e0 == e1.  Check whether we cannot simplify expr
	 using this fact.  Only a substitution that reduces EXPR to a
	 constant is accepted.  */
      e = simplify_replace_tree (expr, old: e0, new_tree: e1);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;

      e = simplify_replace_tree (expr, old: e1, new_tree: e0);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;
    }
  if (TREE_CODE (expr) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true.  */
      e = simplify_replace_tree (expr: cond, old: e0, new_tree: e1);
      if (integer_zerop (e))
	return e;
      e = simplify_replace_tree (expr: cond, old: e1, new_tree: e0);
      if (integer_zerop (e))
	return e;
    }
  if (TREE_CODE (expr) == NE_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true.  */
      e = simplify_replace_tree (expr: cond, old: e0, new_tree: e1);
      if (integer_zerop (e))
	return boolean_true_node;
      e = simplify_replace_tree (expr: cond, old: e1, new_tree: e0);
      if (integer_zerop (e))
	return boolean_true_node;
    }

  /* Check whether COND ==> EXPR.  */
  notcond = invert_truthvalue (cond);
  e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, expr);
  if (e && integer_nonzerop (e))
    return e;

  /* Check whether COND ==> not EXPR.  */
  e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, expr);
  if (e && integer_zerop (e))
    return e;

  return expr;
}
2949 | |
2950 | /* Tries to simplify EXPR using the condition COND. Returns the simplified |
2951 | expression (or EXPR unchanged, if no simplification was possible). |
2952 | Wrapper around tree_simplify_using_condition_1 that ensures that chains |
2953 | of simple operations in definitions of ssa names in COND are expanded, |
2954 | so that things like casts or incrementing the value of the bound before |
2955 | the loop do not cause us to fail. */ |
2956 | |
2957 | static tree |
2958 | tree_simplify_using_condition (tree cond, tree expr) |
2959 | { |
2960 | cond = expand_simple_operations (expr: cond); |
2961 | |
2962 | return tree_simplify_using_condition_1 (cond, expr); |
2963 | } |
2964 | |
/* Tries to simplify EXPR using the conditions on entry to LOOP.
   Returns the simplified expression (or EXPR unchanged, if no
   simplification was possible).  */

tree
simplify_using_initial_conditions (class loop *loop, tree expr)
{
  edge e;
  basic_block bb;
  tree cond, expanded, backup;
  int cnt = 0;

  if (TREE_CODE (expr) == INTEGER_CST)
    return expr;

  backup = expanded = expand_simple_operations (expr);

  /* Limit walking the dominators to avoid quadraticness in
     the number of BBs times the number of loops in degenerate
     cases.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      /* Only a single-predecessor block reached through a true/false
	 edge yields a usable entry condition.  */
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* Rebuild the controlling condition; invert it if BB is reached
	 on the false edge.  */
      gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: e->src));
      cond = fold_build2 (gimple_cond_code (stmt),
			  boolean_type_node,
			  gimple_cond_lhs (stmt),
			  gimple_cond_rhs (stmt));
      if (e->flags & EDGE_FALSE_VALUE)
	cond = invert_truthvalue (cond);
      expanded = tree_simplify_using_condition (cond, expr: expanded);
      /* Break if EXPR is simplified to const values.  */
      if (expanded
	  && (integer_zerop (expanded) || integer_nonzerop (expanded)))
	return expanded;

      ++cnt;
    }

  /* Return the original expression if no simplification is done.  */
  return operand_equal_p (backup, expanded, flags: 0) ? expr : expanded;
}
3015 | |
/* Tries to simplify EXPR using the evolutions of the loop invariants
   in the superloops of LOOP.  Returns the simplified expression
   (or EXPR unchanged, if no simplification was possible).  */

static tree
simplify_using_outer_evolutions (class loop *loop, tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  bool changed;
  tree e, e0, e1, e2;

  if (is_gimple_min_invariant (expr))
    return expr;

  /* For logical connectives recurse into the operands and rebuild
     EXPR only if some operand was simplified.  */
  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      changed = false;

      e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* Otherwise instantiate symbolic parameters and accept the result
     only if it folds to an invariant.  */
  e = instantiate_parameters (loop, chrec: expr);
  if (is_gimple_min_invariant (e))
    return e;

  return expr;
}
3070 | |
3071 | /* Returns true if EXIT is the only possible exit from LOOP. */ |
3072 | |
3073 | bool |
3074 | loop_only_exit_p (const class loop *loop, basic_block *body, const_edge exit) |
3075 | { |
3076 | gimple_stmt_iterator bsi; |
3077 | unsigned i; |
3078 | |
3079 | if (exit != single_exit (loop)) |
3080 | return false; |
3081 | |
3082 | for (i = 0; i < loop->num_nodes; i++) |
3083 | for (bsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
3084 | if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi))) |
3085 | return false; |
3086 | |
3087 | return true; |
3088 | } |
3089 | |
/* Stores description of number of iterations of LOOP derived from
   EXIT (an exit edge of the LOOP) in NITER.  Returns true if some useful
   information could be derived (and fields of NITER have meaning described
   in comments at class tree_niter_desc declaration), false otherwise.
   When EVERY_ITERATION is true, only tests that are known to be executed
   every iteration are considered (i.e. only test that alone bounds the loop).
   If AT_STMT is not NULL, this function stores LOOP's condition statement in
   it when returning true.  BODY, if non-NULL, is the array of LOOP's basic
   blocks; otherwise it is computed (and freed) locally.  */

bool
number_of_iterations_exit_assumptions (class loop *loop, edge exit,
				       class tree_niter_desc *niter,
				       gcond **at_stmt, bool every_iteration,
				       basic_block *body)
{
  tree type;
  tree op0, op1;
  enum tree_code code;
  affine_iv iv0, iv1;
  bool safe;

  /* The condition at a fake exit (if it exists) does not control its
     execution.  */
  if (exit->flags & EDGE_FAKE)
    return false;

  /* Nothing to analyze if the loop is known to be infinite.  */
  if (loop_constraint_set_p (loop, LOOP_C_INFINITE))
    return false;

  /* SAFE <=> the exit test dominates the latch, i.e. it is executed
     on every iteration.  */
  safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);

  if (every_iteration && !safe)
    return false;

  niter->assumptions = boolean_false_node;
  niter->control.base = NULL_TREE;
  niter->control.step = NULL_TREE;
  niter->control.no_overflow = false;
  gcond *stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!stmt)
    return false;

  if (at_stmt)
    *at_stmt = stmt;

  /* We want the condition for staying inside loop.  */
  code = gimple_cond_code (gs: stmt);
  if (exit->flags & EDGE_TRUE_VALUE)
    code = invert_tree_comparison (code, false);

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case NE_EXPR:
      break;

    case EQ_EXPR:
      return number_of_iterations_cltz (loop, exit, code, niter);

    default:
      return false;
    }

  op0 = gimple_cond_lhs (gs: stmt);
  op1 = gimple_cond_rhs (gs: stmt);
  type = TREE_TYPE (op0);

  if (TREE_CODE (type) != INTEGER_TYPE
      && !POINTER_TYPE_P (type))
    return false;

  /* If OP0 is not a simple affine iv, try the bit-counting idiom
     recognizers before giving up.  */
  tree iv0_niters = NULL_TREE;
  if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
			      op0, &iv0, safe ? &iv0_niters : NULL, false))
    return number_of_iterations_bitcount (loop, exit, code, niter);
  tree iv1_niters = NULL_TREE;
  if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
			      op1, &iv1, safe ? &iv1_niters : NULL, false))
    return false;
  /* Give up on complicated case.  */
  if (iv0_niters && iv1_niters)
    return false;

  /* We don't want to see undefined signed overflow warnings while
     computing the number of iterations.  */
  fold_defer_overflow_warnings ();

  iv0.base = expand_simple_operations (expr: iv0.base);
  iv1.base = expand_simple_operations (expr: iv1.base);
  bool body_from_caller = true;
  if (!body)
    {
      body = get_loop_body (loop);
      body_from_caller = false;
    }
  bool only_exit_p = loop_only_exit_p (loop, body, exit);
  if (!body_from_caller)
    free (ptr: body);
  if (!number_of_iterations_cond (loop, type, iv0: &iv0, code, iv1: &iv1, niter,
				  only_exit: only_exit_p, every_iteration: safe))
    {
      fold_undefer_and_ignore_overflow_warnings ();
      return false;
    }

  /* Incorporate additional assumption implied by control iv.  */
  tree iv_niters = iv0_niters ? iv0_niters : iv1_niters;
  if (iv_niters)
    {
      tree assumption = fold_build2 (LE_EXPR, boolean_type_node, niter->niter,
				     fold_convert (TREE_TYPE (niter->niter),
						   iv_niters));

      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);

      /* Refine upper bound if possible.  */
      if (TREE_CODE (iv_niters) == INTEGER_CST
	  && niter->max > wi::to_widest (t: iv_niters))
	niter->max = wi::to_widest (t: iv_niters);
    }

  /* There are no assumptions if the loop is known to be finite.  */
  if (!integer_zerop (niter->assumptions)
      && loop_constraint_set_p (loop, LOOP_C_FINITE))
    niter->assumptions = boolean_true_node;

  if (optimize >= 3)
    {
      niter->assumptions = simplify_using_outer_evolutions (loop,
							    expr: niter->assumptions);
      niter->may_be_zero = simplify_using_outer_evolutions (loop,
							    expr: niter->may_be_zero);
      niter->niter = simplify_using_outer_evolutions (loop, expr: niter->niter);
    }

  niter->assumptions
    = simplify_using_initial_conditions (loop,
					 expr: niter->assumptions);
  niter->may_be_zero
    = simplify_using_initial_conditions (loop,
					 expr: niter->may_be_zero);

  fold_undefer_and_ignore_overflow_warnings ();

  /* If NITER has simplified into a constant, update MAX.  */
  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = wi::to_widest (t: niter->niter);

  return (!integer_zerop (niter->assumptions));
}
3246 | |
3247 | /* Like number_of_iterations_exit_assumptions, but return TRUE only if |
3248 | the niter information holds unconditionally. */ |
3249 | |
3250 | bool |
3251 | number_of_iterations_exit (class loop *loop, edge exit, |
3252 | class tree_niter_desc *niter, |
3253 | bool warn, bool every_iteration, |
3254 | basic_block *body) |
3255 | { |
3256 | gcond *stmt; |
3257 | if (!number_of_iterations_exit_assumptions (loop, exit, niter, |
3258 | at_stmt: &stmt, every_iteration, body)) |
3259 | return false; |
3260 | |
3261 | if (integer_nonzerop (niter->assumptions)) |
3262 | return true; |
3263 | |
3264 | if (warn && dump_enabled_p ()) |
3265 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, |
3266 | "missed loop optimization: niters analysis ends up " |
3267 | "with assumptions.\n" ); |
3268 | |
3269 | return false; |
3270 | } |
3271 | |
3272 | /* Try to determine the number of iterations of LOOP. If we succeed, |
3273 | expression giving number of iterations is returned and *EXIT is |
3274 | set to the edge from that the information is obtained. Otherwise |
3275 | chrec_dont_know is returned. */ |
3276 | |
3277 | tree |
3278 | find_loop_niter (class loop *loop, edge *exit) |
3279 | { |
3280 | unsigned i; |
3281 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3282 | edge ex; |
3283 | tree niter = NULL_TREE, aniter; |
3284 | class tree_niter_desc desc; |
3285 | |
3286 | *exit = NULL; |
3287 | FOR_EACH_VEC_ELT (exits, i, ex) |
3288 | { |
3289 | if (!number_of_iterations_exit (loop, exit: ex, niter: &desc, warn: false)) |
3290 | continue; |
3291 | |
3292 | if (integer_nonzerop (desc.may_be_zero)) |
3293 | { |
3294 | /* We exit in the first iteration through this exit. |
3295 | We won't find anything better. */ |
3296 | niter = build_int_cst (unsigned_type_node, 0); |
3297 | *exit = ex; |
3298 | break; |
3299 | } |
3300 | |
3301 | if (!integer_zerop (desc.may_be_zero)) |
3302 | continue; |
3303 | |
3304 | aniter = desc.niter; |
3305 | |
3306 | if (!niter) |
3307 | { |
3308 | /* Nothing recorded yet. */ |
3309 | niter = aniter; |
3310 | *exit = ex; |
3311 | continue; |
3312 | } |
3313 | |
3314 | /* Prefer constants, the lower the better. */ |
3315 | if (TREE_CODE (aniter) != INTEGER_CST) |
3316 | continue; |
3317 | |
3318 | if (TREE_CODE (niter) != INTEGER_CST) |
3319 | { |
3320 | niter = aniter; |
3321 | *exit = ex; |
3322 | continue; |
3323 | } |
3324 | |
3325 | if (tree_int_cst_lt (t1: aniter, t2: niter)) |
3326 | { |
3327 | niter = aniter; |
3328 | *exit = ex; |
3329 | continue; |
3330 | } |
3331 | } |
3332 | |
3333 | return niter ? niter : chrec_dont_know; |
3334 | } |
3335 | |
/* Return true if loop is known to have bounded number of iterations.  */

bool
finite_loop_p (class loop *loop)
{
  widest_int nit;
  int flags;

  /* finite_p is set e.g. by -ffinite-loops or a previous positive
     answer; then any normal (non-EH/abnormal/fake) exit suffices.  */
  if (loop->finite_p)
    {
      unsigned i;
      auto_vec<edge> exits = get_loop_exit_edges (loop);
      edge ex;

      /* If the loop has a normal exit, we can assume it will terminate.  */
      FOR_EACH_VEC_ELT (exits, i, ex)
	if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE)))
	  {
	    if (dump_file)
	      fprintf (stream: dump_file, format: "Assume loop %i to be finite: it has an exit "
		       "and -ffinite-loops is on or loop was "
		       "previously finite.\n" ,
		       loop->num);
	    return true;
	  }
    }

  /* Loops in const/pure functions that are not explicitly marked as
     possibly looping (ECF_LOOPING_CONST_OR_PURE) must terminate.  */
  flags = flags_from_decl_or_type (current_function_decl);
  if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "Found loop %i to be finite: it is within "
		 "pure or const function.\n" ,
		 loop->num);
      loop->finite_p = true;
      return true;
    }

  if (loop->any_upper_bound
      /* Loop with no normal exit will not pass max_loop_iterations.  */
      || (!loop->finite_p && max_loop_iterations (loop, &nit)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Found loop %i to be finite: upper bound found.\n" ,
		 loop->num);
      /* Cache the positive answer.  */
      loop->finite_p = true;
      return true;
    }

  return false;
}
3388 | |
3389 | /* |
3390 | |
3391 | Analysis of a number of iterations of a loop by a brute-force evaluation. |
3392 | |
3393 | */ |
3394 | |
3395 | /* Bound on the number of iterations we try to evaluate. */ |
3396 | |
3397 | #define MAX_ITERATIONS_TO_TRACK \ |
3398 | ((unsigned) param_max_iterations_to_track) |
3399 | |
/* Returns the loop phi node of LOOP such that ssa name X is derived from its
   result by a chain of operations such that all but exactly one of their
   operands are constants.  Returns NULL if no such phi exists.  */

static gphi *
chain_of_csts_start (class loop *loop, tree x)
{
  gimple *stmt = SSA_NAME_DEF_STMT (x);
  tree use;
  basic_block bb = gimple_bb (g: stmt);
  enum tree_code code;

  /* The defining statement must lie inside LOOP.  */
  if (!bb
      || !flow_bb_inside_loop_p (loop, bb))
    return NULL;

  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      /* Only a phi node in the loop header qualifies as chain start.  */
      if (bb == loop->header)
	return as_a <gphi *> (p: stmt);

      return NULL;
    }

  if (gimple_code (g: stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_class (gs: stmt) == GIMPLE_TERNARY_RHS)
    return NULL;

  /* Reject statements that reference memory or have a non-invariant
     address on the RHS.  */
  code = gimple_assign_rhs_code (gs: stmt);
  if (gimple_references_memory_p (stmt)
      || TREE_CODE_CLASS (code) == tcc_reference
      || (code == ADDR_EXPR
	  && !is_gimple_min_invariant (gimple_assign_rhs1 (gs: stmt))))
    return NULL;

  /* The statement must use exactly one SSA name; walk up through it.  */
  use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
  if (use == NULL_TREE)
    return NULL;

  return chain_of_csts_start (loop, x: use);
}
3441 | |
/* Determines whether the expression X is derived from a result of a phi node
   in header of LOOP such that

   * the derivation of X consists only from operations with constants
   * the initial value of the phi node is constant
   * the value of the phi node in the next iteration can be derived from the
     value in the current iteration by a chain of operations with constants,
     or is also a constant

   If such phi node exists, it is returned, otherwise NULL is returned.  */

static gphi *
get_base_for (class loop *loop, tree x)
{
  gphi *phi;
  tree init, next;

  if (is_gimple_min_invariant (x))
    return NULL;

  phi = chain_of_csts_start (loop, x);
  if (!phi)
    return NULL;

  /* INIT is the phi value on loop entry, NEXT the value computed for
     the next iteration (the latch argument).  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  if (!is_gimple_min_invariant (init))
    return NULL;

  /* NEXT must be a constant, or lead back to this very phi through a
     chain of operations with constants.  */
  if (TREE_CODE (next) == SSA_NAME
      && chain_of_csts_start (loop, x: next) != phi)
    return NULL;

  return phi;
}
3478 | |
/* Given an expression X, then

   * if X is NULL_TREE, we return the constant BASE.
   * if X is a constant, we return the constant X.
   * otherwise X is a SSA name, whose value in the considered loop is derived
     by a chain of operations with constant from a result of a phi node in
     the header of the loop.  Then we return value of X when the value of the
     result of this phi node is given by the constant BASE.  */

static tree
get_val_for (tree x, tree base)
{
  gimple *stmt;

  gcc_checking_assert (is_gimple_min_invariant (base));

  if (!x)
    return base;
  else if (is_gimple_min_invariant (x))
    return x;

  stmt = SSA_NAME_DEF_STMT (x);
  /* The phi node itself evaluates to BASE.  */
  if (gimple_code (g: stmt) == GIMPLE_PHI)
    return base;

  gcc_checking_assert (is_gimple_assign (stmt));

  /* STMT must be either an assignment of a single SSA name or an
     expression involving an SSA name and a constant.  Try to fold that
     expression using the value for the SSA name.  */
  if (gimple_assign_ssa_name_copy_p (stmt))
    return get_val_for (x: gimple_assign_rhs1 (gs: stmt), base);
  else if (gimple_assign_rhs_class (gs: stmt) == GIMPLE_UNARY_RHS
	   && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    return fold_build1 (gimple_assign_rhs_code (stmt),
			TREE_TYPE (gimple_assign_lhs (stmt)),
			get_val_for (gimple_assign_rhs1 (stmt), base));
  else if (gimple_assign_rhs_class (gs: stmt) == GIMPLE_BINARY_RHS)
    {
      /* Substitute into whichever operand is the SSA name; the other
	 operand is expected to be a constant.  */
      tree rhs1 = gimple_assign_rhs1 (gs: stmt);
      tree rhs2 = gimple_assign_rhs2 (gs: stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	rhs1 = get_val_for (x: rhs1, base);
      else if (TREE_CODE (rhs2) == SSA_NAME)
	rhs2 = get_val_for (x: rhs2, base);
      else
	gcc_unreachable ();
      return fold_build2 (gimple_assign_rhs_code (stmt),
			  TREE_TYPE (gimple_assign_lhs (stmt)), rhs1, rhs2);
    }
  else
    gcc_unreachable ();
}
3532 | |
3533 | |
/* Tries to count the number of iterations of LOOP till it exits by EXIT
   by brute force -- i.e. by determining the value of the operands of the
   condition at EXIT in first few iterations of the loop (assuming that
   these values are constant) and determining the first one in that the
   condition is not satisfied.  Returns the constant giving the number
   of the iterations of LOOP if successful, chrec_dont_know otherwise.  */

tree
loop_niter_by_eval (class loop *loop, edge exit)
{
  tree acnd;
  /* For each of the two condition operands: OP is the SSA name computed
     inside the loop (NULL_TREE once the operand is known invariant), VAL
     the value at the loop header in the current simulated iteration,
     NEXT the value flowing in over the latch edge, and AVAL the value at
     the exit test.  */
  tree op[2], val[2], next[2], aval[2];
  gphi *phi;
  unsigned i, j;
  enum tree_code cmp;

  /* The exit block must end in a conditional jump we can analyze.  */
  gcond *cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond)
    return chrec_dont_know;

  /* Canonicalize CMP so that it describes the condition under which the
     loop continues, i.e. the exit is taken when CMP becomes false.  */
  cmp = gimple_cond_code (gs: cond);
  if (exit->flags & EDGE_TRUE_VALUE)
    cmp = invert_tree_comparison (cmp, false);

  switch (cmp)
    {
    case EQ_EXPR:
    case NE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
      op[0] = gimple_cond_lhs (gs: cond);
      op[1] = gimple_cond_rhs (gs: cond);
      break;

    default:
      /* Any other condition code cannot be simulated.  */
      return chrec_dont_know;
    }

  /* Find the initial value and the latch-edge update of each operand;
     give up unless the operand is invariant or rooted in a simple cycle
     recognized by get_base_for.  */
  for (j = 0; j < 2; j++)
    {
      if (is_gimple_min_invariant (op[j]))
	{
	  val[j] = op[j];
	  next[j] = NULL_TREE;
	  op[j] = NULL_TREE;
	}
      else
	{
	  phi = get_base_for (loop, x: op[j]);
	  if (!phi)
	    return chrec_dont_know;
	  val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
	}
    }

  /* Don't issue signed overflow warnings.  */
  fold_defer_overflow_warnings ();

  /* Simulate up to MAX_ITERATIONS_TO_TRACK iterations, folding the exit
     condition on the concrete values of each one.  */
  for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
    {
      for (j = 0; j < 2; j++)
	aval[j] = get_val_for (x: op[j], base: val[j]);

      /* If the condition folded to false, the exit is taken in this
	 iteration; I is the number of completed iterations.  */
      acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
      if (acnd && integer_zerop (acnd))
	{
	  fold_undefer_and_ignore_overflow_warnings ();
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file,
		     format: "Proved that loop %d iterates %d times using brute force.\n" ,
		     loop->num, i);
	  return build_int_cst (unsigned_type_node, i);
	}

      /* Advance both operands to the next iteration; stop as soon as a
	 value stops being a constant.  */
      for (j = 0; j < 2; j++)
	{
	  aval[j] = val[j];
	  val[j] = get_val_for (x: next[j], base: val[j]);
	  if (!is_gimple_min_invariant (val[j]))
	    {
	      fold_undefer_and_ignore_overflow_warnings ();
	      return chrec_dont_know;
	    }
	}

      /* If the next iteration would use the same base values
	 as the current one, there is no point looping further,
	 all following iterations will be the same as this one.  */
      if (val[0] == aval[0] && val[1] == aval[1])
	break;
    }

  fold_undefer_and_ignore_overflow_warnings ();

  return chrec_dont_know;
}
3633 | |
3634 | /* Finds the exit of the LOOP by that the loop exits after a constant |
3635 | number of iterations and stores the exit edge to *EXIT. The constant |
3636 | giving the number of iterations of LOOP is returned. The number of |
3637 | iterations is determined using loop_niter_by_eval (i.e. by brute force |
3638 | evaluation). If we are unable to find the exit for that loop_niter_by_eval |
3639 | determines the number of iterations, chrec_dont_know is returned. */ |
3640 | |
3641 | tree |
3642 | find_loop_niter_by_eval (class loop *loop, edge *exit) |
3643 | { |
3644 | unsigned i; |
3645 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3646 | edge ex; |
3647 | tree niter = NULL_TREE, aniter; |
3648 | |
3649 | *exit = NULL; |
3650 | |
3651 | /* Loops with multiple exits are expensive to handle and less important. */ |
3652 | if (!flag_expensive_optimizations |
3653 | && exits.length () > 1) |
3654 | return chrec_dont_know; |
3655 | |
3656 | FOR_EACH_VEC_ELT (exits, i, ex) |
3657 | { |
3658 | if (!just_once_each_iteration_p (loop, ex->src)) |
3659 | continue; |
3660 | |
3661 | aniter = loop_niter_by_eval (loop, exit: ex); |
3662 | if (chrec_contains_undetermined (aniter)) |
3663 | continue; |
3664 | |
3665 | if (niter |
3666 | && !tree_int_cst_lt (t1: aniter, t2: niter)) |
3667 | continue; |
3668 | |
3669 | niter = aniter; |
3670 | *exit = ex; |
3671 | } |
3672 | |
3673 | return niter ? niter : chrec_dont_know; |
3674 | } |
3675 | |
3676 | /* |
3677 | |
3678 | Analysis of upper bounds on number of iterations of a loop. |
3679 | |
3680 | */ |
3681 | |
3682 | static widest_int derive_constant_upper_bound_ops (tree, tree, |
3683 | enum tree_code, tree); |
3684 | |
3685 | /* Returns a constant upper bound on the value of the right-hand side of |
3686 | an assignment statement STMT. */ |
3687 | |
3688 | static widest_int |
3689 | derive_constant_upper_bound_assign (gimple *stmt) |
3690 | { |
3691 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
3692 | tree op0 = gimple_assign_rhs1 (gs: stmt); |
3693 | tree op1 = gimple_assign_rhs2 (gs: stmt); |
3694 | |
3695 | return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)), |
3696 | op0, code, op1); |
3697 | } |
3698 | |
3699 | /* Returns a constant upper bound on the value of expression VAL. VAL |
3700 | is considered to be unsigned. If its type is signed, its value must |
3701 | be nonnegative. */ |
3702 | |
3703 | static widest_int |
3704 | derive_constant_upper_bound (tree val) |
3705 | { |
3706 | enum tree_code code; |
3707 | tree op0, op1, op2; |
3708 | |
3709 | extract_ops_from_tree (val, &code, &op0, &op1, &op2); |
3710 | return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1); |
3711 | } |
3712 | |
3713 | /* Returns a constant upper bound on the value of expression OP0 CODE OP1, |
3714 | whose type is TYPE. The expression is considered to be unsigned. If |
3715 | its type is signed, its value must be nonnegative. */ |
3716 | |
3717 | static widest_int |
3718 | derive_constant_upper_bound_ops (tree type, tree op0, |
3719 | enum tree_code code, tree op1) |
3720 | { |
3721 | tree subtype, maxt; |
3722 | widest_int bnd, max, cst; |
3723 | gimple *stmt; |
3724 | |
3725 | if (INTEGRAL_TYPE_P (type)) |
3726 | maxt = TYPE_MAX_VALUE (type); |
3727 | else |
3728 | maxt = upper_bound_in_type (type, type); |
3729 | |
3730 | max = wi::to_widest (t: maxt); |
3731 | |
3732 | switch (code) |
3733 | { |
3734 | case INTEGER_CST: |
3735 | return wi::to_widest (t: op0); |
3736 | |
3737 | CASE_CONVERT: |
3738 | subtype = TREE_TYPE (op0); |
3739 | if (!TYPE_UNSIGNED (subtype) |
3740 | /* If TYPE is also signed, the fact that VAL is nonnegative implies |
3741 | that OP0 is nonnegative. */ |
3742 | && TYPE_UNSIGNED (type) |
3743 | && !tree_expr_nonnegative_p (op0)) |
3744 | { |
3745 | /* If we cannot prove that the casted expression is nonnegative, |
3746 | we cannot establish more useful upper bound than the precision |
3747 | of the type gives us. */ |
3748 | return max; |
3749 | } |
3750 | |
3751 | /* We now know that op0 is an nonnegative value. Try deriving an upper |
3752 | bound for it. */ |
3753 | bnd = derive_constant_upper_bound (val: op0); |
3754 | |
3755 | /* If the bound does not fit in TYPE, max. value of TYPE could be |
3756 | attained. */ |
3757 | if (wi::ltu_p (x: max, y: bnd)) |
3758 | return max; |
3759 | |
3760 | return bnd; |
3761 | |
3762 | case PLUS_EXPR: |
3763 | case POINTER_PLUS_EXPR: |
3764 | case MINUS_EXPR: |
3765 | if (TREE_CODE (op1) != INTEGER_CST |
3766 | || !tree_expr_nonnegative_p (op0)) |
3767 | return max; |
3768 | |
3769 | /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to |
3770 | choose the most logical way how to treat this constant regardless |
3771 | of the signedness of the type. */ |
3772 | cst = wi::sext (x: wi::to_widest (t: op1), TYPE_PRECISION (type)); |
3773 | if (code != MINUS_EXPR) |
3774 | cst = -cst; |
3775 | |
3776 | bnd = derive_constant_upper_bound (val: op0); |
3777 | |
3778 | if (wi::neg_p (x: cst)) |
3779 | { |
3780 | cst = -cst; |
3781 | /* Avoid CST == 0x80000... */ |
3782 | if (wi::neg_p (x: cst)) |
3783 | return max; |
3784 | |
3785 | /* OP0 + CST. We need to check that |
3786 | BND <= MAX (type) - CST. */ |
3787 | |
3788 | widest_int mmax = max - cst; |
3789 | if (wi::leu_p (x: bnd, y: mmax)) |
3790 | return max; |
3791 | |
3792 | return bnd + cst; |
3793 | } |
3794 | else |
3795 | { |
3796 | /* OP0 - CST, where CST >= 0. |
3797 | |
3798 | If TYPE is signed, we have already verified that OP0 >= 0, and we |
3799 | know that the result is nonnegative. This implies that |
3800 | VAL <= BND - CST. |
3801 | |
3802 | If TYPE is unsigned, we must additionally know that OP0 >= CST, |
3803 | otherwise the operation underflows. |
3804 | */ |
3805 | |
3806 | /* This should only happen if the type is unsigned; however, for |
3807 | buggy programs that use overflowing signed arithmetics even with |
3808 | -fno-wrapv, this condition may also be true for signed values. */ |
3809 | if (wi::ltu_p (x: bnd, y: cst)) |
3810 | return max; |
3811 | |
3812 | if (TYPE_UNSIGNED (type)) |
3813 | { |
3814 | tree tem = fold_binary (GE_EXPR, boolean_type_node, op0, |
3815 | wide_int_to_tree (type, cst)); |
3816 | if (!tem || integer_nonzerop (tem)) |
3817 | return max; |
3818 | } |
3819 | |
3820 | bnd -= cst; |
3821 | } |
3822 | |
3823 | return bnd; |
3824 | |
3825 | case FLOOR_DIV_EXPR: |
3826 | case EXACT_DIV_EXPR: |
3827 | if (TREE_CODE (op1) != INTEGER_CST |
3828 | || tree_int_cst_sign_bit (op1)) |
3829 | return max; |
3830 | |
3831 | bnd = derive_constant_upper_bound (val: op0); |
3832 | return wi::udiv_floor (x: bnd, y: wi::to_widest (t: op1)); |
3833 | |
3834 | case BIT_AND_EXPR: |
3835 | if (TREE_CODE (op1) != INTEGER_CST |
3836 | || tree_int_cst_sign_bit (op1)) |
3837 | return max; |
3838 | return wi::to_widest (t: op1); |
3839 | |
3840 | case SSA_NAME: |
3841 | stmt = SSA_NAME_DEF_STMT (op0); |
3842 | if (gimple_code (g: stmt) != GIMPLE_ASSIGN |
3843 | || gimple_assign_lhs (gs: stmt) != op0) |
3844 | return max; |
3845 | return derive_constant_upper_bound_assign (stmt); |
3846 | |
3847 | default: |
3848 | return max; |
3849 | } |
3850 | } |
3851 | |
/* Emit a -Waggressive-loop-optimizations warning if needed.  I_BOUND is
   the iteration bound derived from undefined behavior in STMT inside
   LOOP.  */

static void
do_warn_aggressive_loop_optimizations (class loop *loop,
				       widest_int i_bound, gimple *stmt)
{
  /* Don't warn if the loop doesn't have known constant bound.  */
  if (!loop->nb_iterations
      || TREE_CODE (loop->nb_iterations) != INTEGER_CST
      || !warn_aggressive_loop_optimizations
      /* To avoid warning multiple times for the same loop,
	 only start warning when we preserve loops.  */
      || (cfun->curr_properties & PROP_loops) == 0
      /* Only warn once per loop.  */
      || loop->warned_aggressive_loop_optimizations
      /* Only warn if undefined behavior gives us lower estimate than the
	 known constant bound.  */
      || wi::cmpu (x: i_bound, y: wi::to_widest (t: loop->nb_iterations)) >= 0
      /* And undefined behavior happens unconditionally.  */
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: stmt)))
    return;

  /* Warn only for single-exit loops so the "within this loop" note can
     point at a unique controlling statement.  */
  edge e = single_exit (loop);
  if (e == NULL)
    return;

  gimple *estmt = last_nondebug_stmt (e->src);
  /* Print I_BOUND into P; use an alloca'd buffer when the fixed-size
     stack buffer would be too small for the decimal representation.  */
  char buf[WIDE_INT_PRINT_BUFFER_SIZE], *p;
  unsigned len;
  if (print_dec_buf_size (wi: i_bound, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)),
			  len: &len))
    p = XALLOCAVEC (char, len);
  else
    p = buf;
  print_dec (wi: i_bound, buf: p, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)));
  auto_diagnostic_group d;
  /* Only emit the note when the warning itself was not suppressed.  */
  if (warning_at (gimple_location (g: stmt), OPT_Waggressive_loop_optimizations,
		  "iteration %s invokes undefined behavior" , p))
    inform (gimple_location (g: estmt), "within this loop" );
  /* Remember we warned so the warning is emitted at most once per loop.  */
  loop->warned_aggressive_loop_optimizations = true;
}
3893 | |
/* Records that AT_STMT is executed at most BOUND + 1 times in LOOP.  IS_EXIT
   is true if the loop is exited immediately after STMT, and this exit
   is taken at last when the STMT is executed BOUND + 1 times.
   REALISTIC is true if BOUND is expected to be close to the real number
   of iterations.  UPPER is true if we are sure the loop iterates at most
   BOUND times.  I_BOUND is a widest_int upper estimate on BOUND.  */

static void
record_estimate (class loop *loop, tree bound, const widest_int &i_bound,
		 gimple *at_stmt, bool is_exit, bool realistic, bool upper)
{
  widest_int delta;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Statement %s" , is_exit ? "(exit)" : "" );
      print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: " is %sexecuted at most " ,
	       upper ? "" : "probably " );
      print_generic_expr (dump_file, bound, TDF_SLIM);
      fprintf (stream: dump_file, format: " (bounded by " );
      print_decu (wi: i_bound, file: dump_file);
      fprintf (stream: dump_file, format: ") + 1 times in loop %d.\n" , loop->num);
    }

  /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
     real number of iterations.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    realistic = false;
  else
    gcc_checking_assert (i_bound == wi::to_widest (bound));

  /* Bounds that do not fit into bound_wide_int cannot be stored;
     ignore them.  */
  if (wi::min_precision (x: i_bound, sgn: SIGNED) > bound_wide_int ().get_precision ())
    return;

  /* If we have a guaranteed upper bound, record it in the appropriate
     list, unless this is an !is_exit bound (i.e. undefined behavior in
     at_stmt) in a loop with known constant number of iterations.  */
  if (upper
      && (is_exit
	  || loop->nb_iterations == NULL_TREE
	  || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
    {
      class nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();

      elt->bound = bound_wide_int::from (x: i_bound, sgn: SIGNED);
      elt->stmt = at_stmt;
      elt->is_exit = is_exit;
      /* Push onto the loop's list of recorded bounds.  */
      elt->next = loop->bounds;
      loop->bounds = elt;
    }

  /* If statement is executed on every path to the loop latch, we can directly
     infer the upper bound on the # of iterations of the loop.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: at_stmt)))
    upper = false;

  /* Update the number of iteration estimates according to the bound.
     If at_stmt is an exit then the loop latch is executed at most BOUND times,
     otherwise it can be executed BOUND + 1 times.  We will lower the estimate
     later if such statement must be executed on last iteration  */
  if (is_exit)
    delta = 0;
  else
    delta = 1;
  widest_int new_i_bound = i_bound + delta;

  /* If an overflow occurred, ignore the result.  */
  if (wi::ltu_p (x: new_i_bound, y: delta))
    return;

  /* A guaranteed non-exit bound means undefined behavior would occur in
     later iterations; this may deserve a user-visible warning.  */
  if (upper && !is_exit)
    do_warn_aggressive_loop_optimizations (loop, i_bound: new_i_bound, stmt: at_stmt);
  record_niter_bound (loop, new_i_bound, realistic, upper);
}
3969 | |
3970 | /* Records the control iv analyzed in NITER for LOOP if the iv is valid |
3971 | and doesn't overflow. */ |
3972 | |
3973 | static void |
3974 | record_control_iv (class loop *loop, class tree_niter_desc *niter) |
3975 | { |
3976 | struct control_iv *iv; |
3977 | |
3978 | if (!niter->control.base || !niter->control.step) |
3979 | return; |
3980 | |
3981 | if (!integer_onep (niter->assumptions) || !niter->control.no_overflow) |
3982 | return; |
3983 | |
3984 | iv = ggc_alloc<control_iv> (); |
3985 | iv->base = niter->control.base; |
3986 | iv->step = niter->control.step; |
3987 | iv->next = loop->control_ivs; |
3988 | loop->control_ivs = iv; |
3989 | |
3990 | return; |
3991 | } |
3992 | |
3993 | /* This function returns TRUE if below conditions are satisfied: |
3994 | 1) VAR is SSA variable. |
3995 | 2) VAR is an IV:{base, step} in its defining loop. |
3996 | 3) IV doesn't overflow. |
3997 | 4) Both base and step are integer constants. |
3998 | 5) Base is the MIN/MAX value depends on IS_MIN. |
3999 | Store value of base to INIT correspondingly. */ |
4000 | |
4001 | static bool |
4002 | get_cst_init_from_scev (tree var, wide_int *init, bool is_min) |
4003 | { |
4004 | if (TREE_CODE (var) != SSA_NAME) |
4005 | return false; |
4006 | |
4007 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); |
4008 | class loop *loop = loop_containing_stmt (stmt: def_stmt); |
4009 | |
4010 | if (loop == NULL) |
4011 | return false; |
4012 | |
4013 | affine_iv iv; |
4014 | if (!simple_iv (loop, loop, var, &iv, false)) |
4015 | return false; |
4016 | |
4017 | if (!iv.no_overflow) |
4018 | return false; |
4019 | |
4020 | if (TREE_CODE (iv.base) != INTEGER_CST || TREE_CODE (iv.step) != INTEGER_CST) |
4021 | return false; |
4022 | |
4023 | if (is_min == tree_int_cst_sign_bit (iv.step)) |
4024 | return false; |
4025 | |
4026 | *init = wi::to_wide (t: iv.base); |
4027 | return true; |
4028 | } |
4029 | |
/* Record the estimate on number of iterations of LOOP based on the fact that
   the induction variable BASE + STEP * i evaluated in STMT does not wrap and
   its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
   estimated number of iterations is expected to be close to the real one.
   UPPER is true if we are sure the induction variable does not wrap.  */

static void
record_nonwrapping_iv (class loop *loop, tree base, tree step, gimple *stmt,
		       tree low, tree high, bool realistic, bool upper)
{
  tree niter_bound, extreme, delta;
  tree type = TREE_TYPE (base), unsigned_type;
  tree orig_base = base;

  /* Only constant, nonzero steps can be handled.  */
  if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Induction variable (" );
      print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
      fprintf (stream: dump_file, format: ") " );
      print_generic_expr (dump_file, base, TDF_SLIM);
      fprintf (stream: dump_file, format: " + " );
      print_generic_expr (dump_file, step, TDF_SLIM);
      fprintf (stream: dump_file, format: " * iteration does not wrap in statement " );
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: " in loop %d.\n" , loop->num);
    }

  /* Do the DELTA / STEP computation below in the unsigned variant of
     TYPE so the subtraction cannot overflow.  */
  unsigned_type = unsigned_type_for (type);
  base = fold_convert (unsigned_type, base);
  step = fold_convert (unsigned_type, step);

  if (tree_int_cst_sign_bit (step))
    {
      /* Decreasing iv: it runs from BASE down towards LOW.  Try to
	 tighten BASE using range info or scev on ORIG_BASE.  */
      wide_int max;
      Value_Range base_range (TREE_TYPE (orig_base));
      if (get_range_query (cfun)->range_of_expr (r&: base_range, expr: orig_base)
	  && !base_range.undefined_p ())
	max = base_range.upper_bound ();
      extreme = fold_convert (unsigned_type, low);
      if (TREE_CODE (orig_base) == SSA_NAME
	  && TREE_CODE (high) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
	  && ((!base_range.varying_p ()
	       && !base_range.undefined_p ())
	      || get_cst_init_from_scev (var: orig_base, init: &max, is_min: false))
	  && wi::gts_p (x: wi::to_wide (t: high), y: max))
	base = wide_int_to_tree (type: unsigned_type, cst: max);
      else if (TREE_CODE (base) != INTEGER_CST
	       && dominated_by_p (CDI_DOMINATORS,
				  loop->latch, gimple_bb (g: stmt)))
	/* STMT executes every iteration, so HIGH is a safe stand-in for
	   an unknown BASE.  */
	base = fold_convert (unsigned_type, high);
      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
      /* Negate the step so the division below uses its magnitude.  */
      step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
    }
  else
    {
      /* Increasing iv: it runs from BASE up towards HIGH.  Try to
	 tighten BASE using range info or scev on ORIG_BASE.  */
      wide_int min;
      Value_Range base_range (TREE_TYPE (orig_base));
      if (get_range_query (cfun)->range_of_expr (r&: base_range, expr: orig_base)
	  && !base_range.undefined_p ())
	min = base_range.lower_bound ();
      extreme = fold_convert (unsigned_type, high);
      if (TREE_CODE (orig_base) == SSA_NAME
	  && TREE_CODE (low) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
	  && ((!base_range.varying_p ()
	       && !base_range.undefined_p ())
	      || get_cst_init_from_scev (var: orig_base, init: &min, is_min: true))
	  && wi::gts_p (x: min, y: wi::to_wide (t: low)))
	base = wide_int_to_tree (type: unsigned_type, cst: min);
      else if (TREE_CODE (base) != INTEGER_CST
	       && dominated_by_p (CDI_DOMINATORS,
				  loop->latch, gimple_bb (g: stmt)))
	/* STMT executes every iteration, so LOW is a safe stand-in for
	   an unknown BASE.  */
	base = fold_convert (unsigned_type, low);
      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
    }

  /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
     would get out of the range.  */
  niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
  widest_int max = derive_constant_upper_bound (val: niter_bound);
  record_estimate (loop, bound: niter_bound, i_bound: max, at_stmt: stmt, is_exit: false, realistic, upper);
}
4116 | |
/* Determine information about number of iterations a LOOP from the index
   IDX of a data reference accessed in STMT.  RELIABLE is true if STMT is
   guaranteed to be executed in every iteration of LOOP.  Callback for
   for_each_index.  */

struct ilb_data
{
  /* Loop whose iteration bounds are being inferred.  */
  class loop *loop;
  /* Statement containing the analyzed data reference.  */
  gimple *stmt;
};
4127 | |
/* for_each_index callback: derive an iteration bound for DATA->loop from
   the array index *IDX used in BASE within DATA->stmt.  Always returns
   true so the index walk continues.  */

static bool
idx_infer_loop_bounds (tree base, tree *idx, void *dta)
{
  struct ilb_data *data = (struct ilb_data *) dta;
  tree ev, init, step;
  tree low, high, type, next;
  bool sign, upper = true, has_flexible_size = false;
  class loop *loop = data->loop;

  if (TREE_CODE (base) != ARRAY_REF)
    return true;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not really extend over their declared size.  */
  if (array_ref_flexible_size_p (base))
    {
      has_flexible_size = true;
      upper = false;
    }

  class loop *dloop = loop_containing_stmt (stmt: data->stmt);
  if (!dloop)
    return true;

  /* Analyze the index as a scalar evolution in the loop containing the
     statement, then split it into initial value and per-iteration step.  */
  ev = analyze_scalar_evolution (dloop, *idx);
  ev = instantiate_parameters (loop, chrec: ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  /* Only constant-step, loop-invariant-base evolutions are usable.  */
  if (!init
      || !step
      || TREE_CODE (step) != INTEGER_CST
      || integer_zerop (step)
      || tree_contains_chrecs (init, NULL)
      || chrec_contains_symbols_defined_in_loop (init, loop->num))
    return true;

  low = array_ref_low_bound (base);
  high = array_ref_up_bound (base);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high
      || TREE_CODE (high) != INTEGER_CST)
    return true;
  sign = tree_int_cst_sign_bit (step);
  type = TREE_TYPE (step);

  /* The array that might have flexible size most likely extends
     beyond its bounds.  */
  if (has_flexible_size
      && operand_equal_p (low, high, flags: 0))
    return true;

  /* In case the relevant bound of the array does not fit in type, or
     it does, but bound + step (in type) still belongs into the range of the
     array, the index may wrap and still stay within the range of the array
     (consider e.g. if the array is indexed by the full range of
     unsigned char).

     To make things simpler, we require both bounds to fit into type, although
     there are cases where this would not be strictly necessary.  */
  if (!int_fits_type_p (high, type)
      || !int_fits_type_p (low, type))
    return true;
  low = fold_convert (type, low);
  high = fold_convert (type, high);

  /* NEXT is the index one step beyond the relevant array bound; if it is
     still inside [LOW, HIGH], a wrapped index could stay in range, so no
     bound can be derived.  */
  if (sign)
    next = fold_binary (PLUS_EXPR, type, low, step);
  else
    next = fold_binary (PLUS_EXPR, type, high, step);

  if (tree_int_cst_compare (t1: low, t2: next) <= 0
      && tree_int_cst_compare (t1: next, t2: high) <= 0)
    return true;

  /* If access is not executed on every iteration, we must ensure that overflow
     may not make the access valid later.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: data->stmt)))
    {
      if (scev_probably_wraps_p (NULL_TREE,
				 initial_condition_in_loop_num (ev, loop->num),
				 step, data->stmt, loop, true))
	upper = false;
    }
  else
    record_nonwrapping_chrec (ev);

  record_nonwrapping_iv (loop, base: init, step, stmt: data->stmt, low, high, realistic: false, upper);
  return true;
}
4221 | |
4222 | /* Determine information about number of iterations a LOOP from the bounds |
4223 | of arrays in the data reference REF accessed in STMT. RELIABLE is true if |
4224 | STMT is guaranteed to be executed in every iteration of LOOP.*/ |
4225 | |
4226 | static void |
4227 | infer_loop_bounds_from_ref (class loop *loop, gimple *stmt, tree ref) |
4228 | { |
4229 | struct ilb_data data; |
4230 | |
4231 | data.loop = loop; |
4232 | data.stmt = stmt; |
4233 | for_each_index (&ref, idx_infer_loop_bounds, &data); |
4234 | } |
4235 | |
4236 | /* Determine information about number of iterations of a LOOP from the way |
4237 | arrays are used in STMT. RELIABLE is true if STMT is guaranteed to be |
4238 | executed in every iteration of LOOP. */ |
4239 | |
4240 | static void |
4241 | infer_loop_bounds_from_array (class loop *loop, gimple *stmt) |
4242 | { |
4243 | if (is_gimple_assign (gs: stmt)) |
4244 | { |
4245 | tree op0 = gimple_assign_lhs (gs: stmt); |
4246 | tree op1 = gimple_assign_rhs1 (gs: stmt); |
4247 | |
4248 | /* For each memory access, analyze its access function |
4249 | and record a bound on the loop iteration domain. */ |
4250 | if (REFERENCE_CLASS_P (op0)) |
4251 | infer_loop_bounds_from_ref (loop, stmt, ref: op0); |
4252 | |
4253 | if (REFERENCE_CLASS_P (op1)) |
4254 | infer_loop_bounds_from_ref (loop, stmt, ref: op1); |
4255 | } |
4256 | else if (is_gimple_call (gs: stmt)) |
4257 | { |
4258 | tree arg, lhs; |
4259 | unsigned i, n = gimple_call_num_args (gs: stmt); |
4260 | |
4261 | lhs = gimple_call_lhs (gs: stmt); |
4262 | if (lhs && REFERENCE_CLASS_P (lhs)) |
4263 | infer_loop_bounds_from_ref (loop, stmt, ref: lhs); |
4264 | |
4265 | for (i = 0; i < n; i++) |
4266 | { |
4267 | arg = gimple_call_arg (gs: stmt, index: i); |
4268 | if (REFERENCE_CLASS_P (arg)) |
4269 | infer_loop_bounds_from_ref (loop, stmt, ref: arg); |
4270 | } |
4271 | } |
4272 | } |
4273 | |
/* Determine information about number of iterations of a LOOP from the fact
   that pointer arithmetics in STMT does not overflow.  */

static void
infer_loop_bounds_from_pointer_arith (class loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;
  tree var, ptr;

  /* Only POINTER_PLUS_EXPR assignments are of interest here.  */
  if (!is_gimple_assign (gs: stmt)
      || gimple_assign_rhs_code (gs: stmt) != POINTER_PLUS_EXPR)
    return;

  def = gimple_assign_lhs (gs: stmt);
  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* Pointer types that are allowed to wrap give us no information.  */
  type = TREE_TYPE (def);
  if (!nowrap_type_p (type))
    return;

  /* The pointer operand must be invariant in LOOP so the evolution comes
     from the offset alone.  */
  ptr = gimple_assign_rhs1 (gs: stmt);
  if (!expr_invariant_in_loop_p (loop, ptr))
    return;

  var = gimple_assign_rhs2 (gs: stmt);
  if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
    return;

  /* Analyze DEF's evolution in its own loop and instantiate it relative
     to LOOP.  */
  class loop *uloop = loop_containing_stmt (stmt);
  scev = instantiate_parameters (loop, chrec: analyze_scalar_evolution (uloop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  /* Only constant-step, loop-invariant-base evolutions are usable.  */
  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);

  /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
     produce a NULL pointer.  The contrary would mean NULL points to an object,
     while NULL is supposed to compare unequal with the address of all objects.
     Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
     NULL pointer since that would mean wrapping, which we assume here not to
     happen.  So, we can exclude NULL from the valid range of pointer
     arithmetic.  */
  if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
    low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));

  record_nonwrapping_chrec (scev);
  record_nonwrapping_iv (loop, base, step, stmt, low, high, realistic: false, upper: true);
}
4333 | |
/* Determine information about number of iterations of a LOOP from the fact
   that signed arithmetics in STMT does not overflow.  */

static void
infer_loop_bounds_from_signedness (class loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;

  if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
    return;

  def = gimple_assign_lhs (gs: stmt);

  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* Only types where overflow is undefined let us assume non-wrapping.  */
  type = TREE_TYPE (def);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_OVERFLOW_UNDEFINED (type))
    return;

  scev = instantiate_parameters (loop, chrec: analyze_scalar_evolution (loop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  /* Only constant-step, loop-invariant-base evolutions are usable.  */
  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  /* Start from the full range of TYPE and tighten it with whatever the
     range query knows about DEF.  */
  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);
  Value_Range r (TREE_TYPE (def));
  get_range_query (cfun)->range_of_expr (r, expr: def);
  if (!r.varying_p () && !r.undefined_p ())
    {
      low = wide_int_to_tree (type, cst: r.lower_bound ());
      high = wide_int_to_tree (type, cst: r.upper_bound ());
    }

  record_nonwrapping_chrec (scev);
  record_nonwrapping_iv (loop, base, step, stmt, low, high, realistic: false, upper: true);
}
4381 | |
4382 | /* The following analyzers are extracting informations on the bounds |
4383 | of LOOP from the following undefined behaviors: |
4384 | |
4385 | - data references should not access elements over the statically |
4386 | allocated size, |
4387 | |
4388 | - signed variables should not overflow when flag_wrapv is not set. |
4389 | */ |
4390 | |
4391 | static void |
4392 | infer_loop_bounds_from_undefined (class loop *loop, basic_block *bbs) |
4393 | { |
4394 | unsigned i; |
4395 | gimple_stmt_iterator bsi; |
4396 | basic_block bb; |
4397 | bool reliable; |
4398 | |
4399 | for (i = 0; i < loop->num_nodes; i++) |
4400 | { |
4401 | bb = bbs[i]; |
4402 | |
4403 | /* If BB is not executed in each iteration of the loop, we cannot |
4404 | use the operations in it to infer reliable upper bound on the |
4405 | # of iterations of the loop. However, we can use it as a guess. |
4406 | Reliable guesses come only from array bounds. */ |
4407 | reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb); |
4408 | |
4409 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
4410 | { |
4411 | gimple *stmt = gsi_stmt (i: bsi); |
4412 | |
4413 | infer_loop_bounds_from_array (loop, stmt); |
4414 | |
4415 | if (reliable) |
4416 | { |
4417 | infer_loop_bounds_from_signedness (loop, stmt); |
4418 | infer_loop_bounds_from_pointer_arith (loop, stmt); |
4419 | } |
4420 | } |
4421 | |
4422 | } |
4423 | } |
4424 | |
4425 | /* Compare wide ints, callback for qsort. */ |
4426 | |
4427 | static int |
4428 | wide_int_cmp (const void *p1, const void *p2) |
4429 | { |
4430 | const bound_wide_int *d1 = (const bound_wide_int *) p1; |
4431 | const bound_wide_int *d2 = (const bound_wide_int *) p2; |
4432 | return wi::cmpu (x: *d1, y: *d2); |
4433 | } |
4434 | |
4435 | /* Return index of BOUND in BOUNDS array sorted in increasing order. |
4436 | Lookup by binary search. */ |
4437 | |
4438 | static int |
4439 | bound_index (const vec<bound_wide_int> &bounds, const bound_wide_int &bound) |
4440 | { |
4441 | unsigned int end = bounds.length (); |
4442 | unsigned int begin = 0; |
4443 | |
4444 | /* Find a matching index by means of a binary search. */ |
4445 | while (begin != end) |
4446 | { |
4447 | unsigned int middle = (begin + end) / 2; |
4448 | bound_wide_int index = bounds[middle]; |
4449 | |
4450 | if (index == bound) |
4451 | return middle; |
4452 | else if (wi::ltu_p (x: index, y: bound)) |
4453 | begin = middle + 1; |
4454 | else |
4455 | end = middle; |
4456 | } |
4457 | gcc_unreachable (); |
4458 | } |
4459 | |
/* We recorded loop bounds only for statements dominating loop latch (and thus
   executed each loop iteration).  If there are any bounds on statements not
   dominating the loop latch we can improve the estimate by walking the loop
   body and seeing if every path from loop header to loop latch contains
   some bounded statement.  */

static void
discover_iteration_bound_by_body_walk (class loop *loop)
{
  class nb_iter_bound *elt;
  auto_vec<bound_wide_int> bounds;
  vec<vec<basic_block> > queues = vNULL;
  vec<basic_block> queue = vNULL;
  ptrdiff_t queue_index;
  ptrdiff_t latch_index = 0;

  /* Discover what bounds may interest us.  Only bounds smaller than the
     currently known upper bound can possibly improve it.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      bound_wide_int bound = elt->bound;

      /* Exit terminates loop at given iteration, while non-exits produce undefined
	 effect on the next iteration.  */
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      if (!loop->any_upper_bound
	  || wi::ltu_p (x: bound, y: loop->nb_iterations_upper_bound))
	bounds.safe_push (obj: bound);
    }

  /* Exit early if there is nothing to do.  */
  if (!bounds.exists ())
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: " Trying to walk loop body to reduce the bound.\n");

  /* Sort the bounds in decreasing order.  */
  bounds.qsort (wide_int_cmp);

  /* For every basic block record the lowest bound that is guaranteed to
     terminate the loop.  */

  hash_map<basic_block, ptrdiff_t> bb_bounds;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      bound_wide_int bound = elt->bound;
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      if (!loop->any_upper_bound
	  || wi::ltu_p (x: bound, y: loop->nb_iterations_upper_bound))
	{
	  /* Keep only the smallest bound index per basic block.  */
	  ptrdiff_t index = bound_index (bounds, bound);
	  ptrdiff_t *entry = bb_bounds.get (k: gimple_bb (g: elt->stmt));
	  if (!entry)
	    bb_bounds.put (k: gimple_bb (g: elt->stmt), v: index);
	  else if ((ptrdiff_t)*entry > index)
	    *entry = index;
	}
    }

  hash_map<basic_block, ptrdiff_t> block_priority;

  /* Perform shortest path discovery loop->header ... loop->latch.

     The "distance" is given by the smallest loop bound of basic block
     present in the path and we look for path with largest smallest bound
     on it.

     To avoid the need for fibonacci heap on double ints we simply compress
     double ints into indexes to BOUNDS array and then represent the queue
     as arrays of queues for every index.
     Index of BOUNDS.length() means that the execution of given BB has
     no bounds determined.

     VISITED is a pointer map translating basic block into smallest index
     it was inserted into the priority queue with.  */
  latch_index = -1;

  /* Start walk in loop header with index set to infinite bound.  */
  queue_index = bounds.length ();
  queues.safe_grow_cleared (len: queue_index + 1, exact: true);
  queue.safe_push (obj: loop->header);
  queues[queue_index] = queue;
  block_priority.put (k: loop->header, v: queue_index);

  /* Process queues from the highest (loosest) bound index downward.  */
  for (; queue_index >= 0; queue_index--)
    {
      if (latch_index < queue_index)
	{
	  while (queues[queue_index].length ())
	    {
	      basic_block bb;
	      ptrdiff_t bound_index = queue_index;
	      edge e;
	      edge_iterator ei;

	      queue = queues[queue_index];
	      bb = queue.pop ();

	      /* OK, we later inserted the BB with lower priority, skip it.  */
	      if (*block_priority.get (k: bb) > queue_index)
		continue;

	      /* See if we can improve the bound.  */
	      ptrdiff_t *entry = bb_bounds.get (k: bb);
	      if (entry && *entry < bound_index)
		bound_index = *entry;

	      /* Insert successors into the queue, watch for latch edge
		 and record greatest index we saw.  */
	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  bool insert = false;

		  if (loop_exit_edge_p (loop, e))
		    continue;

		  if (e == loop_latch_edge (loop)
		      && latch_index < bound_index)
		    latch_index = bound_index;
		  else if (!(entry = block_priority.get (k: e->dest)))
		    {
		      insert = true;
		      block_priority.put (k: e->dest, v: bound_index);
		    }
		  else if (*entry < bound_index)
		    {
		      insert = true;
		      *entry = bound_index;
		    }

		  if (insert)
		    queues[bound_index].safe_push (obj: e->dest);
		}
	    }
	}
      queues[queue_index].release ();
    }

  /* LATCH_INDEX below BOUNDS.length() means every header-to-latch path
     goes through some statement with a bound better than the recorded
     upper bound; record the improvement.  */
  gcc_assert (latch_index >= 0);
  if ((unsigned)latch_index < bounds.length ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Found better loop bound ");
	  print_decu (wi: bounds[latch_index], file: dump_file);
	  fprintf (stream: dump_file, format: "\n");
	}
      record_niter_bound (loop, widest_int::from (x: bounds[latch_index],
						  sgn: SIGNED), false, true);
    }

  queues.release ();
}
4627 | |
/* See if every path cross the loop goes through a statement that is known
   to not execute at the last iteration.  In that case we can decrease iteration
   count by 1.  */

static void
maybe_lower_iteration_bound (class loop *loop)
{
  hash_set<gimple *> *not_executed_last_iteration = NULL;
  class nb_iter_bound *elt;
  bool found_exit = false;
  auto_vec<basic_block> queue;
  bitmap visited;

  /* Collect all statements with interesting (i.e. lower than
     nb_iterations_upper_bound) bound on them.

     TODO: Due to the way record_estimate choose estimates to store, the bounds
     will be always nb_iterations_upper_bound-1.  We can change this to record
     also statements not dominating the loop latch and update the walk below
     to the shortest path algorithm.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      if (!elt->is_exit
	  && wi::ltu_p (x: elt->bound, y: loop->nb_iterations_upper_bound))
	{
	  /* Allocate the set lazily; many loops have no such statements.  */
	  if (!not_executed_last_iteration)
	    not_executed_last_iteration = new hash_set<gimple *>;
	  not_executed_last_iteration->add (k: elt->stmt);
	}
    }
  if (!not_executed_last_iteration)
    return;

  /* Start DFS walk in the loop header and see if we can reach the
     loop latch or any of the exits (including statements with side
     effects that may terminate the loop otherwise) without visiting
     any of the statements known to have undefined effect on the last
     iteration.  */
  queue.safe_push (obj: loop->header);
  visited = BITMAP_ALLOC (NULL);
  bitmap_set_bit (visited, loop->header->index);
  found_exit = false;

  do
    {
      basic_block bb = queue.pop ();
      gimple_stmt_iterator gsi;
      bool stmt_found = false;

      /* Loop for possible exits and statements bounding the execution.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
	{
	  gimple *stmt = gsi_stmt (i: gsi);
	  if (not_executed_last_iteration->contains (k: stmt))
	    {
	      stmt_found = true;
	      break;
	    }
	  /* A side effect may terminate the loop (e.g. a call to exit),
	     so this path may complete without hitting a bounding
	     statement.  */
	  if (gimple_has_side_effects (stmt))
	    {
	      found_exit = true;
	      break;
	    }
	}
      if (found_exit)
	break;

      /* If no bounding statement is found, continue the walk.  */
      if (!stmt_found)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      /* Reaching an exit or the latch without passing a bounding
		 statement means we cannot lower the bound.  */
	      if (loop_exit_edge_p (loop, e)
		  || e == loop_latch_edge (loop))
		{
		  found_exit = true;
		  break;
		}
	      if (bitmap_set_bit (visited, e->dest->index))
		queue.safe_push (obj: e->dest);
	    }
	}
    }
  while (queue.length () && !found_exit);

  /* If every path through the loop reach bounding statement before exit,
     then we know the last iteration of the loop will have undefined effect
     and we can decrease number of iterations.  */

  if (!found_exit)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Reducing loop iteration estimate by 1; "
		 "undefined statement must be executed at the last iteration.\n");
      record_niter_bound (loop, widest_int::from (x: loop->nb_iterations_upper_bound,
						  sgn: SIGNED) - 1,
			  false, true);
    }

  BITMAP_FREE (visited);
  delete not_executed_last_iteration;
}
4733 | |
/* Get expected upper bound for number of loop iterations for
   BUILT_IN_EXPECT_WITH_PROBABILITY for a condition COND.  Returns
   NULL_TREE if no bound can be derived.  */

static tree
get_upper_bound_based_on_builtin_expr_with_prob (gcond *cond)
{
  if (cond == NULL)
    return NULL_TREE;

  tree lhs = gimple_cond_lhs (gs: cond);
  if (TREE_CODE (lhs) != SSA_NAME)
    return NULL_TREE;

  /* The condition operand must be defined by a call ...  */
  gimple *stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond));
  gcall *def = dyn_cast<gcall *> (p: stmt);
  if (def == NULL)
    return NULL_TREE;

  /* ... to __builtin_expect_with_probability (val, expected, prob).  */
  tree decl = gimple_call_fndecl (gs: def);
  if (!decl
      || !fndecl_built_in_p (node: decl, name1: BUILT_IN_EXPECT_WITH_PROBABILITY)
      || gimple_call_num_args (gs: stmt) != 3)
    return NULL_TREE;

  /* Fold the exit condition with the EXPECTED argument substituted for
     the builtin's result; give up unless it folds to a constant.  */
  tree c = gimple_call_arg (gs: def, index: 1);
  tree condt = TREE_TYPE (lhs);
  tree res = fold_build2 (gimple_cond_code (cond),
			  condt, c,
			  gimple_cond_rhs (cond));
  if (TREE_CODE (res) != INTEGER_CST)
    return NULL_TREE;


  /* The expected iteration count is 1 / PROB — or 1 / (1 - PROB) when the
     folded condition is false, i.e. the branch taken with probability PROB
     is the one staying in the loop.  */
  tree prob = gimple_call_arg (gs: def, index: 2);
  tree t = TREE_TYPE (prob);
  tree one
    = build_real_from_int_cst (t,
			       integer_one_node);
  if (integer_zerop (res))
    prob = fold_build2 (MINUS_EXPR, t, one, prob);
  tree r = fold_build2 (RDIV_EXPR, t, one, prob);
  /* Folding may fail (e.g. division by a zero probability); bail out.  */
  if (TREE_CODE (r) != REAL_CST)
    return NULL_TREE;

  HOST_WIDE_INT probi
    = real_to_integer (TREE_REAL_CST_PTR (r));
  return build_int_cst (condt, probi);
}
4782 | |
/* Records estimates on numbers of iterations of LOOP.  When
   flag_aggressive_loop_optimizations is set, also use estimates derived
   from undefined behavior.  */

void
estimate_numbers_of_iterations (class loop *loop)
{
  tree niter, type;
  unsigned i;
  class tree_niter_desc niter_desc;
  edge ex;
  widest_int bound;
  edge likely_exit;

  /* Give up if we already have tried to compute an estimation.  */
  if (loop->estimate_state != EST_NOT_COMPUTED)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Estimating # of iterations of loop %d\n", loop->num);

  loop->estimate_state = EST_AVAILABLE;

  sreal nit;
  bool reliable;

  /* If we have a measured profile, use it to estimate the number of
     iterations.  Normally this is recorded by branch_prob right after
     reading the profile.  In case we however found a new loop, record the
     information here.

     Explicitly check for profile status so we do not report
     wrong prediction hitrates for guessed loop iterations heuristics.
     Do not recompute already recorded bounds - we ought to be better on
     updating iteration bounds than updating profile in general and thus
     recomputing iteration bounds later in the compilation process will just
     introduce random roundoff errors.  */
  if (!loop->any_estimate
      && expected_loop_iterations_by_profile (loop, ret: &nit, reliable: &reliable)
      && reliable)
    {
      bound = nit.to_nearest_int ();
      record_niter_bound (loop, bound, true, false);
    }

  /* Ensure that loop->nb_iterations is computed if possible.  If it turns out
     to be constant, we avoid undefined behavior implied bounds and instead
     diagnose those loops with -Waggressive-loop-optimizations.  */
  number_of_latch_executions (loop);

  basic_block *body = get_loop_body (loop);
  auto_vec<edge> exits = get_loop_exit_edges (loop, body);
  likely_exit = single_likely_exit (loop, exits);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      /* For the likely exit, also try deriving a bound from
	 __builtin_expect_with_probability in the exit condition.  */
      if (ex == likely_exit)
	{
	  gimple *stmt = *gsi_last_bb (bb: ex->src);
	  if (stmt != NULL)
	    {
	      gcond *cond = dyn_cast<gcond *> (p: stmt);
	      tree niter_bound
		= get_upper_bound_based_on_builtin_expr_with_prob (cond);
	      if (niter_bound != NULL_TREE)
		{
		  widest_int max = derive_constant_upper_bound (val: niter_bound);
		  record_estimate (loop, bound: niter_bound, i_bound: max, at_stmt: cond,
				   is_exit: true, realistic: true, upper: false);
		}
	    }
	}

      if (!number_of_iterations_exit (loop, exit: ex, niter: &niter_desc,
				      warn: false, every_iteration: false, body))
	continue;

      /* Guard the computed iteration count with MAY_BE_ZERO when that
	 condition did not fold to a constant.  */
      niter = niter_desc.niter;
      type = TREE_TYPE (niter);
      if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
	niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
			build_int_cst (type, 0),
			niter);
      record_estimate (loop, bound: niter, i_bound: niter_desc.max,
		       at_stmt: last_nondebug_stmt (ex->src),
		       is_exit: true, realistic: ex == likely_exit, upper: true);
      record_control_iv (loop, niter: &niter_desc);
    }

  if (flag_aggressive_loop_optimizations)
    infer_loop_bounds_from_undefined (loop, bbs: body);
  free (ptr: body);

  discover_iteration_bound_by_body_walk (loop);

  maybe_lower_iteration_bound (loop);

  /* If we know the exact number of iterations of this loop, try to
     not break code with undefined behavior by not recording smaller
     maximum number of iterations.  */
  if (loop->nb_iterations
      && TREE_CODE (loop->nb_iterations) == INTEGER_CST
      && (wi::min_precision (x: wi::to_widest (t: loop->nb_iterations), sgn: SIGNED)
	  <= bound_wide_int ().get_precision ()))
    {
      loop->any_upper_bound = true;
      loop->nb_iterations_upper_bound
	= bound_wide_int::from (x: wi::to_widest (t: loop->nb_iterations), sgn: SIGNED);
    }
}
4891 | |
4892 | /* Sets NIT to the estimated number of executions of the latch of the |
4893 | LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as |
4894 | large as the number of iterations. If we have no reliable estimate, |
4895 | the function returns false, otherwise returns true. */ |
4896 | |
4897 | bool |
4898 | estimated_loop_iterations (class loop *loop, widest_int *nit) |
4899 | { |
4900 | /* When SCEV information is available, try to update loop iterations |
4901 | estimate. Otherwise just return whatever we recorded earlier. */ |
4902 | if (scev_initialized_p ()) |
4903 | estimate_numbers_of_iterations (loop); |
4904 | |
4905 | return (get_estimated_loop_iterations (loop, nit)); |
4906 | } |
4907 | |
4908 | /* Similar to estimated_loop_iterations, but returns the estimate only |
4909 | if it fits to HOST_WIDE_INT. If this is not the case, or the estimate |
4910 | on the number of iterations of LOOP could not be derived, returns -1. */ |
4911 | |
4912 | HOST_WIDE_INT |
4913 | estimated_loop_iterations_int (class loop *loop) |
4914 | { |
4915 | widest_int nit; |
4916 | HOST_WIDE_INT hwi_nit; |
4917 | |
4918 | if (!estimated_loop_iterations (loop, nit: &nit)) |
4919 | return -1; |
4920 | |
4921 | if (!wi::fits_shwi_p (x: nit)) |
4922 | return -1; |
4923 | hwi_nit = nit.to_shwi (); |
4924 | |
4925 | return hwi_nit < 0 ? -1 : hwi_nit; |
4926 | } |
4927 | |
4928 | |
4929 | /* Sets NIT to an upper bound for the maximum number of executions of the |
4930 | latch of the LOOP. If we have no reliable estimate, the function returns |
4931 | false, otherwise returns true. */ |
4932 | |
4933 | bool |
4934 | max_loop_iterations (class loop *loop, widest_int *nit) |
4935 | { |
4936 | /* When SCEV information is available, try to update loop iterations |
4937 | estimate. Otherwise just return whatever we recorded earlier. */ |
4938 | if (scev_initialized_p ()) |
4939 | estimate_numbers_of_iterations (loop); |
4940 | |
4941 | return get_max_loop_iterations (loop, nit); |
4942 | } |
4943 | |
4944 | /* Similar to max_loop_iterations, but returns the estimate only |
4945 | if it fits to HOST_WIDE_INT. If this is not the case, or the estimate |
4946 | on the number of iterations of LOOP could not be derived, returns -1. */ |
4947 | |
4948 | HOST_WIDE_INT |
4949 | max_loop_iterations_int (class loop *loop) |
4950 | { |
4951 | widest_int nit; |
4952 | HOST_WIDE_INT hwi_nit; |
4953 | |
4954 | if (!max_loop_iterations (loop, nit: &nit)) |
4955 | return -1; |
4956 | |
4957 | if (!wi::fits_shwi_p (x: nit)) |
4958 | return -1; |
4959 | hwi_nit = nit.to_shwi (); |
4960 | |
4961 | return hwi_nit < 0 ? -1 : hwi_nit; |
4962 | } |
4963 | |
4964 | /* Sets NIT to an likely upper bound for the maximum number of executions of the |
4965 | latch of the LOOP. If we have no reliable estimate, the function returns |
4966 | false, otherwise returns true. */ |
4967 | |
4968 | bool |
4969 | likely_max_loop_iterations (class loop *loop, widest_int *nit) |
4970 | { |
4971 | /* When SCEV information is available, try to update loop iterations |
4972 | estimate. Otherwise just return whatever we recorded earlier. */ |
4973 | if (scev_initialized_p ()) |
4974 | estimate_numbers_of_iterations (loop); |
4975 | |
4976 | return get_likely_max_loop_iterations (loop, nit); |
4977 | } |
4978 | |
4979 | /* Similar to max_loop_iterations, but returns the estimate only |
4980 | if it fits to HOST_WIDE_INT. If this is not the case, or the estimate |
4981 | on the number of iterations of LOOP could not be derived, returns -1. */ |
4982 | |
4983 | HOST_WIDE_INT |
4984 | likely_max_loop_iterations_int (class loop *loop) |
4985 | { |
4986 | widest_int nit; |
4987 | HOST_WIDE_INT hwi_nit; |
4988 | |
4989 | if (!likely_max_loop_iterations (loop, nit: &nit)) |
4990 | return -1; |
4991 | |
4992 | if (!wi::fits_shwi_p (x: nit)) |
4993 | return -1; |
4994 | hwi_nit = nit.to_shwi (); |
4995 | |
4996 | return hwi_nit < 0 ? -1 : hwi_nit; |
4997 | } |
4998 | |
4999 | /* Returns an estimate for the number of executions of statements |
5000 | in the LOOP. For statements before the loop exit, this exceeds |
5001 | the number of execution of the latch by one. */ |
5002 | |
5003 | HOST_WIDE_INT |
5004 | estimated_stmt_executions_int (class loop *loop) |
5005 | { |
5006 | HOST_WIDE_INT nit = estimated_loop_iterations_int (loop); |
5007 | HOST_WIDE_INT snit; |
5008 | |
5009 | if (nit == -1) |
5010 | return -1; |
5011 | |
5012 | snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1); |
5013 | |
5014 | /* If the computation overflows, return -1. */ |
5015 | return snit < 0 ? -1 : snit; |
5016 | } |
5017 | |
5018 | /* Sets NIT to the maximum number of executions of the latch of the |
5019 | LOOP, plus one. If we have no reliable estimate, the function returns |
5020 | false, otherwise returns true. */ |
5021 | |
5022 | bool |
5023 | max_stmt_executions (class loop *loop, widest_int *nit) |
5024 | { |
5025 | widest_int nit_minus_one; |
5026 | |
5027 | if (!max_loop_iterations (loop, nit)) |
5028 | return false; |
5029 | |
5030 | nit_minus_one = *nit; |
5031 | |
5032 | *nit += 1; |
5033 | |
5034 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5035 | } |
5036 | |
5037 | /* Sets NIT to the estimated maximum number of executions of the latch of the |
5038 | LOOP, plus one. If we have no likely estimate, the function returns |
5039 | false, otherwise returns true. */ |
5040 | |
5041 | bool |
5042 | likely_max_stmt_executions (class loop *loop, widest_int *nit) |
5043 | { |
5044 | widest_int nit_minus_one; |
5045 | |
5046 | if (!likely_max_loop_iterations (loop, nit)) |
5047 | return false; |
5048 | |
5049 | nit_minus_one = *nit; |
5050 | |
5051 | *nit += 1; |
5052 | |
5053 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5054 | } |
5055 | |
5056 | /* Sets NIT to the estimated number of executions of the latch of the |
5057 | LOOP, plus one. If we have no reliable estimate, the function returns |
5058 | false, otherwise returns true. */ |
5059 | |
5060 | bool |
5061 | estimated_stmt_executions (class loop *loop, widest_int *nit) |
5062 | { |
5063 | widest_int nit_minus_one; |
5064 | |
5065 | if (!estimated_loop_iterations (loop, nit)) |
5066 | return false; |
5067 | |
5068 | nit_minus_one = *nit; |
5069 | |
5070 | *nit += 1; |
5071 | |
5072 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5073 | } |
5074 | |
5075 | /* Records estimates on numbers of iterations of loops. */ |
5076 | |
5077 | void |
5078 | estimate_numbers_of_iterations (function *fn) |
5079 | { |
5080 | /* We don't want to issue signed overflow warnings while getting |
5081 | loop iteration estimates. */ |
5082 | fold_defer_overflow_warnings (); |
5083 | |
5084 | for (auto loop : loops_list (fn, 0)) |
5085 | estimate_numbers_of_iterations (loop); |
5086 | |
5087 | fold_undefer_and_ignore_overflow_warnings (); |
5088 | } |
5089 | |
5090 | /* Returns true if statement S1 dominates statement S2. */ |
5091 | |
5092 | bool |
5093 | stmt_dominates_stmt_p (gimple *s1, gimple *s2) |
5094 | { |
5095 | basic_block bb1 = gimple_bb (g: s1), bb2 = gimple_bb (g: s2); |
5096 | |
5097 | if (!bb1 |
5098 | || s1 == s2) |
5099 | return true; |
5100 | |
5101 | if (bb1 == bb2) |
5102 | { |
5103 | gimple_stmt_iterator bsi; |
5104 | |
5105 | if (gimple_code (g: s2) == GIMPLE_PHI) |
5106 | return false; |
5107 | |
5108 | if (gimple_code (g: s1) == GIMPLE_PHI) |
5109 | return true; |
5110 | |
5111 | for (bsi = gsi_start_bb (bb: bb1); gsi_stmt (i: bsi) != s2; gsi_next (i: &bsi)) |
5112 | if (gsi_stmt (i: bsi) == s1) |
5113 | return true; |
5114 | |
5115 | return false; |
5116 | } |
5117 | |
5118 | return dominated_by_p (CDI_DOMINATORS, bb2, bb1); |
5119 | } |
5120 | |
/* Returns true when we can prove that the number of executions of
   STMT in the loop is at most NITER, according to the bound on
   the number of executions of the statement NITER_BOUND->stmt recorded in
   NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT.

   ??? This code can become quite a CPU hog - we can have many bounds,
   and large basic block forcing stmt_dominates_stmt_p to be queried
   many times on a large basic blocks, so the whole thing is O(n^2)
   for scev_probably_wraps_p invocation (that can be done n times).

   It would make more sense (and give better answers) to remember BB
   bounds computed by discover_iteration_bound_by_body_walk.  */

static bool
n_of_executions_at_most (gimple *stmt,
			 class nb_iter_bound *niter_bound,
			 tree niter)
{
  widest_int bound = widest_int::from (x: niter_bound->bound, sgn: SIGNED);
  tree nit_type = TREE_TYPE (niter), e;
  enum tree_code cmp;

  gcc_assert (TYPE_UNSIGNED (nit_type));

  /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
     the number of iterations is small.  */
  if (!wi::fits_to_tree_p (x: bound, type: nit_type))
    return false;

  /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
     times.  This means that:

     -- if NITER_BOUND->is_exit is true, then everything after
	it at most NITER_BOUND->bound times.

     -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
	is executed, then NITER_BOUND->stmt is executed as well in the same
	iteration then STMT is executed at most NITER_BOUND->bound + 1 times.

	If we can determine that NITER_BOUND->stmt is always executed
	after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
	We conclude that if both statements belong to the same
	basic block and STMT is before NITER_BOUND->stmt and there are no
	statements with side effects in between.  */

  if (niter_bound->is_exit)
    {
      /* Only statements strictly dominated by the exit test get the
	 tighter ">=" bound.  */
      if (stmt == niter_bound->stmt
	  || !stmt_dominates_stmt_p (s1: niter_bound->stmt, s2: stmt))
	return false;
      cmp = GE_EXPR;
    }
  else
    {
      if (!stmt_dominates_stmt_p (s1: niter_bound->stmt, s2: stmt))
	{
	  gimple_stmt_iterator bsi;
	  if (gimple_bb (g: stmt) != gimple_bb (g: niter_bound->stmt)
	      || gimple_code (g: stmt) == GIMPLE_PHI
	      || gimple_code (g: niter_bound->stmt) == GIMPLE_PHI)
	    return false;

	  /* By stmt_dominates_stmt_p we already know that STMT appears
	     before NITER_BOUND->STMT.  Still need to test that the loop
	     cannot be terminated by a side effect in between.  */
	  for (bsi = gsi_for_stmt (stmt); gsi_stmt (i: bsi) != niter_bound->stmt;
	       gsi_next (i: &bsi))
	    if (gimple_has_side_effects (gsi_stmt (i: bsi)))
	      return false;
	  /* STMT may run one extra time; widen the bound accordingly and
	     give up if that overflows BOUND or NIT_TYPE.  */
	  bound += 1;
	  if (bound == 0
	      || !wi::fits_to_tree_p (x: bound, type: nit_type))
	    return false;
	}
      cmp = GT_EXPR;
    }

  /* Finally compare the derived bound against NITER; only a folded
     constant "true" proves the property.  */
  e = fold_binary (cmp, boolean_type_node,
		   niter, wide_int_to_tree (nit_type, bound));
  return e && integer_nonzerop (e);
}
5202 | |
5203 | /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */ |
5204 | |
5205 | bool |
5206 | nowrap_type_p (tree type) |
5207 | { |
5208 | if (ANY_INTEGRAL_TYPE_P (type) |
5209 | && TYPE_OVERFLOW_UNDEFINED (type)) |
5210 | return true; |
5211 | |
5212 | if (POINTER_TYPE_P (type)) |
5213 | return true; |
5214 | |
5215 | return false; |
5216 | } |
5217 | |
5218 | /* Return true if we can prove LOOP is exited before evolution of induction |
5219 | variable {BASE, STEP} overflows with respect to its type bound. */ |
5220 | |
5221 | static bool |
5222 | loop_exits_before_overflow (tree base, tree step, |
5223 | gimple *at_stmt, class loop *loop) |
5224 | { |
5225 | widest_int niter; |
5226 | struct control_iv *civ; |
5227 | class nb_iter_bound *bound; |
5228 | tree e, delta, step_abs, unsigned_base; |
5229 | tree type = TREE_TYPE (step); |
5230 | tree unsigned_type, valid_niter; |
5231 | |
5232 | /* Don't issue signed overflow warnings. */ |
5233 | fold_defer_overflow_warnings (); |
5234 | |
5235 | /* Compute the number of iterations before we reach the bound of the |
5236 | type, and verify that the loop is exited before this occurs. */ |
5237 | unsigned_type = unsigned_type_for (type); |
5238 | unsigned_base = fold_convert (unsigned_type, base); |
5239 | |
5240 | if (tree_int_cst_sign_bit (step)) |
5241 | { |
5242 | tree extreme = fold_convert (unsigned_type, |
5243 | lower_bound_in_type (type, type)); |
5244 | delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme); |
5245 | step_abs = fold_build1 (NEGATE_EXPR, unsigned_type, |
5246 | fold_convert (unsigned_type, step)); |
5247 | } |
5248 | else |
5249 | { |
5250 | tree extreme = fold_convert (unsigned_type, |
5251 | upper_bound_in_type (type, type)); |
5252 | delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base); |
5253 | step_abs = fold_convert (unsigned_type, step); |
5254 | } |
5255 | |
5256 | valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs); |
5257 | |
5258 | estimate_numbers_of_iterations (loop); |
5259 | |
5260 | if (max_loop_iterations (loop, nit: &niter) |
5261 | && wi::fits_to_tree_p (x: niter, TREE_TYPE (valid_niter)) |
5262 | && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter, |
5263 | wide_int_to_tree (TREE_TYPE (valid_niter), |
5264 | niter))) != NULL |
5265 | && integer_nonzerop (e)) |
5266 | { |
5267 | fold_undefer_and_ignore_overflow_warnings (); |
5268 | return true; |
5269 | } |
5270 | if (at_stmt) |
5271 | for (bound = loop->bounds; bound; bound = bound->next) |
5272 | { |
5273 | if (n_of_executions_at_most (stmt: at_stmt, niter_bound: bound, niter: valid_niter)) |
5274 | { |
5275 | fold_undefer_and_ignore_overflow_warnings (); |
5276 | return true; |
5277 | } |
5278 | } |
5279 | fold_undefer_and_ignore_overflow_warnings (); |
5280 | |
5281 | /* Try to prove loop is exited before {base, step} overflows with the |
5282 | help of analyzed loop control IV. This is done only for IVs with |
5283 | constant step because otherwise we don't have the information. */ |
5284 | if (TREE_CODE (step) == INTEGER_CST) |
5285 | { |
5286 | for (civ = loop->control_ivs; civ; civ = civ->next) |
5287 | { |
5288 | enum tree_code code; |
5289 | tree civ_type = TREE_TYPE (civ->step); |
5290 | |
5291 | /* Have to consider type difference because operand_equal_p ignores |
5292 | that for constants. */ |
5293 | if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type) |
5294 | || element_precision (type) != element_precision (civ_type)) |
5295 | continue; |
5296 | |
5297 | /* Only consider control IV with same step. */ |
5298 | if (!operand_equal_p (step, civ->step, flags: 0)) |
5299 | continue; |
5300 | |
5301 | /* Done proving if this is a no-overflow control IV. */ |
5302 | if (operand_equal_p (base, civ->base, flags: 0)) |
5303 | return true; |
5304 | |
5305 | /* Control IV is recorded after expanding simple operations, |
5306 | Here we expand base and compare it too. */ |
5307 | tree expanded_base = expand_simple_operations (expr: base); |
5308 | if (operand_equal_p (expanded_base, civ->base, flags: 0)) |
5309 | return true; |
5310 | |
5311 | /* If this is a before stepping control IV, in other words, we have |
5312 | |
5313 | {civ_base, step} = {base + step, step} |
5314 | |
5315 | Because civ {base + step, step} doesn't overflow during loop |
5316 | iterations, {base, step} will not overflow if we can prove the |
5317 | operation "base + step" does not overflow. Specifically, we try |
5318 | to prove below conditions are satisfied: |
5319 | |
5320 | base <= UPPER_BOUND (type) - step ;;step > 0 |
5321 | base >= LOWER_BOUND (type) - step ;;step < 0 |
5322 | |
5323 | by proving the reverse conditions are false using loop's initial |
5324 | condition. */ |
5325 | if (POINTER_TYPE_P (TREE_TYPE (base))) |
5326 | code = POINTER_PLUS_EXPR; |
5327 | else |
5328 | code = PLUS_EXPR; |
5329 | |
5330 | tree stepped = fold_build2 (code, TREE_TYPE (base), base, step); |
5331 | tree expanded_stepped = fold_build2 (code, TREE_TYPE (base), |
5332 | expanded_base, step); |
5333 | if (operand_equal_p (stepped, civ->base, flags: 0) |
5334 | || operand_equal_p (expanded_stepped, civ->base, flags: 0)) |
5335 | { |
5336 | tree extreme; |
5337 | |
5338 | if (tree_int_cst_sign_bit (step)) |
5339 | { |
5340 | code = LT_EXPR; |
5341 | extreme = lower_bound_in_type (type, type); |
5342 | } |
5343 | else |
5344 | { |
5345 | code = GT_EXPR; |
5346 | extreme = upper_bound_in_type (type, type); |
5347 | } |
5348 | extreme = fold_build2 (MINUS_EXPR, type, extreme, step); |
5349 | e = fold_build2 (code, boolean_type_node, base, extreme); |
5350 | e = simplify_using_initial_conditions (loop, expr: e); |
5351 | if (integer_zerop (e)) |
5352 | return true; |
5353 | } |
5354 | } |
5355 | } |
5356 | |
5357 | return false; |
5358 | } |
5359 | |
5360 | /* VAR is scev variable whose evolution part is constant STEP, this function |
5361 | proves that VAR can't overflow by using value range info. If VAR's value |
5362 | range is [MIN, MAX], it can be proven by: |
5363 | MAX + step doesn't overflow ; if step > 0 |
5364 | or |
5365 | MIN + step doesn't underflow ; if step < 0. |
5366 | |
5367 | We can only do this if var is computed in every loop iteration, i.e, var's |
5368 | definition has to dominate loop latch. Consider below example: |
5369 | |
5370 | { |
5371 | unsigned int i; |
5372 | |
5373 | <bb 3>: |
5374 | |
5375 | <bb 4>: |
5376 | # RANGE [0, 4294967294] NONZERO 65535 |
5377 | # i_21 = PHI <0(3), i_18(9)> |
5378 | if (i_21 != 0) |
5379 | goto <bb 6>; |
5380 | else |
5381 | goto <bb 8>; |
5382 | |
5383 | <bb 6>: |
5384 | # RANGE [0, 65533] NONZERO 65535 |
5385 | _6 = i_21 + 4294967295; |
5386 | # RANGE [0, 65533] NONZERO 65535 |
5387 | _7 = (long unsigned int) _6; |
5388 | # RANGE [0, 524264] NONZERO 524280 |
5389 | _8 = _7 * 8; |
5390 | # PT = nonlocal escaped |
5391 | _9 = a_14 + _8; |
5392 | *_9 = 0; |
5393 | |
5394 | <bb 8>: |
5395 | # RANGE [1, 65535] NONZERO 65535 |
5396 | i_18 = i_21 + 1; |
5397 | if (i_18 >= 65535) |
5398 | goto <bb 10>; |
5399 | else |
5400 | goto <bb 9>; |
5401 | |
5402 | <bb 9>: |
5403 | goto <bb 4>; |
5404 | |
5405 | <bb 10>: |
5406 | return; |
5407 | } |
5408 | |
5409 | VAR _6 doesn't overflow only with pre-condition (i_21 != 0), here we |
5410 |    can't use _6 to prove no-overflow for _7.  In fact, var _7 takes value
5411 | sequence (4294967295, 0, 1, ..., 65533) in loop life time, rather than |
5412 | (4294967295, 4294967296, ...). */ |
5413 | |
5414 | static bool |
5415 | scev_var_range_cant_overflow (tree var, tree step, class loop *loop) |
5416 | { |
5417 | tree type; |
5418 | wide_int minv, maxv, diff, step_wi; |
5419 | |
5420 | if (TREE_CODE (step) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (var))) |
5421 | return false; |
5422 | |
5423 | /* Check if VAR evaluates in every loop iteration. It's not the case |
5424 | if VAR is default definition or does not dominate loop's latch. */ |
5425 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
5426 | if (!def_bb || !dominated_by_p (CDI_DOMINATORS, loop->latch, def_bb)) |
5427 | return false; |
5428 | |
5429 | Value_Range r (TREE_TYPE (var)); |
5430 | get_range_query (cfun)->range_of_expr (r, expr: var); |
5431 | if (r.varying_p () || r.undefined_p ()) |
5432 | return false; |
5433 | |
5434 | /* VAR is a scev whose evolution part is STEP and value range info |
5435 | is [MIN, MAX], we can prove its no-overflowness by conditions: |
5436 | |
5437 | type_MAX - MAX >= step ; if step > 0 |
5438 | MIN - type_MIN >= |step| ; if step < 0. |
5439 | |
5440 | Or VAR must take value outside of value range, which is not true. */ |
5441 | step_wi = wi::to_wide (t: step); |
5442 | type = TREE_TYPE (var); |
5443 | if (tree_int_cst_sign_bit (step)) |
5444 | { |
5445 | diff = r.lower_bound () - wi::to_wide (t: lower_bound_in_type (type, type)); |
5446 | step_wi = - step_wi; |
5447 | } |
5448 | else |
5449 | diff = wi::to_wide (t: upper_bound_in_type (type, type)) - r.upper_bound (); |
5450 | |
5451 | return (wi::geu_p (x: diff, y: step_wi)); |
5452 | } |
5453 | |
5454 | /* Return false only when the induction variable BASE + STEP * I is |
5455 | known to not overflow: i.e. when the number of iterations is small |
5456 | enough with respect to the step and initial condition in order to |
5457 | keep the evolution confined in TYPEs bounds. Return true when the |
5458 | iv is known to overflow or when the property is not computable. |
5459 | |
5460 | USE_OVERFLOW_SEMANTICS is true if this function should assume that |
5461 | the rules for overflow of the given language apply (e.g., that signed |
5462 | arithmetics in C does not overflow). |
5463 | |
5464 | If VAR is a ssa variable, this function also returns false if VAR can |
5465 | be proven not overflow with value range info. */ |
5466 | |
5467 | bool |
5468 | scev_probably_wraps_p (tree var, tree base, tree step, |
5469 | gimple *at_stmt, class loop *loop, |
5470 | bool use_overflow_semantics) |
5471 | { |
5472 | /* FIXME: We really need something like |
5473 | http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html. |
5474 | |
5475 | We used to test for the following situation that frequently appears |
5476 | during address arithmetics: |
5477 | |
5478 | D.1621_13 = (long unsigned intD.4) D.1620_12; |
5479 | D.1622_14 = D.1621_13 * 8; |
5480 | D.1623_15 = (doubleD.29 *) D.1622_14; |
5481 | |
5482 | And derived that the sequence corresponding to D_14 |
5483 | can be proved to not wrap because it is used for computing a |
5484 | memory access; however, this is not really the case -- for example, |
5485 | if D_12 = (unsigned char) [254,+,1], then D_14 has values |
5486 | 2032, 2040, 0, 8, ..., but the code is still legal. */ |
5487 | |
5488 | if (chrec_contains_undetermined (base) |
5489 | || chrec_contains_undetermined (step)) |
5490 | return true; |
5491 | |
5492 | if (integer_zerop (step)) |
5493 | return false; |
5494 | |
5495 | /* If we can use the fact that signed and pointer arithmetics does not |
5496 | wrap, we are done. */ |
5497 | if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base))) |
5498 | return false; |
5499 | |
5500 | /* To be able to use estimates on number of iterations of the loop, |
5501 | we must have an upper bound on the absolute value of the step. */ |
5502 | if (TREE_CODE (step) != INTEGER_CST) |
5503 | return true; |
5504 | |
5505 | /* Check if var can be proven not overflow with value range info. */ |
5506 | if (var && TREE_CODE (var) == SSA_NAME |
5507 | && scev_var_range_cant_overflow (var, step, loop)) |
5508 | return false; |
5509 | |
5510 | if (loop_exits_before_overflow (base, step, at_stmt, loop)) |
5511 | return false; |
5512 | |
5513 | /* Check the nonwrapping flag, which may be set by niter analysis (e.g., the |
5514 | above loop exits before overflow). */ |
5515 | if (var && nonwrapping_chrec_p (analyze_scalar_evolution (loop, var))) |
5516 | return false; |
5517 | |
5518 | /* At this point we still don't have a proof that the iv does not |
5519 | overflow: give up. */ |
5520 | return true; |
5521 | } |
5522 | |
5523 | /* Frees the information on upper bounds on numbers of iterations of LOOP. */ |
5524 | |
5525 | void |
5526 | free_numbers_of_iterations_estimates (class loop *loop) |
5527 | { |
5528 | struct control_iv *civ; |
5529 | class nb_iter_bound *bound; |
5530 | |
5531 | loop->nb_iterations = NULL; |
5532 | loop->estimate_state = EST_NOT_COMPUTED; |
5533 | for (bound = loop->bounds; bound;) |
5534 | { |
5535 | class nb_iter_bound *next = bound->next; |
5536 | ggc_free (bound); |
5537 | bound = next; |
5538 | } |
5539 | loop->bounds = NULL; |
5540 | |
5541 | for (civ = loop->control_ivs; civ;) |
5542 | { |
5543 | struct control_iv *next = civ->next; |
5544 | ggc_free (civ); |
5545 | civ = next; |
5546 | } |
5547 | loop->control_ivs = NULL; |
5548 | } |
5549 | |
5550 | /* Frees the information on upper bounds on numbers of iterations of loops. */ |
5551 | |
5552 | void |
5553 | free_numbers_of_iterations_estimates (function *fn) |
5554 | { |
5555 | for (auto loop : loops_list (fn, 0)) |
5556 | free_numbers_of_iterations_estimates (loop); |
5557 | } |
5558 | |
5559 | /* Substitute value VAL for ssa name NAME inside expressions held |
5560 | at LOOP. */ |
5561 | |
5562 | void |
5563 | substitute_in_loop_info (class loop *loop, tree name, tree val) |
5564 | { |
5565 | loop->nb_iterations = simplify_replace_tree (expr: loop->nb_iterations, old: name, new_tree: val); |
5566 | } |
5567 | |