1 | /* Data references and dependences detectors. |
2 | Copyright (C) 2003-2024 Free Software Foundation, Inc. |
3 | Contributed by Sebastian Pop <pop@cri.ensmp.fr> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* This pass walks a given loop structure searching for array |
22 | references. The information about the array accesses is recorded |
23 | in DATA_REFERENCE structures. |
24 | |
25 | The basic test for determining the dependences is: |
26 | given two access functions chrec1 and chrec2 to a same array, and |
27 | x and y two vectors from the iteration domain, the same element of |
28 | the array is accessed twice at iterations x and y if and only if: |
29 | | chrec1 (x) == chrec2 (y). |
30 | |
31 | The goals of this analysis are: |
32 | |
33 | - to determine the independence: the relation between two |
34 | independent accesses is qualified with the chrec_known (this |
35 | information allows a loop parallelization), |
36 | |
37 | - when two data references access the same data, to qualify the |
38 | dependence relation with classic dependence representations: |
39 | |
40 | - distance vectors |
41 | - direction vectors |
42 | - loop carried level dependence |
43 | - polyhedron dependence |
44 | or with the chains of recurrences based representation, |
45 | |
46 | - to define a knowledge base for storing the data dependence |
47 | information, |
48 | |
49 | - to define an interface to access this data. |
50 | |
51 | |
52 | Definitions: |
53 | |
54 | - subscript: given two array accesses a subscript is the tuple |
55 | composed of the access functions for a given dimension. Example: |
56 | Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts: |
57 | (f1, g1), (f2, g2), (f3, g3). |
58 | |
59 | - Diophantine equation: an equation whose coefficients and |
60 | solutions are integer constants, for example the equation |
61 | | 3*x + 2*y = 1 |
62 | has an integer solution x = 1 and y = -1. |
63 | |
64 | References: |
65 | |
66 | - "Advanced Compilation for High Performance Computing" by Randy |
67 | Allen and Ken Kennedy. |
68 | http://citeseer.ist.psu.edu/goff91practical.html |
69 | |
70 | - "Loop Transformations for Restructuring Compilers - The Foundations" |
71 | by Utpal Banerjee. |
72 | |
73 | |
74 | */ |
75 | |
76 | #include "config.h" |
77 | #include "system.h" |
78 | #include "coretypes.h" |
79 | #include "backend.h" |
80 | #include "rtl.h" |
81 | #include "tree.h" |
82 | #include "gimple.h" |
83 | #include "gimple-pretty-print.h" |
84 | #include "alias.h" |
85 | #include "fold-const.h" |
86 | #include "expr.h" |
87 | #include "gimple-iterator.h" |
88 | #include "tree-ssa-loop-niter.h" |
89 | #include "tree-ssa-loop.h" |
90 | #include "tree-ssa.h" |
91 | #include "cfgloop.h" |
92 | #include "tree-data-ref.h" |
93 | #include "tree-scalar-evolution.h" |
94 | #include "dumpfile.h" |
95 | #include "tree-affine.h" |
96 | #include "builtins.h" |
97 | #include "tree-eh.h" |
98 | #include "ssa.h" |
99 | #include "internal-fn.h" |
100 | #include "vr-values.h" |
101 | #include "range-op.h" |
102 | #include "tree-ssa-loop-ivopts.h" |
103 | #include "calls.h" |
104 | |
/* Counters gathered while the dependence analyses in this file run.
   ZIV/SIV/MIV refer to the classic subscript classification
   (zero/single/multiple index variable) from the Allen & Kennedy
   reference cited at the top of this file.  */

static struct datadep_stats
{
  int num_dependence_tests;		/* Relation tests attempted.  */
  int num_dependence_dependent;		/* ... that proved dependence.  */
  int num_dependence_independent;	/* ... that proved independence.  */
  int num_dependence_undetermined;	/* ... left undetermined.  */

  int num_subscript_tests;		/* Per-subscript tests attempted.  */
  int num_subscript_undetermined;	/* ... left undetermined.  */
  int num_same_subscript_function;	/* Identical access functions.  */

  int num_ziv;				/* ZIV subscript tests.  */
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;				/* SIV subscript tests.  */
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;				/* MIV subscript tests.  */
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
131 | |
/* Forward declaration; the definition appears later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
135 | /* Returns true iff A divides B. */ |
136 | |
137 | static inline bool |
138 | tree_fold_divides_p (const_tree a, const_tree b) |
139 | { |
140 | gcc_assert (TREE_CODE (a) == INTEGER_CST); |
141 | gcc_assert (TREE_CODE (b) == INTEGER_CST); |
142 | return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a)); |
143 | } |
144 | |
145 | /* Returns true iff A divides B. */ |
146 | |
147 | static inline bool |
148 | int_divides_p (lambda_int a, lambda_int b) |
149 | { |
150 | return ((b % a) == 0); |
151 | } |
152 | |
153 | /* Return true if reference REF contains a union access. */ |
154 | |
155 | static bool |
156 | ref_contains_union_access_p (tree ref) |
157 | { |
158 | while (handled_component_p (t: ref)) |
159 | { |
160 | ref = TREE_OPERAND (ref, 0); |
161 | if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE |
162 | || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE) |
163 | return true; |
164 | } |
165 | return false; |
166 | } |
167 | |
168 | |
169 | |
170 | /* Dump into FILE all the data references from DATAREFS. */ |
171 | |
172 | static void |
173 | dump_data_references (FILE *file, vec<data_reference_p> datarefs) |
174 | { |
175 | for (data_reference *dr : datarefs) |
176 | dump_data_reference (file, dr); |
177 | } |
178 | |
179 | /* Unified dump into FILE all the data references from DATAREFS. */ |
180 | |
181 | DEBUG_FUNCTION void |
182 | debug (vec<data_reference_p> &ref) |
183 | { |
184 | dump_data_references (stderr, datarefs: ref); |
185 | } |
186 | |
187 | DEBUG_FUNCTION void |
188 | debug (vec<data_reference_p> *ptr) |
189 | { |
190 | if (ptr) |
191 | debug (ref&: *ptr); |
192 | else |
193 | fprintf (stderr, format: "<nil>\n" ); |
194 | } |
195 | |
196 | |
/* Dump into STDERR all the data references from DATAREFS.
   Convenience wrapper around dump_data_references for use from a
   debugger.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
204 | |
/* Print to STDERR the data_reference DR.  Convenience wrapper around
   dump_data_reference for use from a debugger.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
212 | |
213 | /* Dump function for a DATA_REFERENCE structure. */ |
214 | |
215 | void |
216 | dump_data_reference (FILE *outf, |
217 | struct data_reference *dr) |
218 | { |
219 | unsigned int i; |
220 | |
221 | fprintf (stream: outf, format: "#(Data Ref: \n" ); |
222 | fprintf (stream: outf, format: "# bb: %d \n" , gimple_bb (DR_STMT (dr))->index); |
223 | fprintf (stream: outf, format: "# stmt: " ); |
224 | print_gimple_stmt (outf, DR_STMT (dr), 0); |
225 | fprintf (stream: outf, format: "# ref: " ); |
226 | print_generic_stmt (outf, DR_REF (dr)); |
227 | fprintf (stream: outf, format: "# base_object: " ); |
228 | print_generic_stmt (outf, DR_BASE_OBJECT (dr)); |
229 | |
230 | for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) |
231 | { |
232 | fprintf (stream: outf, format: "# Access function %d: " , i); |
233 | print_generic_stmt (outf, DR_ACCESS_FN (dr, i)); |
234 | } |
235 | fprintf (stream: outf, format: "#)\n" ); |
236 | } |
237 | |
238 | /* Unified dump function for a DATA_REFERENCE structure. */ |
239 | |
240 | DEBUG_FUNCTION void |
241 | debug (data_reference &ref) |
242 | { |
243 | dump_data_reference (stderr, dr: &ref); |
244 | } |
245 | |
246 | DEBUG_FUNCTION void |
247 | debug (data_reference *ptr) |
248 | { |
249 | if (ptr) |
250 | debug (ref&: *ptr); |
251 | else |
252 | fprintf (stderr, format: "<nil>\n" ); |
253 | } |
254 | |
255 | |
256 | /* Dumps the affine function described by FN to the file OUTF. */ |
257 | |
258 | DEBUG_FUNCTION void |
259 | dump_affine_function (FILE *outf, affine_fn fn) |
260 | { |
261 | unsigned i; |
262 | tree coef; |
263 | |
264 | print_generic_expr (outf, fn[0], TDF_SLIM); |
265 | for (i = 1; fn.iterate (ix: i, ptr: &coef); i++) |
266 | { |
267 | fprintf (stream: outf, format: " + " ); |
268 | print_generic_expr (outf, coef, TDF_SLIM); |
269 | fprintf (stream: outf, format: " * x_%u" , i); |
270 | } |
271 | } |
272 | |
273 | /* Dumps the conflict function CF to the file OUTF. */ |
274 | |
275 | DEBUG_FUNCTION void |
276 | dump_conflict_function (FILE *outf, conflict_function *cf) |
277 | { |
278 | unsigned i; |
279 | |
280 | if (cf->n == NO_DEPENDENCE) |
281 | fprintf (stream: outf, format: "no dependence" ); |
282 | else if (cf->n == NOT_KNOWN) |
283 | fprintf (stream: outf, format: "not known" ); |
284 | else |
285 | { |
286 | for (i = 0; i < cf->n; i++) |
287 | { |
288 | if (i != 0) |
289 | fprintf (stream: outf, format: " " ); |
290 | fprintf (stream: outf, format: "[" ); |
291 | dump_affine_function (outf, fn: cf->fns[i]); |
292 | fprintf (stream: outf, format: "]" ); |
293 | } |
294 | } |
295 | } |
296 | |
297 | /* Dump function for a SUBSCRIPT structure. */ |
298 | |
299 | DEBUG_FUNCTION void |
300 | dump_subscript (FILE *outf, struct subscript *subscript) |
301 | { |
302 | conflict_function *cf = SUB_CONFLICTS_IN_A (subscript); |
303 | |
304 | fprintf (stream: outf, format: "\n (subscript \n" ); |
305 | fprintf (stream: outf, format: " iterations_that_access_an_element_twice_in_A: " ); |
306 | dump_conflict_function (outf, cf); |
307 | if (CF_NONTRIVIAL_P (cf)) |
308 | { |
309 | tree last_iteration = SUB_LAST_CONFLICT (subscript); |
310 | fprintf (stream: outf, format: "\n last_conflict: " ); |
311 | print_generic_expr (outf, last_iteration); |
312 | } |
313 | |
314 | cf = SUB_CONFLICTS_IN_B (subscript); |
315 | fprintf (stream: outf, format: "\n iterations_that_access_an_element_twice_in_B: " ); |
316 | dump_conflict_function (outf, cf); |
317 | if (CF_NONTRIVIAL_P (cf)) |
318 | { |
319 | tree last_iteration = SUB_LAST_CONFLICT (subscript); |
320 | fprintf (stream: outf, format: "\n last_conflict: " ); |
321 | print_generic_expr (outf, last_iteration); |
322 | } |
323 | |
324 | fprintf (stream: outf, format: "\n (Subscript distance: " ); |
325 | print_generic_expr (outf, SUB_DISTANCE (subscript)); |
326 | fprintf (stream: outf, format: " ))\n" ); |
327 | } |
328 | |
329 | /* Print the classic direction vector DIRV to OUTF. */ |
330 | |
331 | DEBUG_FUNCTION void |
332 | print_direction_vector (FILE *outf, |
333 | lambda_vector dirv, |
334 | int length) |
335 | { |
336 | int eq; |
337 | |
338 | for (eq = 0; eq < length; eq++) |
339 | { |
340 | enum data_dependence_direction dir = ((enum data_dependence_direction) |
341 | dirv[eq]); |
342 | |
343 | switch (dir) |
344 | { |
345 | case dir_positive: |
346 | fprintf (stream: outf, format: " +" ); |
347 | break; |
348 | case dir_negative: |
349 | fprintf (stream: outf, format: " -" ); |
350 | break; |
351 | case dir_equal: |
352 | fprintf (stream: outf, format: " =" ); |
353 | break; |
354 | case dir_positive_or_equal: |
355 | fprintf (stream: outf, format: " +=" ); |
356 | break; |
357 | case dir_positive_or_negative: |
358 | fprintf (stream: outf, format: " +-" ); |
359 | break; |
360 | case dir_negative_or_equal: |
361 | fprintf (stream: outf, format: " -=" ); |
362 | break; |
363 | case dir_star: |
364 | fprintf (stream: outf, format: " *" ); |
365 | break; |
366 | default: |
367 | fprintf (stream: outf, format: "indep" ); |
368 | break; |
369 | } |
370 | } |
371 | fprintf (stream: outf, format: "\n" ); |
372 | } |
373 | |
374 | /* Print a vector of direction vectors. */ |
375 | |
376 | DEBUG_FUNCTION void |
377 | print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, |
378 | int length) |
379 | { |
380 | for (lambda_vector v : dir_vects) |
381 | print_direction_vector (outf, dirv: v, length); |
382 | } |
383 | |
384 | /* Print out a vector VEC of length N to OUTFILE. */ |
385 | |
386 | DEBUG_FUNCTION void |
387 | print_lambda_vector (FILE * outfile, lambda_vector vector, int n) |
388 | { |
389 | int i; |
390 | |
391 | for (i = 0; i < n; i++) |
392 | fprintf (stream: outfile, HOST_WIDE_INT_PRINT_DEC " " , vector[i]); |
393 | fprintf (stream: outfile, format: "\n" ); |
394 | } |
395 | |
396 | /* Print a vector of distance vectors. */ |
397 | |
398 | DEBUG_FUNCTION void |
399 | print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, |
400 | int length) |
401 | { |
402 | for (lambda_vector v : dist_vects) |
403 | print_lambda_vector (outfile: outf, vector: v, n: length); |
404 | } |
405 | |
406 | /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ |
407 | |
408 | DEBUG_FUNCTION void |
409 | dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr) |
410 | { |
411 | struct data_reference *dra, *drb; |
412 | |
413 | fprintf (stream: outf, format: "(Data Dep: \n" ); |
414 | |
415 | if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
416 | { |
417 | if (ddr) |
418 | { |
419 | dra = DDR_A (ddr); |
420 | drb = DDR_B (ddr); |
421 | if (dra) |
422 | dump_data_reference (outf, dr: dra); |
423 | else |
424 | fprintf (stream: outf, format: " (nil)\n" ); |
425 | if (drb) |
426 | dump_data_reference (outf, dr: drb); |
427 | else |
428 | fprintf (stream: outf, format: " (nil)\n" ); |
429 | } |
430 | fprintf (stream: outf, format: " (don't know)\n)\n" ); |
431 | return; |
432 | } |
433 | |
434 | dra = DDR_A (ddr); |
435 | drb = DDR_B (ddr); |
436 | dump_data_reference (outf, dr: dra); |
437 | dump_data_reference (outf, dr: drb); |
438 | |
439 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) |
440 | fprintf (stream: outf, format: " (no dependence)\n" ); |
441 | |
442 | else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) |
443 | { |
444 | unsigned int i; |
445 | class loop *loopi; |
446 | |
447 | subscript *sub; |
448 | FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) |
449 | { |
450 | fprintf (stream: outf, format: " access_fn_A: " ); |
451 | print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0)); |
452 | fprintf (stream: outf, format: " access_fn_B: " ); |
453 | print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1)); |
454 | dump_subscript (outf, subscript: sub); |
455 | } |
456 | |
457 | fprintf (stream: outf, format: " loop nest: (" ); |
458 | FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi) |
459 | fprintf (stream: outf, format: "%d " , loopi->num); |
460 | fprintf (stream: outf, format: ")\n" ); |
461 | |
462 | for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) |
463 | { |
464 | fprintf (stream: outf, format: " distance_vector: " ); |
465 | print_lambda_vector (outfile: outf, DDR_DIST_VECT (ddr, i), |
466 | DDR_NB_LOOPS (ddr)); |
467 | } |
468 | |
469 | for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) |
470 | { |
471 | fprintf (stream: outf, format: " direction_vector: " ); |
472 | print_direction_vector (outf, DDR_DIR_VECT (ddr, i), |
473 | DDR_NB_LOOPS (ddr)); |
474 | } |
475 | } |
476 | |
477 | fprintf (stream: outf, format: ")\n" ); |
478 | } |
479 | |
/* Debug version: dump DDR to STDERR.  Convenience wrapper for use
   from a debugger.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
487 | |
488 | /* Dump into FILE all the dependence relations from DDRS. */ |
489 | |
490 | DEBUG_FUNCTION void |
491 | dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs) |
492 | { |
493 | for (auto ddr : ddrs) |
494 | dump_data_dependence_relation (outf: file, ddr); |
495 | } |
496 | |
497 | DEBUG_FUNCTION void |
498 | debug (vec<ddr_p> &ref) |
499 | { |
500 | dump_data_dependence_relations (stderr, ddrs: ref); |
501 | } |
502 | |
503 | DEBUG_FUNCTION void |
504 | debug (vec<ddr_p> *ptr) |
505 | { |
506 | if (ptr) |
507 | debug (ref&: *ptr); |
508 | else |
509 | fprintf (stderr, format: "<nil>\n" ); |
510 | } |
511 | |
512 | |
/* Dump to STDERR all the dependence relations from DDRS.
   Convenience wrapper for use from a debugger.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
520 | |
521 | /* Dumps the distance and direction vectors in FILE. DDRS contains |
522 | the dependence relations, and VECT_SIZE is the size of the |
523 | dependence vectors, or in other words the number of loops in the |
524 | considered nest. */ |
525 | |
526 | DEBUG_FUNCTION void |
527 | dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) |
528 | { |
529 | for (data_dependence_relation *ddr : ddrs) |
530 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) |
531 | { |
532 | for (lambda_vector v : DDR_DIST_VECTS (ddr)) |
533 | { |
534 | fprintf (stream: file, format: "DISTANCE_V (" ); |
535 | print_lambda_vector (outfile: file, vector: v, DDR_NB_LOOPS (ddr)); |
536 | fprintf (stream: file, format: ")\n" ); |
537 | } |
538 | |
539 | for (lambda_vector v : DDR_DIR_VECTS (ddr)) |
540 | { |
541 | fprintf (stream: file, format: "DIRECTION_V (" ); |
542 | print_direction_vector (outf: file, dirv: v, DDR_NB_LOOPS (ddr)); |
543 | fprintf (stream: file, format: ")\n" ); |
544 | } |
545 | } |
546 | |
547 | fprintf (stream: file, format: "\n\n" ); |
548 | } |
549 | |
550 | /* Dumps the data dependence relations DDRS in FILE. */ |
551 | |
552 | DEBUG_FUNCTION void |
553 | dump_ddrs (FILE *file, vec<ddr_p> ddrs) |
554 | { |
555 | for (data_dependence_relation *ddr : ddrs) |
556 | dump_data_dependence_relation (outf: file, ddr); |
557 | |
558 | fprintf (stream: file, format: "\n\n" ); |
559 | } |
560 | |
/* Dump DDRS to STDERR.  Convenience wrapper for use from a
   debugger.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
566 | |
567 | /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of |
568 | OP0 CODE OP1, where: |
569 | |
570 | - OP0 CODE OP1 has integral type TYPE |
571 | - the range of OP0 is given by OP0_RANGE and |
572 | - the range of OP1 is given by OP1_RANGE. |
573 | |
574 | Independently of RESULT_RANGE, try to compute: |
575 | |
576 | DELTA = ((sizetype) OP0 CODE (sizetype) OP1) |
577 | - (sizetype) (OP0 CODE OP1) |
578 | |
579 | as a constant and subtract DELTA from the ssizetype constant in *OFF. |
580 | Return true on success, or false if DELTA is not known at compile time. |
581 | |
582 | Truncation and sign changes are known to distribute over CODE, i.e. |
583 | |
584 | (itype) (A CODE B) == (itype) A CODE (itype) B |
585 | |
586 | for any integral type ITYPE whose precision is no greater than the |
587 | precision of A and B. */ |
588 | |
589 | static bool |
590 | compute_distributive_range (tree type, value_range &op0_range, |
591 | tree_code code, value_range &op1_range, |
592 | tree *off, value_range *result_range) |
593 | { |
594 | gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)); |
595 | if (result_range) |
596 | { |
597 | range_op_handler op (code); |
598 | if (!op.fold_range (r&: *result_range, type, lh: op0_range, rh: op1_range)) |
599 | result_range->set_varying (type); |
600 | } |
601 | |
602 | /* The distributive property guarantees that if TYPE is no narrower |
603 | than SIZETYPE, |
604 | |
605 | (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1 |
606 | |
607 | and so we can treat DELTA as zero. */ |
608 | if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype)) |
609 | return true; |
610 | |
611 | /* If overflow is undefined, we can assume that: |
612 | |
613 | X == (ssizetype) OP0 CODE (ssizetype) OP1 |
614 | |
615 | is within the range of TYPE, i.e.: |
616 | |
617 | X == (ssizetype) (TYPE) X |
618 | |
619 | Distributing the (TYPE) truncation over X gives: |
620 | |
621 | X == (ssizetype) (OP0 CODE OP1) |
622 | |
623 | Casting both sides to sizetype and distributing the sizetype cast |
624 | over X gives: |
625 | |
626 | (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1) |
627 | |
628 | and so we can treat DELTA as zero. */ |
629 | if (TYPE_OVERFLOW_UNDEFINED (type)) |
630 | return true; |
631 | |
632 | /* Compute the range of: |
633 | |
634 | (ssizetype) OP0 CODE (ssizetype) OP1 |
635 | |
636 | The distributive property guarantees that this has the same bitpattern as: |
637 | |
638 | (sizetype) OP0 CODE (sizetype) OP1 |
639 | |
640 | but its range is more conducive to analysis. */ |
641 | range_cast (r&: op0_range, ssizetype); |
642 | range_cast (r&: op1_range, ssizetype); |
643 | value_range wide_range; |
644 | range_op_handler op (code); |
645 | bool saved_flag_wrapv = flag_wrapv; |
646 | flag_wrapv = 1; |
647 | if (!op.fold_range (r&: wide_range, ssizetype, lh: op0_range, rh: op1_range)) |
648 | wide_range.set_varying (ssizetype);; |
649 | flag_wrapv = saved_flag_wrapv; |
650 | if (wide_range.num_pairs () != 1 |
651 | || wide_range.varying_p () || wide_range.undefined_p ()) |
652 | return false; |
653 | |
654 | wide_int lb = wide_range.lower_bound (); |
655 | wide_int ub = wide_range.upper_bound (); |
656 | |
657 | /* Calculate the number of times that each end of the range overflows or |
658 | underflows TYPE. We can only calculate DELTA if the numbers match. */ |
659 | unsigned int precision = TYPE_PRECISION (type); |
660 | if (!TYPE_UNSIGNED (type)) |
661 | { |
662 | wide_int type_min = wi::mask (width: precision - 1, negate_p: true, precision: lb.get_precision ()); |
663 | lb -= type_min; |
664 | ub -= type_min; |
665 | } |
666 | wide_int upper_bits = wi::mask (width: precision, negate_p: true, precision: lb.get_precision ()); |
667 | lb &= upper_bits; |
668 | ub &= upper_bits; |
669 | if (lb != ub) |
670 | return false; |
671 | |
672 | /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with |
673 | negative values indicating underflow. The low PRECISION bits of LB |
674 | are clear, so DELTA is therefore LB (== UB). */ |
675 | *off = wide_int_to_tree (ssizetype, cst: wi::to_wide (t: *off) - lb); |
676 | return true; |
677 | } |
678 | |
679 | /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP, |
680 | given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and |
681 | FROM_TYPE are integral types. */ |
682 | |
683 | static bool |
684 | nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range) |
685 | { |
686 | gcc_assert (INTEGRAL_TYPE_P (to_type) |
687 | && INTEGRAL_TYPE_P (from_type) |
688 | && !TYPE_OVERFLOW_TRAPS (to_type) |
689 | && !TYPE_OVERFLOW_TRAPS (from_type)); |
690 | |
691 | /* Converting to something no narrower than sizetype and then to sizetype |
692 | is equivalent to converting directly to sizetype. */ |
693 | if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype)) |
694 | return true; |
695 | |
696 | /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */ |
697 | if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type) |
698 | && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type))) |
699 | return true; |
700 | |
701 | /* For narrowing conversions, we could in principle test whether |
702 | the bits in FROM_TYPE but not in TO_TYPE have a fixed value |
703 | and apply a constant adjustment. |
704 | |
705 | For other conversions (which involve a sign change) we could |
706 | check that the signs are always equal, and apply a constant |
707 | adjustment if the signs are negative. |
708 | |
709 | However, both cases should be rare. */ |
710 | return range_fits_type_p (vr: &range, TYPE_PRECISION (to_type), |
711 | TYPE_SIGN (to_type)); |
712 | } |
713 | |
/* Forward declaration; the definition appears later in this file.  */
static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
719 | |
720 | /* Helper function for split_constant_offset. If TYPE is a pointer type, |
721 | try to express OP0 CODE OP1 as: |
722 | |
723 | POINTER_PLUS <*VAR, (sizetype) *OFF> |
724 | |
725 | where: |
726 | |
727 | - *VAR has type TYPE |
728 | - *OFF is a constant of type ssizetype. |
729 | |
730 | If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as: |
731 | |
732 | *VAR + (sizetype) *OFF |
733 | |
734 | where: |
735 | |
736 | - *VAR has type sizetype |
737 | - *OFF is a constant of type ssizetype. |
738 | |
739 | In both cases, OP0 CODE OP1 has type TYPE. |
740 | |
741 | Return true on success. A false return value indicates that we can't |
742 | do better than set *OFF to zero. |
743 | |
744 | When returning true, set RESULT_RANGE to the range of OP0 CODE OP1, |
745 | if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING. |
746 | |
747 | CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously |
748 | visited. LIMIT counts down the number of SSA names that we are |
749 | allowed to process before giving up. */ |
750 | |
751 | static bool |
752 | split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, |
753 | tree *var, tree *off, value_range *result_range, |
754 | hash_map<tree, std::pair<tree, tree> > &cache, |
755 | unsigned *limit) |
756 | { |
757 | tree var0, var1; |
758 | tree off0, off1; |
759 | value_range op0_range, op1_range; |
760 | |
761 | *var = NULL_TREE; |
762 | *off = NULL_TREE; |
763 | |
764 | if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) |
765 | return false; |
766 | |
767 | switch (code) |
768 | { |
769 | case INTEGER_CST: |
770 | *var = size_int (0); |
771 | *off = fold_convert (ssizetype, op0); |
772 | if (result_range) |
773 | { |
774 | wide_int w = wi::to_wide (t: op0); |
775 | result_range->set (TREE_TYPE (op0), w, w); |
776 | } |
777 | return true; |
778 | |
779 | case POINTER_PLUS_EXPR: |
780 | split_constant_offset (type: op0, var: &var0, off: &off0, result_range: nullptr, cache, limit); |
781 | split_constant_offset (type: op1, var: &var1, off: &off1, result_range: nullptr, cache, limit); |
782 | *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1); |
783 | *off = size_binop (PLUS_EXPR, off0, off1); |
784 | return true; |
785 | |
786 | case PLUS_EXPR: |
787 | case MINUS_EXPR: |
788 | split_constant_offset (type: op0, var: &var0, off: &off0, result_range: &op0_range, cache, limit); |
789 | split_constant_offset (type: op1, var: &var1, off: &off1, result_range: &op1_range, cache, limit); |
790 | *off = size_binop (code, off0, off1); |
791 | if (!compute_distributive_range (type, op0_range, code, op1_range, |
792 | off, result_range)) |
793 | return false; |
794 | *var = fold_build2 (code, sizetype, var0, var1); |
795 | return true; |
796 | |
797 | case MULT_EXPR: |
798 | if (TREE_CODE (op1) != INTEGER_CST) |
799 | return false; |
800 | |
801 | split_constant_offset (type: op0, var: &var0, off: &off0, result_range: &op0_range, cache, limit); |
802 | op1_range.set (TREE_TYPE (op1), wi::to_wide (t: op1), wi::to_wide (t: op1)); |
803 | *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1)); |
804 | if (!compute_distributive_range (type, op0_range, code, op1_range, |
805 | off, result_range)) |
806 | return false; |
807 | *var = fold_build2 (MULT_EXPR, sizetype, var0, |
808 | fold_convert (sizetype, op1)); |
809 | return true; |
810 | |
811 | case ADDR_EXPR: |
812 | { |
813 | tree base, poffset; |
814 | poly_int64 pbitsize, pbitpos, pbytepos; |
815 | machine_mode pmode; |
816 | int punsignedp, preversep, pvolatilep; |
817 | |
818 | op0 = TREE_OPERAND (op0, 0); |
819 | base |
820 | = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode, |
821 | &punsignedp, &preversep, &pvolatilep); |
822 | |
823 | if (!multiple_p (a: pbitpos, BITS_PER_UNIT, multiple: &pbytepos)) |
824 | return false; |
825 | base = build_fold_addr_expr (base); |
826 | off0 = ssize_int (pbytepos); |
827 | |
828 | if (poffset) |
829 | { |
830 | split_constant_offset (type: poffset, var: &poffset, off: &off1, result_range: nullptr, |
831 | cache, limit); |
832 | off0 = size_binop (PLUS_EXPR, off0, off1); |
833 | base = fold_build_pointer_plus (base, poffset); |
834 | } |
835 | |
836 | var0 = fold_convert (type, base); |
837 | |
838 | /* If variable length types are involved, punt, otherwise casts |
839 | might be converted into ARRAY_REFs in gimplify_conversion. |
840 | To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which |
841 | possibly no longer appears in current GIMPLE, might resurface. |
842 | This perhaps could run |
843 | if (CONVERT_EXPR_P (var0)) |
844 | { |
845 | gimplify_conversion (&var0); |
846 | // Attempt to fill in any within var0 found ARRAY_REF's |
847 | // element size from corresponding op embedded ARRAY_REF, |
848 | // if unsuccessful, just punt. |
849 | } */ |
850 | while (POINTER_TYPE_P (type)) |
851 | type = TREE_TYPE (type); |
852 | if (int_size_in_bytes (type) < 0) |
853 | return false; |
854 | |
855 | *var = var0; |
856 | *off = off0; |
857 | return true; |
858 | } |
859 | |
860 | case SSA_NAME: |
861 | { |
862 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0)) |
863 | return false; |
864 | |
865 | gimple *def_stmt = SSA_NAME_DEF_STMT (op0); |
866 | enum tree_code subcode; |
867 | |
868 | if (gimple_code (g: def_stmt) != GIMPLE_ASSIGN) |
869 | return false; |
870 | |
871 | subcode = gimple_assign_rhs_code (gs: def_stmt); |
872 | |
873 | /* We are using a cache to avoid un-CSEing large amounts of code. */ |
874 | bool use_cache = false; |
875 | if (!has_single_use (var: op0) |
876 | && (subcode == POINTER_PLUS_EXPR |
877 | || subcode == PLUS_EXPR |
878 | || subcode == MINUS_EXPR |
879 | || subcode == MULT_EXPR |
880 | || subcode == ADDR_EXPR |
881 | || CONVERT_EXPR_CODE_P (subcode))) |
882 | { |
883 | use_cache = true; |
884 | bool existed; |
885 | std::pair<tree, tree> &e = cache.get_or_insert (k: op0, existed: &existed); |
886 | if (existed) |
887 | { |
888 | if (integer_zerop (e.second)) |
889 | return false; |
890 | *var = e.first; |
891 | *off = e.second; |
892 | /* The caller sets the range in this case. */ |
893 | return true; |
894 | } |
895 | e = std::make_pair (x&: op0, ssize_int (0)); |
896 | } |
897 | |
898 | if (*limit == 0) |
899 | return false; |
900 | --*limit; |
901 | |
902 | var0 = gimple_assign_rhs1 (gs: def_stmt); |
903 | var1 = gimple_assign_rhs2 (gs: def_stmt); |
904 | |
905 | bool res = split_constant_offset_1 (type, op0: var0, code: subcode, op1: var1, |
906 | var, off, result_range: nullptr, cache, limit); |
907 | if (res && use_cache) |
908 | *cache.get (k: op0) = std::make_pair (x&: *var, y&: *off); |
909 | /* The caller sets the range in this case. */ |
910 | return res; |
911 | } |
912 | CASE_CONVERT: |
913 | { |
914 | /* We can only handle the following conversions: |
915 | |
916 | - Conversions from one pointer type to another pointer type. |
917 | |
918 | - Conversions from one non-trapping integral type to another |
919 | non-trapping integral type. In this case, the recursive |
920 | call makes sure that: |
921 | |
922 | (sizetype) OP0 |
923 | |
924 | can be expressed as a sizetype operation involving VAR and OFF, |
925 | and all we need to do is check whether: |
926 | |
927 | (sizetype) OP0 == (sizetype) (TYPE) OP0 |
928 | |
929 | - Conversions from a non-trapping sizetype-size integral type to |
930 | a like-sized pointer type. In this case, the recursive call |
931 | makes sure that: |
932 | |
933 | (sizetype) OP0 == *VAR + (sizetype) *OFF |
934 | |
935 | and we can convert that to: |
936 | |
937 | POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF> |
938 | |
939 | - Conversions from a sizetype-sized pointer type to a like-sized |
940 | non-trapping integral type. In this case, the recursive call |
941 | makes sure that: |
942 | |
943 | OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF> |
944 | |
945 | where the POINTER_PLUS and *VAR have the same precision as |
946 | TYPE (and the same precision as sizetype). Then: |
947 | |
948 | (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF. */ |
949 | tree itype = TREE_TYPE (op0); |
950 | if ((POINTER_TYPE_P (itype) |
951 | || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype))) |
952 | && (POINTER_TYPE_P (type) |
953 | || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))) |
954 | && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype) |
955 | || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype) |
956 | && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype)))) |
957 | { |
958 | if (POINTER_TYPE_P (type)) |
959 | { |
960 | split_constant_offset (type: op0, var, off, result_range: nullptr, cache, limit); |
961 | *var = fold_convert (type, *var); |
962 | } |
963 | else if (POINTER_TYPE_P (itype)) |
964 | { |
965 | split_constant_offset (type: op0, var, off, result_range: nullptr, cache, limit); |
966 | *var = fold_convert (sizetype, *var); |
967 | } |
968 | else |
969 | { |
970 | split_constant_offset (type: op0, var, off, result_range: &op0_range, |
971 | cache, limit); |
972 | if (!nop_conversion_for_offset_p (to_type: type, from_type: itype, range&: op0_range)) |
973 | return false; |
974 | if (result_range) |
975 | { |
976 | *result_range = op0_range; |
977 | range_cast (r&: *result_range, type); |
978 | } |
979 | } |
980 | return true; |
981 | } |
982 | return false; |
983 | } |
984 | |
985 | default: |
986 | return false; |
987 | } |
988 | } |
989 | |
990 | /* If EXP has pointer type, try to express it as: |
991 | |
992 | POINTER_PLUS <*VAR, (sizetype) *OFF> |
993 | |
994 | where: |
995 | |
996 | - *VAR has the same type as EXP |
997 | - *OFF is a constant of type ssizetype. |
998 | |
999 | If EXP has an integral type, try to express (sizetype) EXP as: |
1000 | |
1001 | *VAR + (sizetype) *OFF |
1002 | |
1003 | where: |
1004 | |
1005 | - *VAR has type sizetype |
1006 | - *OFF is a constant of type ssizetype. |
1007 | |
1008 | If EXP_RANGE is nonnull, set it to the range of EXP. |
1009 | |
1010 | CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously |
1011 | visited. LIMIT counts down the number of SSA names that we are |
1012 | allowed to process before giving up. */ |
1013 | |
static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      /* Default to the full range of EXP's type; refined below when EXP
	 is an SSA name with usable range and nonzero-bits information.  */
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (r&: vr, expr: exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, min&: vr_min, max&: vr_max);
	  wide_int var_min = wi::to_wide (t: vr_min);
	  wide_int var_max = wi::to_wide (t: vr_max);
	  /* Tighten the [min, max] bounds using known nonzero bits.  */
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  /* Chrecs and ternary-rhs expressions are not decomposable by
     split_constant_offset_1; try everything else.  */
  if (!tree_is_chrec (expr: exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (expr: exp, code: &code, op0: &op0, op1: &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   result_range: exp_range, cache, limit))
	return;
    }

  /* No decomposition was possible: EXP itself is the variable part and
     the constant offset is zero.  Integral EXPs are expressed in
     sizetype, per the function contract.  */
  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  /* SSA_NAME ranges were already computed above; for other codes query
     the range of EXP directly, keeping the type-based default when the
     query fails or yields an undefined range.  */
  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, expr: exp)
      && !r.undefined_p ())
    *exp_range = r;
}
1070 | |
1071 | /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same |
1072 | type as EXP while OFF has type ssizetype. */ |
1073 | |
1074 | void |
1075 | split_constant_offset (tree exp, tree *var, tree *off) |
1076 | { |
1077 | unsigned limit = param_ssa_name_def_chain_limit; |
1078 | static hash_map<tree, std::pair<tree, tree> > *cache; |
1079 | if (!cache) |
1080 | cache = new hash_map<tree, std::pair<tree, tree> > (37); |
1081 | split_constant_offset (exp, var, off, exp_range: nullptr, cache&: *cache, limit: &limit); |
1082 | *var = fold_convert (TREE_TYPE (exp), *var); |
1083 | cache->empty (); |
1084 | } |
1085 | |
1086 | /* Returns the address ADDR of an object in a canonical shape (without nop |
1087 | casts, and with type of pointer to the object). */ |
1088 | |
1089 | static tree |
1090 | canonicalize_base_object_address (tree addr) |
1091 | { |
1092 | tree orig = addr; |
1093 | |
1094 | STRIP_NOPS (addr); |
1095 | |
1096 | /* The base address may be obtained by casting from integer, in that case |
1097 | keep the cast. */ |
1098 | if (!POINTER_TYPE_P (TREE_TYPE (addr))) |
1099 | return orig; |
1100 | |
1101 | if (TREE_CODE (addr) != ADDR_EXPR) |
1102 | return addr; |
1103 | |
1104 | return build_fold_addr_expr (TREE_OPERAND (addr, 0)); |
1105 | } |
1106 | |
1107 | /* Analyze the behavior of memory reference REF within STMT. |
1108 | There are two modes: |
1109 | |
1110 | - BB analysis. In this case we simply split the address into base, |
1111 | init and offset components, without reference to any containing loop. |
1112 | The resulting base and offset are general expressions and they can |
1113 | vary arbitrarily from one iteration of the containing loop to the next. |
1114 | The step is always zero. |
1115 | |
1116 | - loop analysis. In this case we analyze the reference both wrt LOOP |
1117 | and on the basis that the reference occurs (is "used") in LOOP; |
1118 | see the comment above analyze_scalar_evolution_in_loop for more |
1119 | information about this distinction. The base, init, offset and |
1120 | step fields are all invariant in LOOP. |
1121 | |
1122 | Perform BB analysis if LOOP is null, or if LOOP is the function's |
1123 | dummy outermost loop. In other cases perform loop analysis. |
1124 | |
1125 | Return true if the analysis succeeded and store the results in DRB if so. |
1126 | BB analysis can only fail for bitfield or reversed-storage accesses. */ |
1127 | |
opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  /* Loop number 0 is the function's dummy outermost loop: treat it
     (and a null LOOP) as a request for BB analysis.  */
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "analyze_innermost: " );

  /* Decompose REF into a base object, a constant bit position and a
     variable offset POFFSET.  */
  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  /* Bitfield accesses (positions that are not byte-aligned) cannot be
     represented; fail.  */
  poly_int64 pbytepos;
  if (!multiple_p (a: pbitpos, BITS_PER_UNIT, multiple: &pbytepos))
    return opt_result::failure_at (loc: stmt,
				   fmt: "failed: bit offset alignment.\n" );

  /* Reverse storage order accesses are not handled.  */
  if (preversep)
    return opt_result::failure_at (loc: stmt,
				   fmt: "failed: reverse storage order.\n" );

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned int HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, cst: moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      /* Use the (now offset-free) address as the base.  */
      base = TREE_OPERAND (base, 0);
    }
  else
    {
      /* We need to take the address of BASE below; that is only valid
	 for addressable objects.  */
      if (may_be_nonaddressable_p (expr: base))
	return opt_result::failure_at (loc: stmt,
				       fmt: "failed: base not addressable.\n" );
      base = build_fold_addr_expr (base);
    }

  /* Analyze the evolution of the base address: an affine IV in LOOP
     for loop analysis, or an invariant with zero step for BB analysis.  */
  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (loc: stmt, fmt: "failed: evolution of base is not affine.\n" );
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  /* Likewise for the variable offset part, if any.  */
  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (loc: stmt, fmt: "failed: evolution of offset is not affine.\n" );
    }

  /* Start INIT with the constant byte position within the base.  */
  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (exp: base_iv.base, var: &base_iv.base, off: &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  /* Likewise fold the constant component of the offset into INIT.  */
  split_constant_offset (exp: offset_iv.base, var: &offset_iv.base, off: &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  /* The overall step is the sum of the base and offset evolutions.  */
  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (addr: base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned int HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  /* Fill in the resulting behavior description.  */
  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (value: base_misalignment, align: base_alignment,
			  misalign: &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      /* The misalignment is not a compile-time constant; fall back to
	 whatever alignment it is known to be a multiple of.  */
      drb->base_alignment = known_alignment (a: base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "success.\n" );

  return opt_result::success ();
}
1277 | |
1278 | /* Return true if OP is a valid component reference for a DR access |
1279 | function. This accepts a subset of what handled_component_p accepts. */ |
1280 | |
1281 | static bool |
1282 | access_fn_component_p (tree op) |
1283 | { |
1284 | switch (TREE_CODE (op)) |
1285 | { |
1286 | case REALPART_EXPR: |
1287 | case IMAGPART_EXPR: |
1288 | case ARRAY_REF: |
1289 | return true; |
1290 | |
1291 | case COMPONENT_REF: |
1292 | return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE; |
1293 | |
1294 | default: |
1295 | return false; |
1296 | } |
1297 | } |
1298 | |
1299 | /* Returns whether BASE can have a access_fn_component_p with BASE |
1300 | as base. */ |
1301 | |
1302 | static bool |
1303 | base_supports_access_fn_components_p (tree base) |
1304 | { |
1305 | switch (TREE_CODE (TREE_TYPE (base))) |
1306 | { |
1307 | case COMPLEX_TYPE: |
1308 | case ARRAY_TYPE: |
1309 | case RECORD_TYPE: |
1310 | return true; |
1311 | default: |
1312 | return false; |
1313 | } |
1314 | } |
1315 | |
1316 | /* Determines the base object and the list of indices of memory reference |
1317 | DR, analyzed in LOOP and instantiated before NEST. */ |
1318 | |
static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (nelems: 0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (t: ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The evolution of the array index is the access function
	     for this dimension.  */
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (obj: access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  /* Combine the byte offset with the bit offset into a single
	     constant expressed in bits.  */
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (obj: off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  /* Split the chrec's initial condition into a variable base
	     plus constant offset OFF.  */
	  split_constant_offset (exp: base, var: &base, off: &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (x: wi::to_wide (t: off),
	       y: wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       sgn: SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (t: off);
	  /* Move the remainder into the MEM_REF offset and keep the
	     rounded-down part in the chrec's initial condition.  */
	  off = wide_int_to_tree (ssizetype, cst: wi::to_wide (t: off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), cst: rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence clique/base info of the original
	     reference on the rebuilt MEM_REF.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (obj: access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
1456 | |
1457 | /* Extracts the alias analysis information from the memory reference DR. */ |
1458 | |
1459 | static void |
1460 | dr_analyze_alias (struct data_reference *dr) |
1461 | { |
1462 | tree ref = DR_REF (dr); |
1463 | tree base = get_base_address (t: ref), addr; |
1464 | |
1465 | if (INDIRECT_REF_P (base) |
1466 | || TREE_CODE (base) == MEM_REF) |
1467 | { |
1468 | addr = TREE_OPERAND (base, 0); |
1469 | if (TREE_CODE (addr) == SSA_NAME) |
1470 | DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); |
1471 | } |
1472 | } |
1473 | |
1474 | /* Frees data reference DR. */ |
1475 | |
1476 | void |
1477 | free_data_ref (data_reference_p dr) |
1478 | { |
1479 | DR_ACCESS_FNS (dr).release (); |
1480 | if (dr->alt_indices.base_object) |
1481 | dr->alt_indices.access_fns.release (); |
1482 | free (ptr: dr); |
1483 | } |
1484 | |
1485 | /* Analyze memory reference MEMREF, which is accessed in STMT. |
1486 | The reference is a read if IS_READ is true, otherwise it is a write. |
1487 | IS_CONDITIONAL_IN_STMT indicates that the reference is conditional |
1488 | within STMT, i.e. that it might not occur even if STMT is executed |
1489 | and runs to completion. |
1490 | |
1491 | Return the data_reference description of MEMREF. NEST is the outermost |
1492 | loop in which the reference should be instantiated, LOOP is the loop |
1493 | in which the data reference should be analyzed. */ |
1494 | |
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Creating dr for " );
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (stream: dump_file, format: "\n" );
    }

  /* XCNEW zero-initializes, so all fields not set below start out
     cleared.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* With no instantiation edge (NEST == NULL), request BB analysis by
     passing a NULL loop.  */
  dr_analyze_innermost (drb: &DR_INNERMOST (dr), ref: memref,
			loop: nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (dri: &dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (stream: dump_file, format: "\tbase_address: " );
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (stream: dump_file, format: "\n\toffset from base address: " );
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (stream: dump_file, format: "\n\tconstant offset from base address: " );
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (stream: dump_file, format: "\n\tstep: " );
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (stream: dump_file, format: "\n\tbase alignment: %d" , DR_BASE_ALIGNMENT (dr));
      fprintf (stream: dump_file, format: "\n\tbase misalignment: %d" ,
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (stream: dump_file, format: "\n\toffset alignment: %d" ,
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (stream: dump_file, format: "\n\tstep alignment: %d" , DR_STEP_ALIGNMENT (dr));
      fprintf (stream: dump_file, format: "\n\tbase_object: " );
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (stream: dump_file, format: "\n" );
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (stream: dump_file, format: "\tAccess function %d: " , i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1548 | |
1549 | /* A helper function computes order between two tree expressions T1 and T2. |
1550 | This is used in comparator functions sorting objects based on the order |
1551 | of tree expressions. The function returns -1, 0, or 1. */ |
1552 | |
int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  /* NULL sorts before any non-NULL tree.  */
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  /* Useless type conversions do not affect the ordering.  */
  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Trees with different codes order by code, except that all
     conversion codes are treated as equivalent to each other.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Order first by length, then by content.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      /* Same version: fall out of the switch and return 0 below.  */
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (a: wi::to_poly_widest (t: t1),
				       b: wi::to_poly_widest (t: t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  /* Iterate from the last operand down so the most
	     significant differences are found first.  */
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  /* All compared components were equal.  */
  return 0;
}
1624 | |
1625 | /* Return TRUE it's possible to resolve data dependence DDR by runtime alias |
1626 | check. */ |
1627 | |
opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n" ,
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  /* Versioning duplicates code, so it is only worthwhile when
     optimizing for speed.  */
  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   fmt: "runtime alias check not supported when"
				   " optimizing for size.\n" );

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   fmt: "runtime alias check not supported for"
				   " outer loop.\n" );

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   fmt: "runtime alias check between different "
				   "address spaces not supported.\n" );

  /* All checks passed: a run-time alias test can be emitted.  */
  return opt_result::success ();
}
1657 | |
1658 | /* Operator == between two dr_with_seg_len objects. |
1659 | |
1660 | This equality operator is used to make sure two data refs |
1661 | are the same one so that we will consider to combine the |
1662 | aliasing checks of those two pairs of data dependent data |
1663 | refs. */ |
1664 | |
1665 | static bool |
1666 | operator == (const dr_with_seg_len& d1, |
1667 | const dr_with_seg_len& d2) |
1668 | { |
1669 | return (operand_equal_p (DR_BASE_ADDRESS (d1.dr), |
1670 | DR_BASE_ADDRESS (d2.dr), flags: 0) |
1671 | && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 |
1672 | && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 |
1673 | && data_ref_compare_tree (t1: d1.seg_len, t2: d2.seg_len) == 0 |
1674 | && known_eq (d1.access_size, d2.access_size) |
1675 | && d1.align == d2.align); |
1676 | } |
1677 | |
1678 | /* Comparison function for sorting objects of dr_with_seg_len_pair_t |
1679 | so that we can combine aliasing checks in one scan. */ |
1680 | |
1681 | static int |
1682 | comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) |
1683 | { |
1684 | const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_; |
1685 | const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_; |
1686 | const dr_with_seg_len &a1 = pa->first, &a2 = pa->second; |
1687 | const dr_with_seg_len &b1 = pb->first, &b2 = pb->second; |
1688 | |
1689 | /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks |
1690 | if a and c have the same basic address snd step, and b and d have the same |
1691 | address and step. Therefore, if any a&c or b&d don't have the same address |
1692 | and step, we don't care the order of those two pairs after sorting. */ |
1693 | int comp_res; |
1694 | |
1695 | if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr), |
1696 | DR_BASE_ADDRESS (b1.dr))) != 0) |
1697 | return comp_res; |
1698 | if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr), |
1699 | DR_BASE_ADDRESS (b2.dr))) != 0) |
1700 | return comp_res; |
1701 | if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr), |
1702 | DR_STEP (b1.dr))) != 0) |
1703 | return comp_res; |
1704 | if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr), |
1705 | DR_STEP (b2.dr))) != 0) |
1706 | return comp_res; |
1707 | if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr), |
1708 | DR_OFFSET (b1.dr))) != 0) |
1709 | return comp_res; |
1710 | if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr), |
1711 | DR_INIT (b1.dr))) != 0) |
1712 | return comp_res; |
1713 | if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr), |
1714 | DR_OFFSET (b2.dr))) != 0) |
1715 | return comp_res; |
1716 | if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr), |
1717 | DR_INIT (b2.dr))) != 0) |
1718 | return comp_res; |
1719 | |
1720 | return 0; |
1721 | } |
1722 | |
1723 | /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ |
1724 | |
1725 | static void |
1726 | dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) |
1727 | { |
1728 | dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n" , indent, |
1729 | DR_REF (alias_pair->first.dr), |
1730 | DR_REF (alias_pair->second.dr)); |
1731 | |
1732 | dump_printf (MSG_NOTE, "%ssegment length: %T" , indent, |
1733 | alias_pair->first.seg_len); |
1734 | if (!operand_equal_p (alias_pair->first.seg_len, |
1735 | alias_pair->second.seg_len, flags: 0)) |
1736 | dump_printf (MSG_NOTE, " vs. %T" , alias_pair->second.seg_len); |
1737 | |
1738 | dump_printf (MSG_NOTE, "\n%saccess size: " , indent); |
1739 | dump_dec (MSG_NOTE, alias_pair->first.access_size); |
1740 | if (maybe_ne (a: alias_pair->first.access_size, b: alias_pair->second.access_size)) |
1741 | { |
1742 | dump_printf (MSG_NOTE, " vs. " ); |
1743 | dump_dec (MSG_NOTE, alias_pair->second.access_size); |
1744 | } |
1745 | |
1746 | dump_printf (MSG_NOTE, "\n%salignment: %d" , indent, |
1747 | alias_pair->first.align); |
1748 | if (alias_pair->first.align != alias_pair->second.align) |
1749 | dump_printf (MSG_NOTE, " vs. %d" , alias_pair->second.align); |
1750 | |
1751 | dump_printf (MSG_NOTE, "\n%sflags: " , indent); |
1752 | if (alias_pair->flags & DR_ALIAS_RAW) |
1753 | dump_printf (MSG_NOTE, " RAW" ); |
1754 | if (alias_pair->flags & DR_ALIAS_WAR) |
1755 | dump_printf (MSG_NOTE, " WAR" ); |
1756 | if (alias_pair->flags & DR_ALIAS_WAW) |
1757 | dump_printf (MSG_NOTE, " WAW" ); |
1758 | if (alias_pair->flags & DR_ALIAS_ARBITRARY) |
1759 | dump_printf (MSG_NOTE, " ARBITRARY" ); |
1760 | if (alias_pair->flags & DR_ALIAS_SWAPPED) |
1761 | dump_printf (MSG_NOTE, " SWAPPED" ); |
1762 | if (alias_pair->flags & DR_ALIAS_UNSWAPPED) |
1763 | dump_printf (MSG_NOTE, " UNSWAPPED" ); |
1764 | if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) |
1765 | dump_printf (MSG_NOTE, " MIXED_STEPS" ); |
1766 | if (alias_pair->flags == 0) |
1767 | dump_printf (MSG_NOTE, " <none>" ); |
1768 | dump_printf (MSG_NOTE, "\n" ); |
1769 | } |
1770 | |
1771 | /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. |
1772 | FACTOR is number of iterations that each data reference is accessed. |
1773 | |
1774 | Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0, |
1775 | we create an expression: |
1776 | |
1777 | ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) |
1778 | || (load_ptr_0 + load_segment_length_0) <= store_ptr_0)) |
1779 | |
1780 | for aliasing checks. However, in some cases we can decrease the number |
1781 | of checks by combining two checks into one. For example, suppose we have |
1782 | another pair of data refs store_ptr_0 & load_ptr_1, and if the following |
1783 | condition is satisfied: |
1784 | |
1785 | load_ptr_0 < load_ptr_1 && |
1786 | load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0 |
1787 | |
1788 | (this condition means, in each iteration of vectorized loop, the accessed |
1789 | memory of store_ptr_0 cannot be between the memory of load_ptr_0 and |
1790 | load_ptr_1.) |
1791 | |
   we then can use only the following expression to finish the aliasing checks
1793 | between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1: |
1794 | |
1795 | ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) |
1796 | || (load_ptr_1 + load_segment_length_1 <= store_ptr_0)) |
1797 | |
1798 | Note that we only consider that load_ptr_0 and load_ptr_1 have the same |
1799 | basic address. */ |
1800 | |
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  /* No recorded checks means nothing to prune.  */
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  /* Record which pairs were swapped so that the original order
	     (and with it the validity of the RAW/WAR/WAW flags) can be
	     restored after merging; see the loop at the end.  */
	  std::swap (a&: alias_pair->first, b&: alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  LAST is the index of the most recent
     pair kept in the output so far.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  The flags are ORed so that
	 no dependence information is lost for the surviving pair.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n" ,
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (a&: dr_a1, b&: dr_b1);
	      std::swap (a&: dr_a2, b&: dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), flags: 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), flags: 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), value: &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), value: &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (a: init_a1, b: init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, flags: 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (t: dr_a1->seg_len, value: &seg_len_a1)
		  || !poly_int_tree_p (t: dr_a2->seg_len, value: &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (a: seg_len_a1, b: seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (a: seg_len_a1, b: seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (a&: *dr_a1, b&: *dr_a2);
	      std::swap (a&: init_a1, b&: init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), flags: 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      /* The combined length might be less aligned than either
		 original length, so cap the alignment accordingly.  */
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (a: dr_a1->access_size, b: diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (a: dr_a1->access_size,
						b: diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (a: dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n" ,
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  /* We were able to merge after all, so undo the tentative
	     "keep" increment above.  */
	  last -= 1;
	}
    }
  /* Discard tail entries that were merged into earlier pairs.  */
  alias_pairs->truncate (size: last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n" );
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (a&: alias_pair->first, b&: alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, indent: "  " );
    }
}
1998 | |
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */
2003 | |
static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, flags: 0)
      || !poly_int_tree_p (t: dr_a.seg_len, value: &seg_len)
      || maybe_ne (a: dr_a.access_size, b: dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), flags: 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      /* Retry with a length based on the access size instead of the
	 step, in case the target supports that smaller length.  */
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  Fold the offset and init
     components into the two pointers that the IFN will compare.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n" );
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n" );
    }
  return true;
}
2072 | |
2073 | /* Try to generate a runtime condition that is true if ALIAS_PAIR is |
2074 | free of aliases, using a condition based on index values instead |
2075 | of a condition based on addresses. Return true on success, |
2076 | storing the condition in *COND_EXPR. |
2077 | |
2078 | This can only be done if the two data references in ALIAS_PAIR access |
2079 | the same array object and the index is the only difference. For example, |
2080 | if the two data references are DR_A and DR_B: |
2081 | |
2082 | DR_A DR_B |
2083 | data-ref arr[i] arr[j] |
2084 | base_object arr arr |
2085 | index {i_0, +, 1}_loop {j_0, +, 1}_loop |
2086 | |
2087 | The addresses and their index are like: |
2088 | |
2089 | |<- ADDR_A ->| |<- ADDR_B ->| |
2090 | -------------------------------------------------------> |
2091 | | | | | | | | | | | |
2092 | -------------------------------------------------------> |
2093 | i_0 ... i_0+4 j_0 ... j_0+4 |
2094 | |
2095 | We can create expression based on index rather than address: |
2096 | |
2097 | (unsigned) (i_0 - j_0 + 3) <= 6 |
2098 | |
2099 | i.e. the indices are less than 4 apart. |
2100 | |
2101 | Note evolution step of index needs to be considered in comparison. */ |
2102 | |
static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  /* An index-based test needs nonzero, uniform steps and the same
     number of dimensions on both sides.  */
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (t: dr_a.seg_len, value: &seg_len1)
      || !poly_int_tree_p (t: dr_b.seg_len, value: &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), flags: 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), flags: 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      /* Normalize to positive step and segment lengths; the segment
	 lengths of negative-step accesses are negative too.  */
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (t: dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (t: dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if memory segment accessed by
     DR_A in some iterations intersect with memory segment accessed by DR_B
     in the same number of iterations.
     Note segment length is a linear function of number of iterations with
     DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (a: seg_len1 + abs_step - 1, b: abs_step, quotient: &niter_len1)
      || !can_div_trunc_p (a: seg_len2 + abs_step - 1, b: abs_step, quotient: &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (a: dr_a.access_size + abs_step - 1,
			b: abs_step, quotient: &niter_access1)
      || !can_div_trunc_p (a: dr_b.access_size + abs_step - 1,
			   b: abs_step, quotient: &niter_access2))
    return false;

  /* True if the only dependences are WAR/WAW, which allows the cheaper
     one-sided test described in the long comment below.  */
  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  /* Find the single dimension in which the two access functions differ;
     all other dimensions must be identical.  */
  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not scev, or not scev wrt
	 current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, flags: 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), flags: 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, alias can be checked against loop's control IV, but we
     need to prove linear mapping between control IV and reference
     index.  Although that should be true, we check against (array)
     index of data reference.  Like segment length, index length is
     linear function of the number of iterations with index_step as
     the coefficient, i.e, niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (x: wi::to_wide (t: idx_step),
					      sgn: SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
			|
			min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

	low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		 + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (x: limit, type: utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (t: idx_step);

  /* Build "(min2 - min1 + bias) > limit" in the unsigned index type;
     the unsigned subtraction folds the "0 <=" lower bound into the
     same comparison.  */
  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n" );
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n" );
    }
  return true;
}
2347 | |
2348 | /* A subroutine of create_intersect_range_checks, with a subset of the |
2349 | same arguments. Try to optimize cases in which the second access |
2350 | is a write and in which some overlap is valid. */ |
2351 | |
static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
		   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where  limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap non-constant subexpressions in SAVE_EXPRs so that they are
     evaluated only once when reused below.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  /* The check passes (no conflict) when SUBJECT > LIMIT; computing
     SUBJECT in unsigned sizetype folds the "0 <=" lower bound of the
     overlap condition into the same comparison.  */
  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n" );
  return true;
}
2504 | |
2505 | /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for |
2506 | every address ADDR accessed by D: |
2507 | |
2508 | *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT |
2509 | |
2510 | In this case, every element accessed by D is aligned to at least |
2511 | ALIGN bytes. |
2512 | |
2513 | If ALIGN is zero then instead set *SEG_MAX_OUT so that: |
2514 | |
2515 | *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */ |
2516 | |
static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ,.... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
		<--- B: +ve step --->
		<- |seg_len| ->
		|
	   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  /* NEG_STEP is a runtime boolean that is true iff the access moves
     downwards; dr_direction_indicator gives a value whose sign matches
     the sign of the DR's step.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  /* ADDR_BASE is DR_BASE_ADDRESS + DR_OFFSET + DR_INIT, i.e. the "base
     address" in the diagram above.  */
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  /* Rewrite the segment length so that evaluating it at runtime cannot
     trap on overflow.  */
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  /* These are the [LB] and [UB] terms above, expressed as byte offsets
     from ADDR_BASE (with "- ALIGN" folded into the [UB] term).  */
  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
2588 | |
2589 | /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases, |
2590 | storing the condition in *COND_EXPR. The fallback is to generate a |
2591 | a test that the two accesses do not overlap: |
2592 | |
2593 | end_a <= start_b || end_b <= start_a. */ |
2594 | |
static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  /* Try the specialized checks first; each one returns true and fills
     in *COND_EXPR if it applies to this pair.  */
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  /* Fall back to the generic segment-overlap test.  */
  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (d: dr_a, seg_min_out: &seg_a_min, seg_max_out: &seg_a_max, align: min_align);
  get_segment_min_max (d: dr_b, seg_min_out: &seg_b_min, seg_max_out: &seg_b_max, align: min_align);

  /* No alias if segment A ends before segment B starts or vice versa;
     CMP_CODE (LE vs LT) matches the exclusive/inclusive maxima above.  */
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n" );
}
2658 | |
2659 | /* Create a conditional expression that represents the run-time checks for |
2660 | overlapping of address ranges represented by a list of data references |
2661 | pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned |
2662 | COND_EXPR is the conditional expression to be used in the if statement |
2663 | that controls which version of the loop gets executed at runtime. */ |
2664 | |
2665 | void |
2666 | create_runtime_alias_checks (class loop *loop, |
2667 | const vec<dr_with_seg_len_pair_t> *alias_pairs, |
2668 | tree * cond_expr) |
2669 | { |
2670 | tree part_cond_expr; |
2671 | |
2672 | fold_defer_overflow_warnings (); |
2673 | for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs) |
2674 | { |
2675 | gcc_assert (alias_pair.flags); |
2676 | if (dump_enabled_p ()) |
2677 | dump_printf (MSG_NOTE, |
2678 | "create runtime check for data references %T and %T\n" , |
2679 | DR_REF (alias_pair.first.dr), |
2680 | DR_REF (alias_pair.second.dr)); |
2681 | |
2682 | /* Create condition expression for each pair data references. */ |
2683 | create_intersect_range_checks (loop, cond_expr: &part_cond_expr, alias_pair); |
2684 | if (*cond_expr) |
2685 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
2686 | *cond_expr, part_cond_expr); |
2687 | else |
2688 | *cond_expr = part_cond_expr; |
2689 | } |
2690 | fold_undefer_and_ignore_overflow_warnings (); |
2691 | } |
2692 | |
2693 | /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical |
2694 | expressions. */ |
2695 | static bool |
2696 | dr_equal_offsets_p1 (tree offset1, tree offset2) |
2697 | { |
2698 | bool res; |
2699 | |
2700 | STRIP_NOPS (offset1); |
2701 | STRIP_NOPS (offset2); |
2702 | |
2703 | if (offset1 == offset2) |
2704 | return true; |
2705 | |
2706 | if (TREE_CODE (offset1) != TREE_CODE (offset2) |
2707 | || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) |
2708 | return false; |
2709 | |
2710 | res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), |
2711 | TREE_OPERAND (offset2, 0)); |
2712 | |
2713 | if (!res || !BINARY_CLASS_P (offset1)) |
2714 | return res; |
2715 | |
2716 | res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), |
2717 | TREE_OPERAND (offset2, 1)); |
2718 | |
2719 | return res; |
2720 | } |
2721 | |
2722 | /* Check if DRA and DRB have equal offsets. */ |
2723 | bool |
2724 | dr_equal_offsets_p (struct data_reference *dra, |
2725 | struct data_reference *drb) |
2726 | { |
2727 | tree offset1, offset2; |
2728 | |
2729 | offset1 = DR_OFFSET (dra); |
2730 | offset2 = DR_OFFSET (drb); |
2731 | |
2732 | return dr_equal_offsets_p1 (offset1, offset2); |
2733 | } |
2734 | |
2735 | /* Returns true if FNA == FNB. */ |
2736 | |
2737 | static bool |
2738 | affine_function_equal_p (affine_fn fna, affine_fn fnb) |
2739 | { |
2740 | unsigned i, n = fna.length (); |
2741 | |
2742 | if (n != fnb.length ()) |
2743 | return false; |
2744 | |
2745 | for (i = 0; i < n; i++) |
2746 | if (!operand_equal_p (fna[i], fnb[i], flags: 0)) |
2747 | return false; |
2748 | |
2749 | return true; |
2750 | } |
2751 | |
2752 | /* If all the functions in CF are the same, returns one of them, |
2753 | otherwise returns NULL. */ |
2754 | |
2755 | static affine_fn |
2756 | common_affine_function (conflict_function *cf) |
2757 | { |
2758 | unsigned i; |
2759 | affine_fn comm; |
2760 | |
2761 | if (!CF_NONTRIVIAL_P (cf)) |
2762 | return affine_fn (); |
2763 | |
2764 | comm = cf->fns[0]; |
2765 | |
2766 | for (i = 1; i < cf->n; i++) |
2767 | if (!affine_function_equal_p (fna: comm, fnb: cf->fns[i])) |
2768 | return affine_fn (); |
2769 | |
2770 | return comm; |
2771 | } |
2772 | |
2773 | /* Returns the base of the affine function FN. */ |
2774 | |
static tree
affine_function_base (affine_fn fn)
{
  /* The iteration-invariant base term is stored at index 0; the
     remaining entries are per-loop coefficients.  */
  return fn[0];
}
2780 | |
2781 | /* Returns true if FN is a constant. */ |
2782 | |
2783 | static bool |
2784 | affine_function_constant_p (affine_fn fn) |
2785 | { |
2786 | unsigned i; |
2787 | tree coef; |
2788 | |
2789 | for (i = 1; fn.iterate (ix: i, ptr: &coef); i++) |
2790 | if (!integer_zerop (coef)) |
2791 | return false; |
2792 | |
2793 | return true; |
2794 | } |
2795 | |
2796 | /* Returns true if FN is the zero constant function. */ |
2797 | |
2798 | static bool |
2799 | affine_function_zero_p (affine_fn fn) |
2800 | { |
2801 | return (integer_zerop (affine_function_base (fn)) |
2802 | && affine_function_constant_p (fn)); |
2803 | } |
2804 | |
2805 | /* Returns a signed integer type with the largest precision from TA |
2806 | and TB. */ |
2807 | |
2808 | static tree |
2809 | signed_type_for_types (tree ta, tree tb) |
2810 | { |
2811 | if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) |
2812 | return signed_type_for (ta); |
2813 | else |
2814 | return signed_type_for (tb); |
2815 | } |
2816 | |
2817 | /* Applies operation OP on affine functions FNA and FNB, and returns the |
2818 | result. */ |
2819 | |
2820 | static affine_fn |
2821 | affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) |
2822 | { |
2823 | unsigned i, n, m; |
2824 | affine_fn ret; |
2825 | tree coef; |
2826 | |
2827 | if (fnb.length () > fna.length ()) |
2828 | { |
2829 | n = fna.length (); |
2830 | m = fnb.length (); |
2831 | } |
2832 | else |
2833 | { |
2834 | n = fnb.length (); |
2835 | m = fna.length (); |
2836 | } |
2837 | |
2838 | ret.create (nelems: m); |
2839 | for (i = 0; i < n; i++) |
2840 | { |
2841 | tree type = signed_type_for_types (TREE_TYPE (fna[i]), |
2842 | TREE_TYPE (fnb[i])); |
2843 | ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); |
2844 | } |
2845 | |
2846 | for (; fna.iterate (ix: i, ptr: &coef); i++) |
2847 | ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), |
2848 | coef, integer_zero_node)); |
2849 | for (; fnb.iterate (ix: i, ptr: &coef); i++) |
2850 | ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), |
2851 | integer_zero_node, coef)); |
2852 | |
2853 | return ret; |
2854 | } |
2855 | |
/* Returns the sum of affine functions FNA and FNB.  The result is a
   freshly allocated vector; release it with affine_fn_free.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (op: PLUS_EXPR, fna, fnb);
}
2863 | |
/* Returns the difference of affine functions FNA and FNB (FNA - FNB).
   The result is a freshly allocated vector; release it with
   affine_fn_free.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (op: MINUS_EXPR, fna, fnb);
}
2871 | |
/* Frees affine function FN by releasing its underlying vector storage.
   FN must not be used afterwards.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2879 | |
2880 | /* Determine for each subscript in the data dependence relation DDR |
2881 | the distance. */ |
2882 | |
2883 | static void |
2884 | compute_subscript_distance (struct data_dependence_relation *ddr) |
2885 | { |
2886 | conflict_function *cf_a, *cf_b; |
2887 | affine_fn fn_a, fn_b, diff; |
2888 | |
2889 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) |
2890 | { |
2891 | unsigned int i; |
2892 | |
2893 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
2894 | { |
2895 | struct subscript *subscript; |
2896 | |
2897 | subscript = DDR_SUBSCRIPT (ddr, i); |
2898 | cf_a = SUB_CONFLICTS_IN_A (subscript); |
2899 | cf_b = SUB_CONFLICTS_IN_B (subscript); |
2900 | |
2901 | fn_a = common_affine_function (cf: cf_a); |
2902 | fn_b = common_affine_function (cf: cf_b); |
2903 | if (!fn_a.exists () || !fn_b.exists ()) |
2904 | { |
2905 | SUB_DISTANCE (subscript) = chrec_dont_know; |
2906 | return; |
2907 | } |
2908 | diff = affine_fn_minus (fna: fn_a, fnb: fn_b); |
2909 | |
2910 | if (affine_function_constant_p (fn: diff)) |
2911 | SUB_DISTANCE (subscript) = affine_function_base (fn: diff); |
2912 | else |
2913 | SUB_DISTANCE (subscript) = chrec_dont_know; |
2914 | |
2915 | affine_fn_free (fn: diff); |
2916 | } |
2917 | } |
2918 | } |
2919 | |
2920 | /* Returns the conflict function for "unknown". */ |
2921 | |
2922 | static conflict_function * |
2923 | conflict_fn_not_known (void) |
2924 | { |
2925 | conflict_function *fn = XCNEW (conflict_function); |
2926 | fn->n = NOT_KNOWN; |
2927 | |
2928 | return fn; |
2929 | } |
2930 | |
2931 | /* Returns the conflict function for "independent". */ |
2932 | |
2933 | static conflict_function * |
2934 | conflict_fn_no_dependence (void) |
2935 | { |
2936 | conflict_function *fn = XCNEW (conflict_function); |
2937 | fn->n = NO_DEPENDENCE; |
2938 | |
2939 | return fn; |
2940 | } |
2941 | |
2942 | /* Returns true if the address of OBJ is invariant in LOOP. */ |
2943 | |
2944 | static bool |
2945 | object_address_invariant_in_loop_p (const class loop *loop, const_tree obj) |
2946 | { |
2947 | while (handled_component_p (t: obj)) |
2948 | { |
2949 | if (TREE_CODE (obj) == ARRAY_REF) |
2950 | { |
2951 | for (int i = 1; i < 4; ++i) |
2952 | if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i), |
2953 | loop->num)) |
2954 | return false; |
2955 | } |
2956 | else if (TREE_CODE (obj) == COMPONENT_REF) |
2957 | { |
2958 | if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), |
2959 | loop->num)) |
2960 | return false; |
2961 | } |
2962 | obj = TREE_OPERAND (obj, 0); |
2963 | } |
2964 | |
2965 | if (!INDIRECT_REF_P (obj) |
2966 | && TREE_CODE (obj) != MEM_REF) |
2967 | return true; |
2968 | |
2969 | return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), |
2970 | loop->num); |
2971 | } |
2972 | |
2973 | /* Returns false if we can prove that data references A and B do not alias, |
2974 | true otherwise. If LOOP_NEST is false no cross-iteration aliases are |
2975 | considered. */ |
2976 | |
bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      /* With equal base addresses and offsets and known constant
	 (poly-int) access sizes, the references can be disambiguated
	 purely on their DR_INIT byte ranges.  */
      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (t: tree_size_a)
	  && poly_int_tree_p (t: tree_size_b)
	  && !ranges_maybe_overlap_p (pos1: wi::to_poly_widest (DR_INIT (a)),
				      size1: wi::to_poly_widest (t: tree_size_a),
				      pos2: wi::to_poly_widest (DR_INIT (b)),
				      size2: wi::to_poly_widest (t: tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      /* Otherwise fall back to an affine-combination overlap test on
	 the full references: compute off2 - off1 and check that the
	 two byte ranges cannot overlap.  */
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* (TARGET_)MEM_REFs in the same dependence clique but with different
     dependence bases are known not to alias (restrict-style info).  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Compare the two pointers; take the address of B's base object
	 if it is not itself a pointer dereference.  */
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      /* Mirror image of the case above.  */
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Pick the oracle query
     matching the read/write kinds of the two references.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3078 | |
3079 | /* REF_A and REF_B both satisfy access_fn_component_p. Return true |
3080 | if it is meaningful to compare their associated access functions |
3081 | when checking for dependencies. */ |
3082 | |
3083 | static bool |
3084 | access_fn_components_comparable_p (tree ref_a, tree ref_b) |
3085 | { |
3086 | /* Allow pairs of component refs from the following sets: |
3087 | |
3088 | { REALPART_EXPR, IMAGPART_EXPR } |
3089 | { COMPONENT_REF } |
3090 | { ARRAY_REF }. */ |
3091 | tree_code code_a = TREE_CODE (ref_a); |
3092 | tree_code code_b = TREE_CODE (ref_b); |
3093 | if (code_a == IMAGPART_EXPR) |
3094 | code_a = REALPART_EXPR; |
3095 | if (code_b == IMAGPART_EXPR) |
3096 | code_b = REALPART_EXPR; |
3097 | if (code_a != code_b) |
3098 | return false; |
3099 | |
3100 | if (TREE_CODE (ref_a) == COMPONENT_REF) |
3101 | /* ??? We cannot simply use the type of operand #0 of the refs here as |
3102 | the Fortran compiler smuggles type punning into COMPONENT_REFs. |
3103 | Use the DECL_CONTEXT of the FIELD_DECLs instead. */ |
3104 | return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1)) |
3105 | == DECL_CONTEXT (TREE_OPERAND (ref_b, 1))); |
3106 | |
3107 | return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)), |
3108 | TREE_TYPE (TREE_OPERAND (ref_b, 0))); |
3109 | } |
3110 | |
3111 | /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES |
3112 | is true when the main indices of A and B were not comparable so we try again |
3113 | with alternate indices computed on an indirect reference. */ |
3114 | |
3115 | struct data_dependence_relation * |
3116 | initialize_data_dependence_relation (struct data_dependence_relation *res, |
3117 | vec<loop_p> loop_nest, |
3118 | bool use_alt_indices) |
3119 | { |
3120 | struct data_reference *a = DDR_A (res); |
3121 | struct data_reference *b = DDR_B (res); |
3122 | unsigned int i; |
3123 | |
3124 | struct indices *indices_a = &a->indices; |
3125 | struct indices *indices_b = &b->indices; |
3126 | if (use_alt_indices) |
3127 | { |
3128 | if (TREE_CODE (DR_REF (a)) != MEM_REF) |
3129 | indices_a = &a->alt_indices; |
3130 | if (TREE_CODE (DR_REF (b)) != MEM_REF) |
3131 | indices_b = &b->alt_indices; |
3132 | } |
3133 | unsigned int num_dimensions_a = indices_a->access_fns.length (); |
3134 | unsigned int num_dimensions_b = indices_b->access_fns.length (); |
3135 | if (num_dimensions_a == 0 || num_dimensions_b == 0) |
3136 | { |
3137 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3138 | return res; |
3139 | } |
3140 | |
3141 | /* For unconstrained bases, the root (highest-indexed) subscript |
3142 | describes a variation in the base of the original DR_REF rather |
3143 | than a component access. We have no type that accurately describes |
3144 | the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after* |
3145 | applying this subscript) so limit the search to the last real |
3146 | component access. |
3147 | |
3148 | E.g. for: |
3149 | |
3150 | void |
3151 | f (int a[][8], int b[][8]) |
3152 | { |
3153 | for (int i = 0; i < 8; ++i) |
3154 | a[i * 2][0] = b[i][0]; |
3155 | } |
3156 | |
3157 | the a and b accesses have a single ARRAY_REF component reference [0] |
3158 | but have two subscripts. */ |
3159 | if (indices_a->unconstrained_base) |
3160 | num_dimensions_a -= 1; |
3161 | if (indices_b->unconstrained_base) |
3162 | num_dimensions_b -= 1; |
3163 | |
3164 | /* These structures describe sequences of component references in |
3165 | DR_REF (A) and DR_REF (B). Each component reference is tied to a |
3166 | specific access function. */ |
3167 | struct { |
3168 | /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and |
3169 | DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher |
3170 | indices. In C notation, these are the indices of the rightmost |
3171 | component references; e.g. for a sequence .b.c.d, the start |
3172 | index is for .d. */ |
3173 | unsigned int start_a; |
3174 | unsigned int start_b; |
3175 | |
3176 | /* The sequence contains LENGTH consecutive access functions from |
3177 | each DR. */ |
3178 | unsigned int length; |
3179 | |
3180 | /* The enclosing objects for the A and B sequences respectively, |
3181 | i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1) |
3182 | and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */ |
3183 | tree object_a; |
3184 | tree object_b; |
3185 | } full_seq = {}, struct_seq = {}; |
3186 | |
3187 | /* Before each iteration of the loop: |
3188 | |
3189 | - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and |
3190 | - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */ |
3191 | unsigned int index_a = 0; |
3192 | unsigned int index_b = 0; |
3193 | tree ref_a = DR_REF (a); |
3194 | tree ref_b = DR_REF (b); |
3195 | |
3196 | /* Now walk the component references from the final DR_REFs back up to |
3197 | the enclosing base objects. Each component reference corresponds |
3198 | to one access function in the DR, with access function 0 being for |
3199 | the final DR_REF and the highest-indexed access function being the |
3200 | one that is applied to the base of the DR. |
3201 | |
3202 | Look for a sequence of component references whose access functions |
3203 | are comparable (see access_fn_components_comparable_p). If more |
3204 | than one such sequence exists, pick the one nearest the base |
3205 | (which is the leftmost sequence in C notation). Store this sequence |
3206 | in FULL_SEQ. |
3207 | |
3208 | For example, if we have: |
3209 | |
3210 | struct foo { struct bar s; ... } (*a)[10], (*b)[10]; |
3211 | |
3212 | A: a[0][i].s.c.d |
3213 | B: __real b[0][i].s.e[i].f |
3214 | |
3215 | (where d is the same type as the real component of f) then the access |
3216 | functions would be: |
3217 | |
3218 | 0 1 2 3 |
3219 | A: .d .c .s [i] |
3220 | |
3221 | 0 1 2 3 4 5 |
3222 | B: __real .f [i] .e .s [i] |
3223 | |
3224 | The A0/B2 column isn't comparable, since .d is a COMPONENT_REF |
3225 | and [i] is an ARRAY_REF. However, the A1/B3 column contains two |
3226 | COMPONENT_REF accesses for struct bar, so is comparable. Likewise |
3227 | the A2/B4 column contains two COMPONENT_REF accesses for struct foo, |
3228 | so is comparable. The A3/B5 column contains two ARRAY_REFs that |
3229 | index foo[10] arrays, so is again comparable. The sequence is |
3230 | therefore: |
3231 | |
3232 | A: [1, 3] (i.e. [i].s.c) |
3233 | B: [3, 5] (i.e. [i].s.e) |
3234 | |
3235 | Also look for sequences of component references whose access |
3236 | functions are comparable and whose enclosing objects have the same |
3237 | RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above |
3238 | example, STRUCT_SEQ would be: |
3239 | |
3240 | A: [1, 2] (i.e. s.c) |
3241 | B: [3, 4] (i.e. s.e) */ |
3242 | while (index_a < num_dimensions_a && index_b < num_dimensions_b) |
3243 | { |
3244 | /* The alternate indices form always has a single dimension |
3245 | with unconstrained base. */ |
3246 | gcc_assert (!use_alt_indices); |
3247 | |
3248 | /* REF_A and REF_B must be one of the component access types |
3249 | allowed by dr_analyze_indices. */ |
3250 | gcc_checking_assert (access_fn_component_p (ref_a)); |
3251 | gcc_checking_assert (access_fn_component_p (ref_b)); |
3252 | |
3253 | /* Get the immediately-enclosing objects for REF_A and REF_B, |
3254 | i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A) |
3255 | and DR_ACCESS_FN (B, INDEX_B). */ |
3256 | tree object_a = TREE_OPERAND (ref_a, 0); |
3257 | tree object_b = TREE_OPERAND (ref_b, 0); |
3258 | |
3259 | tree type_a = TREE_TYPE (object_a); |
3260 | tree type_b = TREE_TYPE (object_b); |
3261 | if (access_fn_components_comparable_p (ref_a, ref_b)) |
3262 | { |
3263 | /* This pair of component accesses is comparable for dependence |
3264 | analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and |
3265 | DR_ACCESS_FN (B, INDEX_B) in the sequence. */ |
3266 | if (full_seq.start_a + full_seq.length != index_a |
3267 | || full_seq.start_b + full_seq.length != index_b) |
3268 | { |
3269 | /* The accesses don't extend the current sequence, |
3270 | so start a new one here. */ |
3271 | full_seq.start_a = index_a; |
3272 | full_seq.start_b = index_b; |
3273 | full_seq.length = 0; |
3274 | } |
3275 | |
3276 | /* Add this pair of references to the sequence. */ |
3277 | full_seq.length += 1; |
3278 | full_seq.object_a = object_a; |
3279 | full_seq.object_b = object_b; |
3280 | |
3281 | /* If the enclosing objects are structures (and thus have the |
3282 | same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */ |
3283 | if (TREE_CODE (type_a) == RECORD_TYPE) |
3284 | struct_seq = full_seq; |
3285 | |
3286 | /* Move to the next containing reference for both A and B. */ |
3287 | ref_a = object_a; |
3288 | ref_b = object_b; |
3289 | index_a += 1; |
3290 | index_b += 1; |
3291 | continue; |
3292 | } |
3293 | |
3294 | /* Try to approach equal type sizes. */ |
3295 | if (!COMPLETE_TYPE_P (type_a) |
3296 | || !COMPLETE_TYPE_P (type_b) |
3297 | || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a)) |
3298 | || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b))) |
3299 | break; |
3300 | |
3301 | unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a)); |
3302 | unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b)); |
3303 | if (size_a <= size_b) |
3304 | { |
3305 | index_a += 1; |
3306 | ref_a = object_a; |
3307 | } |
3308 | if (size_b <= size_a) |
3309 | { |
3310 | index_b += 1; |
3311 | ref_b = object_b; |
3312 | } |
3313 | } |
3314 | |
3315 | /* See whether FULL_SEQ ends at the base and whether the two bases |
3316 | are equal. We do not care about TBAA or alignment info so we can |
3317 | use OEP_ADDRESS_OF to avoid false negatives. */ |
3318 | tree base_a = indices_a->base_object; |
3319 | tree base_b = indices_b->base_object; |
3320 | bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a |
3321 | && full_seq.start_b + full_seq.length == num_dimensions_b |
3322 | && (indices_a->unconstrained_base |
3323 | == indices_b->unconstrained_base) |
3324 | && operand_equal_p (base_a, base_b, flags: OEP_ADDRESS_OF) |
3325 | && (types_compatible_p (TREE_TYPE (base_a), |
3326 | TREE_TYPE (base_b)) |
3327 | || (!base_supports_access_fn_components_p (base: base_a) |
3328 | && !base_supports_access_fn_components_p (base: base_b) |
3329 | && operand_equal_p |
3330 | (TYPE_SIZE (TREE_TYPE (base_a)), |
3331 | TYPE_SIZE (TREE_TYPE (base_b)), flags: 0))) |
3332 | && (!loop_nest.exists () |
3333 | || (object_address_invariant_in_loop_p |
3334 | (loop: loop_nest[0], obj: base_a)))); |
3335 | |
3336 | /* If the bases are the same, we can include the base variation too. |
3337 | E.g. the b accesses in: |
3338 | |
3339 | for (int i = 0; i < n; ++i) |
3340 | b[i + 4][0] = b[i][0]; |
3341 | |
3342 | have a definite dependence distance of 4, while for: |
3343 | |
3344 | for (int i = 0; i < n; ++i) |
3345 | a[i + 4][0] = b[i][0]; |
3346 | |
3347 | the dependence distance depends on the gap between a and b. |
3348 | |
3349 | If the bases are different then we can only rely on the sequence |
3350 | rooted at a structure access, since arrays are allowed to overlap |
3351 | arbitrarily and change shape arbitrarily. E.g. we treat this as |
3352 | valid code: |
3353 | |
3354 | int a[256]; |
3355 | ... |
3356 | ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0]; |
3357 | |
3358 | where two lvalues with the same int[4][3] type overlap, and where |
3359 | both lvalues are distinct from the object's declared type. */ |
3360 | if (same_base_p) |
3361 | { |
3362 | if (indices_a->unconstrained_base) |
3363 | full_seq.length += 1; |
3364 | } |
3365 | else |
3366 | full_seq = struct_seq; |
3367 | |
3368 | /* Punt if we didn't find a suitable sequence. */ |
3369 | if (full_seq.length == 0) |
3370 | { |
3371 | if (use_alt_indices |
3372 | || (TREE_CODE (DR_REF (a)) == MEM_REF |
3373 | && TREE_CODE (DR_REF (b)) == MEM_REF) |
3374 | || may_be_nonaddressable_p (DR_REF (a)) |
3375 | || may_be_nonaddressable_p (DR_REF (b))) |
3376 | { |
3377 | /* Fully exhausted possibilities. */ |
3378 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3379 | return res; |
3380 | } |
3381 | |
3382 | /* Try evaluating both DRs as dereferences of pointers. */ |
3383 | if (!a->alt_indices.base_object |
3384 | && TREE_CODE (DR_REF (a)) != MEM_REF) |
3385 | { |
3386 | tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)), |
3387 | build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)), |
3388 | build_int_cst |
3389 | (reference_alias_ptr_type (DR_REF (a)), 0)); |
3390 | dr_analyze_indices (dri: &a->alt_indices, ref: alt_ref, |
3391 | nest: loop_preheader_edge (loop_nest[0]), |
3392 | loop: loop_containing_stmt (DR_STMT (a))); |
3393 | } |
3394 | if (!b->alt_indices.base_object |
3395 | && TREE_CODE (DR_REF (b)) != MEM_REF) |
3396 | { |
3397 | tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)), |
3398 | build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)), |
3399 | build_int_cst |
3400 | (reference_alias_ptr_type (DR_REF (b)), 0)); |
3401 | dr_analyze_indices (dri: &b->alt_indices, ref: alt_ref, |
3402 | nest: loop_preheader_edge (loop_nest[0]), |
3403 | loop: loop_containing_stmt (DR_STMT (b))); |
3404 | } |
3405 | return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: true); |
3406 | } |
3407 | |
3408 | if (!same_base_p) |
3409 | { |
3410 | /* Partial overlap is possible for different bases when strict aliasing |
3411 | is not in effect. It's also possible if either base involves a union |
3412 | access; e.g. for: |
3413 | |
3414 | struct s1 { int a[2]; }; |
3415 | struct s2 { struct s1 b; int c; }; |
3416 | struct s3 { int d; struct s1 e; }; |
3417 | union u { struct s2 f; struct s3 g; } *p, *q; |
3418 | |
3419 | the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at |
3420 | "p->g.e" (base "p->g") and might partially overlap the s1 at |
3421 | "q->g.e" (base "q->g"). */ |
3422 | if (!flag_strict_aliasing |
3423 | || ref_contains_union_access_p (ref: full_seq.object_a) |
3424 | || ref_contains_union_access_p (ref: full_seq.object_b)) |
3425 | { |
3426 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3427 | return res; |
3428 | } |
3429 | |
3430 | DDR_COULD_BE_INDEPENDENT_P (res) = true; |
3431 | if (!loop_nest.exists () |
3432 | || (object_address_invariant_in_loop_p (loop: loop_nest[0], |
3433 | obj: full_seq.object_a) |
3434 | && object_address_invariant_in_loop_p (loop: loop_nest[0], |
3435 | obj: full_seq.object_b))) |
3436 | { |
3437 | DDR_OBJECT_A (res) = full_seq.object_a; |
3438 | DDR_OBJECT_B (res) = full_seq.object_b; |
3439 | } |
3440 | } |
3441 | |
3442 | DDR_AFFINE_P (res) = true; |
3443 | DDR_ARE_DEPENDENT (res) = NULL_TREE; |
3444 | DDR_SUBSCRIPTS (res).create (nelems: full_seq.length); |
3445 | DDR_LOOP_NEST (res) = loop_nest; |
3446 | DDR_SELF_REFERENCE (res) = false; |
3447 | |
3448 | for (i = 0; i < full_seq.length; ++i) |
3449 | { |
3450 | struct subscript *subscript; |
3451 | |
3452 | subscript = XNEW (struct subscript); |
3453 | SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i]; |
3454 | SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i]; |
3455 | SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); |
3456 | SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); |
3457 | SUB_LAST_CONFLICT (subscript) = chrec_dont_know; |
3458 | SUB_DISTANCE (subscript) = chrec_dont_know; |
3459 | DDR_SUBSCRIPTS (res).safe_push (obj: subscript); |
3460 | } |
3461 | |
3462 | return res; |
3463 | } |
3464 | |
3465 | /* Initialize a data dependence relation between data accesses A and |
3466 | B. NB_LOOPS is the number of loops surrounding the references: the |
3467 | size of the classic distance/direction vectors. */ |
3468 | |
3469 | struct data_dependence_relation * |
3470 | initialize_data_dependence_relation (struct data_reference *a, |
3471 | struct data_reference *b, |
3472 | vec<loop_p> loop_nest) |
3473 | { |
3474 | data_dependence_relation *res = XCNEW (struct data_dependence_relation); |
3475 | DDR_A (res) = a; |
3476 | DDR_B (res) = b; |
3477 | DDR_LOOP_NEST (res).create (nelems: 0); |
3478 | DDR_SUBSCRIPTS (res).create (nelems: 0); |
3479 | DDR_DIR_VECTS (res).create (nelems: 0); |
3480 | DDR_DIST_VECTS (res).create (nelems: 0); |
3481 | |
3482 | if (a == NULL || b == NULL) |
3483 | { |
3484 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3485 | return res; |
3486 | } |
3487 | |
3488 | /* If the data references do not alias, then they are independent. */ |
3489 | if (!dr_may_alias_p (a, b, loop_nest: loop_nest.exists () ? loop_nest[0] : NULL)) |
3490 | { |
3491 | DDR_ARE_DEPENDENT (res) = chrec_known; |
3492 | return res; |
3493 | } |
3494 | |
3495 | return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: false); |
3496 | } |
3497 | |
3498 | |
3499 | /* Frees memory used by the conflict function F. */ |
3500 | |
3501 | static void |
3502 | free_conflict_function (conflict_function *f) |
3503 | { |
3504 | unsigned i; |
3505 | |
3506 | if (CF_NONTRIVIAL_P (f)) |
3507 | { |
3508 | for (i = 0; i < f->n; i++) |
3509 | affine_fn_free (fn: f->fns[i]); |
3510 | } |
3511 | free (ptr: f); |
3512 | } |
3513 | |
3514 | /* Frees memory used by SUBSCRIPTS. */ |
3515 | |
3516 | static void |
3517 | free_subscripts (vec<subscript_p> subscripts) |
3518 | { |
3519 | for (subscript_p s : subscripts) |
3520 | { |
3521 | free_conflict_function (f: s->conflicting_iterations_in_a); |
3522 | free_conflict_function (f: s->conflicting_iterations_in_b); |
3523 | free (ptr: s); |
3524 | } |
3525 | subscripts.release (); |
3526 | } |
3527 | |
3528 | /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap |
3529 | description. */ |
3530 | |
3531 | static inline void |
3532 | finalize_ddr_dependent (struct data_dependence_relation *ddr, |
3533 | tree chrec) |
3534 | { |
3535 | DDR_ARE_DEPENDENT (ddr) = chrec; |
3536 | free_subscripts (DDR_SUBSCRIPTS (ddr)); |
3537 | DDR_SUBSCRIPTS (ddr).create (nelems: 0); |
3538 | } |
3539 | |
3540 | /* The dependence relation DDR cannot be represented by a distance |
3541 | vector. */ |
3542 | |
3543 | static inline void |
3544 | non_affine_dependence_relation (struct data_dependence_relation *ddr) |
3545 | { |
3546 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3547 | fprintf (stream: dump_file, format: "(Dependence relation cannot be represented by distance vector.) \n" ); |
3548 | |
3549 | DDR_AFFINE_P (ddr) = false; |
3550 | } |
3551 | |
3552 | |
3553 | |
3554 | /* This section contains the classic Banerjee tests. */ |
3555 | |
3556 | /* Returns true iff CHREC_A and CHREC_B are not dependent on any index |
3557 | variables, i.e., if the ZIV (Zero Index Variable) test is true. */ |
3558 | |
3559 | static inline bool |
3560 | ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) |
3561 | { |
3562 | return (evolution_function_is_constant_p (chrec: chrec_a) |
3563 | && evolution_function_is_constant_p (chrec: chrec_b)); |
3564 | } |
3565 | |
3566 | /* Returns true iff CHREC_A and CHREC_B are dependent on an index |
3567 | variable, i.e., if the SIV (Single Index Variable) test is true. */ |
3568 | |
3569 | static bool |
3570 | siv_subscript_p (const_tree chrec_a, const_tree chrec_b) |
3571 | { |
3572 | if ((evolution_function_is_constant_p (chrec: chrec_a) |
3573 | && evolution_function_is_univariate_p (chrec_b)) |
3574 | || (evolution_function_is_constant_p (chrec: chrec_b) |
3575 | && evolution_function_is_univariate_p (chrec_a))) |
3576 | return true; |
3577 | |
3578 | if (evolution_function_is_univariate_p (chrec_a) |
3579 | && evolution_function_is_univariate_p (chrec_b)) |
3580 | { |
3581 | switch (TREE_CODE (chrec_a)) |
3582 | { |
3583 | case POLYNOMIAL_CHREC: |
3584 | switch (TREE_CODE (chrec_b)) |
3585 | { |
3586 | case POLYNOMIAL_CHREC: |
3587 | if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) |
3588 | return false; |
3589 | /* FALLTHRU */ |
3590 | |
3591 | default: |
3592 | return true; |
3593 | } |
3594 | |
3595 | default: |
3596 | return true; |
3597 | } |
3598 | } |
3599 | |
3600 | return false; |
3601 | } |
3602 | |
3603 | /* Creates a conflict function with N dimensions. The affine functions |
3604 | in each dimension follow. */ |
3605 | |
3606 | static conflict_function * |
3607 | conflict_fn (unsigned n, ...) |
3608 | { |
3609 | unsigned i; |
3610 | conflict_function *ret = XCNEW (conflict_function); |
3611 | va_list ap; |
3612 | |
3613 | gcc_assert (n > 0 && n <= MAX_DIM); |
3614 | va_start (ap, n); |
3615 | |
3616 | ret->n = n; |
3617 | for (i = 0; i < n; i++) |
3618 | ret->fns[i] = va_arg (ap, affine_fn); |
3619 | va_end (ap); |
3620 | |
3621 | return ret; |
3622 | } |
3623 | |
3624 | /* Returns constant affine function with value CST. */ |
3625 | |
3626 | static affine_fn |
3627 | affine_fn_cst (tree cst) |
3628 | { |
3629 | affine_fn fn; |
3630 | fn.create (nelems: 1); |
3631 | fn.quick_push (obj: cst); |
3632 | return fn; |
3633 | } |
3634 | |
3635 | /* Returns affine function with single variable, CST + COEF * x_DIM. */ |
3636 | |
3637 | static affine_fn |
3638 | affine_fn_univar (tree cst, unsigned dim, tree coef) |
3639 | { |
3640 | affine_fn fn; |
3641 | fn.create (nelems: dim + 1); |
3642 | unsigned i; |
3643 | |
3644 | gcc_assert (dim > 0); |
3645 | fn.quick_push (obj: cst); |
3646 | for (i = 1; i < dim; i++) |
3647 | fn.quick_push (integer_zero_node); |
3648 | fn.quick_push (obj: coef); |
3649 | return fn; |
3650 | } |
3651 | |
3652 | /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and |
3653 | *OVERLAPS_B are initialized to the functions that describe the |
3654 | relation between the elements accessed twice by CHREC_A and |
3655 | CHREC_B. For k >= 0, the following property is verified: |
3656 | |
3657 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ |
3658 | |
3659 | static void |
3660 | analyze_ziv_subscript (tree chrec_a, |
3661 | tree chrec_b, |
3662 | conflict_function **overlaps_a, |
3663 | conflict_function **overlaps_b, |
3664 | tree *last_conflicts) |
3665 | { |
3666 | tree type, difference; |
3667 | dependence_stats.num_ziv++; |
3668 | |
3669 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3670 | fprintf (stream: dump_file, format: "(analyze_ziv_subscript \n" ); |
3671 | |
3672 | type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); |
3673 | chrec_a = chrec_convert (type, chrec_a, NULL); |
3674 | chrec_b = chrec_convert (type, chrec_b, NULL); |
3675 | difference = chrec_fold_minus (type, chrec_a, chrec_b); |
3676 | |
3677 | switch (TREE_CODE (difference)) |
3678 | { |
3679 | case INTEGER_CST: |
3680 | if (integer_zerop (difference)) |
3681 | { |
3682 | /* The difference is equal to zero: the accessed index |
3683 | overlaps for each iteration in the loop. */ |
3684 | *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node)); |
3685 | *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node)); |
3686 | *last_conflicts = chrec_dont_know; |
3687 | dependence_stats.num_ziv_dependent++; |
3688 | } |
3689 | else |
3690 | { |
3691 | /* The accesses do not overlap. */ |
3692 | *overlaps_a = conflict_fn_no_dependence (); |
3693 | *overlaps_b = conflict_fn_no_dependence (); |
3694 | *last_conflicts = integer_zero_node; |
3695 | dependence_stats.num_ziv_independent++; |
3696 | } |
3697 | break; |
3698 | |
3699 | default: |
3700 | /* We're not sure whether the indexes overlap. For the moment, |
3701 | conservatively answer "don't know". */ |
3702 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3703 | fprintf (stream: dump_file, format: "ziv test failed: difference is non-integer.\n" ); |
3704 | |
3705 | *overlaps_a = conflict_fn_not_known (); |
3706 | *overlaps_b = conflict_fn_not_known (); |
3707 | *last_conflicts = chrec_dont_know; |
3708 | dependence_stats.num_ziv_unimplemented++; |
3709 | break; |
3710 | } |
3711 | |
3712 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3713 | fprintf (stream: dump_file, format: ")\n" ); |
3714 | } |
3715 | |
3716 | /* Similar to max_stmt_executions_int, but returns the bound as a tree, |
3717 | and only if it fits to the int type. If this is not the case, or the |
3718 | bound on the number of iterations of LOOP could not be derived, returns |
3719 | chrec_dont_know. */ |
3720 | |
3721 | static tree |
3722 | max_stmt_executions_tree (class loop *loop) |
3723 | { |
3724 | widest_int nit; |
3725 | |
3726 | if (!max_stmt_executions (loop, &nit)) |
3727 | return chrec_dont_know; |
3728 | |
3729 | if (!wi::fits_to_tree_p (x: nit, unsigned_type_node)) |
3730 | return chrec_dont_know; |
3731 | |
3732 | return wide_int_to_tree (unsigned_type_node, cst: nit); |
3733 | } |
3734 | |
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      /* Recurse on the initial value and on the step; if the sign of
	 either is unknown, the sign of the chrec is unknown too.  */
      if (!chrec_is_positive (CHREC_LEFT (chrec), value: &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), value: &value1))
	return false;

      /* FIXME -- overflows.  */
      /* Initial value and step have the same sign: the chrec keeps
	 that sign throughout.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      /* Value of the chrec at the last iteration of its loop.  */
      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (chrec: end_value, value: &value2))
	return false;

      /* NOTE(review): VALUE2 is only used to check that the sign of the
	 end value is analyzable; since VALUE0 != VALUE1 necessarily holds
	 here, the return below always yields false — confirm this
	 conservative behavior is intended.  */
      *value = value0;
      return value0 == value1;

    case INTEGER_CST:
      /* The sign of a constant is its sign; zero has no sign and is
	 reported as not analyzable.  */
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
3803 | |
3804 | |
3805 | /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a |
3806 | constant, and CHREC_B is an affine function. *OVERLAPS_A and |
3807 | *OVERLAPS_B are initialized to the functions that describe the |
3808 | relation between the elements accessed twice by CHREC_A and |
3809 | CHREC_B. For k >= 0, the following property is verified: |
3810 | |
3811 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ |
3812 | |
static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a signed type common to both access functions.
     DIFFERENCE is the initial value of the affine CHREC_B minus the
     constant CHREC_A; its sign tells on which side of CHREC_A the
     affine function starts.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0 is the sign of the initial gap; unknown sign means the
     test is not applicable.  */
  if (!chrec_is_positive (chrec: initial_condition (difference), value: &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below CHREC_A: it can only catch up with a
	     step of known (positive) sign.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), value: &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  /* An overlap exists iff the step divides the gap; the
		     conflicting iteration of B is |gap| / step.  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), b: difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec: chrec_b);

		      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (cst: tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  /* The conflict happens past the last iteration:
			     discard the overlap functions built above.  */
			  free_conflict_function (f: *overlaps_a);
			  free_conflict_function (f: *overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A: symmetric case, it can only
	     reach CHREC_A with a step of known (negative) sign.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), value: &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  /* Here DIFFERENCE is positive, so no ABS_EXPR is
		     needed before the exact division.  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), b: difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec: chrec_b);

		      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (cst: tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  /* Conflict is past the last iteration: release
			     the overlap functions built above.  */
			  free_conflict_function (f: *overlaps_a);
			  free_conflict_function (f: *overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
4012 | |
4013 | /* Helper recursive function for initializing the matrix A. Returns |
4014 | the initial value of CHREC. */ |
4015 | |
4016 | static tree |
4017 | initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) |
4018 | { |
4019 | gcc_assert (chrec); |
4020 | |
4021 | switch (TREE_CODE (chrec)) |
4022 | { |
4023 | case POLYNOMIAL_CHREC: |
4024 | HOST_WIDE_INT chrec_right; |
4025 | if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec))) |
4026 | return chrec_dont_know; |
4027 | chrec_right = int_cst_value (CHREC_RIGHT (chrec)); |
4028 | /* We want to be able to negate without overflow. */ |
4029 | if (chrec_right == HOST_WIDE_INT_MIN) |
4030 | return chrec_dont_know; |
4031 | A[index][0] = mult * chrec_right; |
4032 | return initialize_matrix_A (A, CHREC_LEFT (chrec), index: index + 1, mult); |
4033 | |
4034 | case PLUS_EXPR: |
4035 | case MULT_EXPR: |
4036 | case MINUS_EXPR: |
4037 | { |
4038 | tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); |
4039 | tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); |
4040 | |
4041 | return chrec_fold_op (TREE_CODE (chrec), type: chrec_type (chrec), op0, op1); |
4042 | } |
4043 | |
4044 | CASE_CONVERT: |
4045 | { |
4046 | tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); |
4047 | return chrec_convert (chrec_type (chrec), op, NULL); |
4048 | } |
4049 | |
4050 | case BIT_NOT_EXPR: |
4051 | { |
4052 | /* Handle ~X as -1 - X. */ |
4053 | tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); |
4054 | return chrec_fold_op (code: MINUS_EXPR, type: chrec_type (chrec), |
4055 | op0: build_int_cst (TREE_TYPE (chrec), -1), op1: op); |
4056 | } |
4057 | |
4058 | case INTEGER_CST: |
4059 | return chrec; |
4060 | |
4061 | default: |
4062 | gcc_unreachable (); |
4063 | return NULL_TREE; |
4064 | } |
4065 | } |
4066 | |
4067 | #define FLOOR_DIV(x,y) ((x) / (y)) |
4068 | |
4069 | /* Solves the special case of the Diophantine equation: |
4070 | | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) |
4071 | |
4072 | Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the |
4073 | number of iterations that loops X and Y run. The overlaps will be |
4074 | constructed as evolutions in dimension DIM. */ |
4075 | |
4076 | static void |
4077 | compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter, |
4078 | HOST_WIDE_INT step_a, |
4079 | HOST_WIDE_INT step_b, |
4080 | affine_fn *overlaps_a, |
4081 | affine_fn *overlaps_b, |
4082 | tree *last_conflicts, int dim) |
4083 | { |
4084 | if (((step_a > 0 && step_b > 0) |
4085 | || (step_a < 0 && step_b < 0))) |
4086 | { |
4087 | HOST_WIDE_INT step_overlaps_a, step_overlaps_b; |
4088 | HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2; |
4089 | |
4090 | gcd_steps_a_b = gcd (step_a, step_b); |
4091 | step_overlaps_a = step_b / gcd_steps_a_b; |
4092 | step_overlaps_b = step_a / gcd_steps_a_b; |
4093 | |
4094 | if (niter > 0) |
4095 | { |
4096 | tau2 = FLOOR_DIV (niter, step_overlaps_a); |
4097 | tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); |
4098 | last_conflict = tau2; |
4099 | *last_conflicts = build_int_cst (NULL_TREE, last_conflict); |
4100 | } |
4101 | else |
4102 | *last_conflicts = chrec_dont_know; |
4103 | |
4104 | *overlaps_a = affine_fn_univar (integer_zero_node, dim, |
4105 | coef: build_int_cst (NULL_TREE, |
4106 | step_overlaps_a)); |
4107 | *overlaps_b = affine_fn_univar (integer_zero_node, dim, |
4108 | coef: build_int_cst (NULL_TREE, |
4109 | step_overlaps_b)); |
4110 | } |
4111 | |
4112 | else |
4113 | { |
4114 | *overlaps_a = affine_fn_cst (integer_zero_node); |
4115 | *overlaps_b = affine_fn_cst (integer_zero_node); |
4116 | *last_conflicts = integer_zero_node; |
4117 | } |
4118 | } |
4119 | |
4120 | /* Solves the special case of a Diophantine equation where CHREC_A is |
4121 | an affine bivariate function, and CHREC_B is an affine univariate |
4122 | function. For example, |
4123 | |
4124 | | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z |
4125 | |
4126 | has the following overlapping functions: |
4127 | |
4128 | | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v |
4129 | | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v |
4130 | | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v |
4131 | |
4132 | FORNOW: This is a specialized implementation for a case occurring in |
4133 | a common benchmark. Implement the general algorithm. */ |
4134 | |
static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the outer loop of the bivariate CHREC_A, Y its inner loop,
     and Z the loop of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));

  /* All three iteration counts are needed to bound the conflicts.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate subproblems: X against Z, Y against Z,
     and X+Y (both dimensions advancing together) against Z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_x, step_b: step_z,
					   overlaps_a: &overlaps_a_xz,
					   overlaps_b: &overlaps_b_xz,
					   last_conflicts: &last_conflicts_xz, dim: 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_y, step_b: step_z,
					   overlaps_a: &overlaps_a_yz,
					   overlaps_b: &overlaps_b_yz,
					   last_conflicts: &last_conflicts_yz, dim: 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_x + step_y, step_b: step_z,
					   overlaps_a: &overlaps_a_xyz,
					   overlaps_b: &overlaps_b_xyz,
					   last_conflicts: &last_conflicts_xyz, dim: 3);

  /* A subproblem contributes iff it has at least one conflict.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Accumulate the contributing overlap functions.  In each branch
	 the temporaries hold the previous sums so they can be freed
	 after affine_fn_plus allocates the new ones.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (fna: ova1, fnb: overlaps_a_xz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_xz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (fna: ova2, fnb: overlaps_a_yz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_yz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (fna: ova1, fnb: overlaps_a_xyz);
	  ova2 = affine_fn_plus (fna: ova2, fnb: overlaps_a_xyz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_xyz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  affine_fn_free (fn: t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* OVA1/OVA2 describe the X and Y dimensions of CHREC_A, OVB the
	 single dimension of CHREC_B.  */
      *overlaps_a = conflict_fn (n: 2, ova1, ova2);
      *overlaps_b = conflict_fn (n: 1, ovb);
    }
  else
    {
      /* No subproblem conflicts: only the trivial overlap at iteration
	 zero remains, with zero conflicts.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* The per-subproblem functions were summed into fresh vectors above,
     so they can all be released here.  */
  affine_fn_free (fn: overlaps_a_xz);
  affine_fn_free (fn: overlaps_b_xz);
  affine_fn_free (fn: overlaps_a_yz);
  affine_fn_free (fn: overlaps_b_yz);
  affine_fn_free (fn: overlaps_a_xyz);
  affine_fn_free (fn: overlaps_b_xyz);
}
4248 | |
4249 | /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ |
4250 | |
4251 | static void |
4252 | lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, |
4253 | int size) |
4254 | { |
4255 | memcpy (dest: vec2, src: vec1, n: size * sizeof (*vec1)); |
4256 | } |
4257 | |
4258 | /* Copy the elements of M x N matrix MAT1 to MAT2. */ |
4259 | |
4260 | static void |
4261 | lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, |
4262 | int m, int n) |
4263 | { |
4264 | int i; |
4265 | |
4266 | for (i = 0; i < m; i++) |
4267 | lambda_vector_copy (vec1: mat1[i], vec2: mat2[i], size: n); |
4268 | } |
4269 | |
4270 | /* Store the N x N identity matrix in MAT. */ |
4271 | |
4272 | static void |
4273 | lambda_matrix_id (lambda_matrix mat, int size) |
4274 | { |
4275 | int i, j; |
4276 | |
4277 | for (i = 0; i < size; i++) |
4278 | for (j = 0; j < size; j++) |
4279 | mat[i][j] = (i == j) ? 1 : 0; |
4280 | } |
4281 | |
4282 | /* Return the index of the first nonzero element of vector VEC1 between |
4283 | START and N. We must have START <= N. |
4284 | Returns N if VEC1 is the zero vector. */ |
4285 | |
4286 | static int |
4287 | lambda_vector_first_nz (lambda_vector vec1, int n, int start) |
4288 | { |
4289 | int j = start; |
4290 | while (j < n && vec1[j] == 0) |
4291 | j++; |
4292 | return j; |
4293 | } |
4294 | |
4295 | /* Add a multiple of row R1 of matrix MAT with N columns to row R2: |
4296 | R2 = R2 + CONST1 * R1. */ |
4297 | |
4298 | static bool |
4299 | lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, |
4300 | lambda_int const1) |
4301 | { |
4302 | int i; |
4303 | |
4304 | if (const1 == 0) |
4305 | return true; |
4306 | |
4307 | for (i = 0; i < n; i++) |
4308 | { |
4309 | bool ovf; |
4310 | lambda_int tem = mul_hwi (a: mat[r1][i], b: const1, overflow: &ovf); |
4311 | if (ovf) |
4312 | return false; |
4313 | lambda_int tem2 = add_hwi (a: mat[r2][i], b: tem, overflow: &ovf); |
4314 | if (ovf || tem2 == HOST_WIDE_INT_MIN) |
4315 | return false; |
4316 | mat[r2][i] = tem2; |
4317 | } |
4318 | |
4319 | return true; |
4320 | } |
4321 | |
4322 | /* Multiply vector VEC1 of length SIZE by a constant CONST1, |
4323 | and store the result in VEC2. */ |
4324 | |
4325 | static void |
4326 | lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, |
4327 | int size, lambda_int const1) |
4328 | { |
4329 | int i; |
4330 | |
4331 | if (const1 == 0) |
4332 | lambda_vector_clear (vec1: vec2, size); |
4333 | else |
4334 | for (i = 0; i < size; i++) |
4335 | vec2[i] = const1 * vec1[i]; |
4336 | } |
4337 | |
4338 | /* Negate vector VEC1 with length SIZE and store it in VEC2. */ |
4339 | |
4340 | static void |
4341 | lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, |
4342 | int size) |
4343 | { |
4344 | lambda_vector_mult_const (vec1, vec2, size, const1: -1); |
4345 | } |
4346 | |
4347 | /* Negate row R1 of matrix MAT which has N columns. */ |
4348 | |
4349 | static void |
4350 | lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) |
4351 | { |
4352 | lambda_vector_negate (vec1: mat[r1], vec2: mat[r1], size: n); |
4353 | } |
4354 | |
4355 | /* Return true if two vectors are equal. */ |
4356 | |
4357 | static bool |
4358 | lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) |
4359 | { |
4360 | int i; |
4361 | for (i = 0; i < size; i++) |
4362 | if (vec1[i] != vec2[i]) |
4363 | return false; |
4364 | return true; |
4365 | } |
4366 | |
4367 | /* Given an M x N integer matrix A, this function determines an M x |
4368 | M unimodular matrix U, and an M x N echelon matrix S such that |
4369 | "U.A = S". This decomposition is also known as "right Hermite". |
4370 | |
4371 | Ref: Algorithm 2.1 page 33 in "Loop Transformations for |
4372 | Restructuring Compilers" Utpal Banerjee. */ |
4373 | |
static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start with S = A and U = identity; every row operation below is
     applied to both, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (mat1: A, mat2: S, m, n);
  lambda_matrix_id (mat: U, size: m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (vec1: S[j], n: m, start: i0) < m)
	{
	  ++i0;
	  /* Reduce the entries below the pivot to zero by the row
	     version of Euclid's algorithm: repeatedly subtract the
	     quotient times the lower row, then exchange the rows.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* a / b would overflow for a == HOST_WIDE_INT_MIN,
		     b == -1; lambda_matrix_row_add never produces
		     HOST_WIDE_INT_MIN, so assert it cannot occur.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  /* Return false (decomposition failed) if any row
		     update overflows.  */
		  if (!lambda_matrix_row_add (mat: S, n, r1: i, r2: i-1, const1: -factor))
		    return false;
		  std::swap (a&: S[i], b&: S[i-1]);

		  if (!lambda_matrix_row_add (mat: U, n: m, r1: i, r2: i-1, const1: -factor))
		    return false;
		  std::swap (a&: U[i], b&: U[i-1]);
		}
	    }
	}
    }

  return true;
}
4413 | |
4414 | /* Determines the overlapping elements due to accesses CHREC_A and |
4415 | CHREC_B, that are affine functions. This function cannot handle |
4416 | symbolic evolution functions, ie. when initial conditions are |
4417 | parameters, because it uses lambda matrices of integers. */ |
4418 | |
static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_subscript_affine_affine \n" );

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  /* Build the DIM x 1 coefficient matrix A from both chrecs;
     CHREC_B's coefficients enter negated (mult: -1) so that a zero of
     the combined system equates the two access functions.  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (m: dim, n: dim, lambda_obstack: &scratch_obstack);
  A = lambda_matrix_new (m: dim, n: 1, lambda_obstack: &scratch_obstack);
  S = lambda_matrix_new (m: dim, n: 1, lambda_obstack: &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec: chrec_a, index: 0, mult: 1);
  tree init_b = initialize_matrix_A (A, chrec: chrec_b, index: nb_vars_a, mult: -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "affine-affine test failed: "
		 "representation issue.\n" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the difference of the initial values of the two
     access functions.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  /* Both chrecs are univariate: use the specialized
	     univariate overlap computation bounded by the smaller
	     maximum iteration count.  */
	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   overlaps_a: &ova, overlaps_b: &ovb,
						   last_conflicts, dim: 1);
	  *overlaps_a = conflict_fn (n: 1, ova);
	  *overlaps_b = conflict_fn (n: 1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      /* Symmetric case: swap the roles of A and B.  */
      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a: chrec_b, chrec_b: chrec_a, overlaps_a: overlaps_b, overlaps_b: overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "affine-affine test failed: too many variables.\n" );
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, m: dim, n: 1, S, U))
    {
      /* The Hermite decomposition overflowed: answer "don't know".  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize the leading entry of S to be positive, negating the
     corresponding row of U to preserve U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (mat: U, n: dim, r1: 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (a: gcd_alpha_beta, b: gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (x: wi::sdiv_floor (x: wi::sub (x: niter_a, y: i0), y: i1),
				y: wi::sdiv_floor (x: wi::sub (x: niter_b, y: j0), y: j1));
		  widest_int last_conflict = wi::sub (x: tau2, y: (x1 - i0)/i1);
		  /* Emit an INTEGER_CST only when the count fits the
		     precision of integer_type_node.  */
		  if (wi::min_precision (x: last_conflict, sgn: SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The conflicting iterations are x1 + i1*k for A and
		 y1 + j1*k for B.  */
	      *overlaps_a
		= conflict_fn (n: 1,
			       affine_fn_univar (cst: build_int_cst (NULL_TREE, x1),
						 dim: 1,
						 coef: build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (n: 1,
			       affine_fn_univar (cst: build_int_cst (NULL_TREE, y1),
						 dim: 1,
						 coef: build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  /* Single exit point: release the scratch matrices.  */
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "  (overlaps_a = " );
      dump_conflict_function (outf: dump_file, cf: *overlaps_a);
      fprintf (stream: dump_file, format: ")\n  (overlaps_b = " );
      dump_conflict_function (outf: dump_file, cf: *overlaps_b);
      fprintf (stream: dump_file, format: "))\n" );
    }
}
4709 | |
4710 | /* Returns true when analyze_subscript_affine_affine can be used for |
4711 | determining the dependence relation between chrec_a and chrec_b, |
4712 | that contain symbols. This function modifies chrec_a and chrec_b |
4713 | such that the analysis result is the same, and such that they don't |
4714 | contain symbols, and then can safely be passed to the analyzer. |
4715 | |
4716 | Example: The analysis of the following tuples of evolutions produce |
4717 | the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 |
4718 | vs. {0, +, 1}_1 |
4719 | |
4720 | {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) |
4721 | {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) |
4722 | */ |
4723 | |
static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* The rewrite is only valid when the difference of the initial
     values folds to a constant.  */
  type = chrec_type (chrec: *chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (chrec: diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "can_use_subscript_aff_aff_for_symbolic \n" );

  /* Rebase both chrecs: CHREC_A now starts at the constant difference
     and CHREC_B at zero, keeping the same steps; see the function
     comment for why the analysis result is unchanged.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     left: diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     left: build_int_cst (type, 0),
				     right: right_b);
  return true;
}
4753 | |
4754 | /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and |
4755 | *OVERLAPS_B are initialized to the functions that describe the |
4756 | relation between the elements accessed twice by CHREC_A and |
4757 | CHREC_B. For k >= 0, the following property is verified: |
4758 | |
4759 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ |
4760 | |
static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_siv_subscript \n" );

  /* Constant vs. affine: use the cst-affine solver directly.  */
  if (evolution_function_is_constant_p (chrec: chrec_a)
      && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  /* Affine vs. constant: same solver with the arguments (and result
     slots) swapped.  */
  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest_num)
	   && evolution_function_is_constant_p (chrec: chrec_b))
    analyze_siv_subscript_cst_affine (chrec_a: chrec_b, chrec_b: chrec_a,
				      overlaps_a: overlaps_b, overlaps_b: overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  /* Purely numeric affine vs. affine: solve directly and
	     classify the outcome for the statistics counters.  */
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs: try rewriting them into an equivalent
	 symbol-free form first (modifies chrec_a/chrec_b locally).  */
      else if (can_use_analyze_subscript_affine_affine (chrec_a: &chrec_a,
							chrec_b: &chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      /* Unhandled combination: conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: " siv test failed: unimplemented" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: ")\n" );
}
4837 | |
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */
4840 | |
4841 | static bool |
4842 | gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst) |
4843 | { |
4844 | HOST_WIDE_INT cd = 0, val; |
4845 | tree step; |
4846 | |
4847 | if (!tree_fits_shwi_p (cst)) |
4848 | return true; |
4849 | val = tree_to_shwi (cst); |
4850 | |
4851 | while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) |
4852 | { |
4853 | step = CHREC_RIGHT (chrec); |
4854 | if (!tree_fits_shwi_p (step)) |
4855 | return true; |
4856 | cd = gcd (cd, tree_to_shwi (step)); |
4857 | chrec = CHREC_LEFT (chrec); |
4858 | } |
4859 | |
4860 | return val % cd == 0; |
4861 | } |
4862 | |
4863 | /* Analyze a MIV (Multiple Index Variable) subscript with respect to |
4864 | LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the |
4865 | functions that describe the relation between the elements accessed |
4866 | twice by CHREC_A and CHREC_B. For k >= 0, the following property |
4867 | is verified: |
4868 | |
4869 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ |
4870 | |
static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_miv_subscript \n" );

  /* Convert both chrecs to a common signed type so the difference
     below is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (loop: get_chrec_loop (chrec: chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  /* GCD test: a constant difference that no combination of steps can
     produce proves independence.  */
  else if (evolution_function_is_constant_p (chrec: difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec: chrec_a, cst: difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  /* Both affine and symbol-free: hand over to the affine-affine
     solver and classify the outcome for the statistics counters.  */
  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "analyze_miv_subscript test failed: unimplemented.\n" );

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: ")\n" );
}
4963 | |
4964 | /* Determines the iterations for which CHREC_A is equal to CHREC_B in |
4965 | with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and |
4966 | OVERLAP_ITERATIONS_B are initialized with two functions that |
4967 | describe the iterations that contain conflicting elements. |
4968 | |
4969 | Remark: For an integer k >= 0, the following equality is true: |
4970 | |
4971 | CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). |
4972 | */ |
4973 | |
static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "(analyze_overlapping_iterations \n" );
      fprintf (stream: dump_file, format: "  (chrec_a = " );
      print_generic_expr (dump_file, chrec_a);
      fprintf (stream: dump_file, format: ")\n  (chrec_b = " );
      print_generic_expr (dump_file, chrec_b);
      fprintf (stream: dump_file, format: ")\n" );
    }

  /* Missing or undetermined chrecs: answer "don't know".  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, flags: 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Dispatch to the ZIV / SIV / MIV analyzer according to the number
     of index variables in the subscript pair.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts, loop_nest_num: lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "  (overlap_iterations_a = " );
      dump_conflict_function (outf: dump_file, cf: *overlap_iterations_a);
      fprintf (stream: dump_file, format: ")\n  (overlap_iterations_b = " );
      dump_conflict_function (outf: dump_file, cf: *overlap_iterations_b);
      fprintf (stream: dump_file, format: "))\n" );
    }
}
5054 | |
5055 | /* Helper function for uniquely inserting distance vectors. */ |
5056 | |
5057 | static void |
5058 | save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) |
5059 | { |
5060 | for (lambda_vector v : DDR_DIST_VECTS (ddr)) |
5061 | if (lambda_vector_equal (vec1: v, vec2: dist_v, DDR_NB_LOOPS (ddr))) |
5062 | return; |
5063 | |
5064 | DDR_DIST_VECTS (ddr).safe_push (obj: dist_v); |
5065 | } |
5066 | |
5067 | /* Helper function for uniquely inserting direction vectors. */ |
5068 | |
5069 | static void |
5070 | save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) |
5071 | { |
5072 | for (lambda_vector v : DDR_DIR_VECTS (ddr)) |
5073 | if (lambda_vector_equal (vec1: v, vec2: dir_v, DDR_NB_LOOPS (ddr))) |
5074 | return; |
5075 | |
5076 | DDR_DIR_VECTS (ddr).safe_push (obj: dir_v); |
5077 | } |
5078 | |
5079 | /* Add a distance of 1 on all the loops outer than INDEX. If we |
5080 | haven't yet determined a distance for this outer loop, push a new |
5081 | distance vector composed of the previous distance, and a distance |
5082 | of 1 for this outer loop. Example: |
5083 | |
5084 | | loop_1 |
5085 | | loop_2 |
5086 | | A[10] |
5087 | | endloop_2 |
5088 | | endloop_1 |
5089 | |
5090 | Saved vectors are of the form (dist_in_1, dist_in_2). First, we |
5091 | save (0, 1), then we have to save (1, 0). */ |
5092 | |
5093 | static void |
5094 | add_outer_distances (struct data_dependence_relation *ddr, |
5095 | lambda_vector dist_v, int index) |
5096 | { |
5097 | /* For each outer loop where init_v is not set, the accesses are |
5098 | in dependence of distance 1 in the loop. */ |
5099 | while (--index >= 0) |
5100 | { |
5101 | lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); |
5102 | lambda_vector_copy (vec1: dist_v, vec2: save_v, DDR_NB_LOOPS (ddr)); |
5103 | save_v[index] = 1; |
5104 | save_dist_v (ddr, dist_v: save_v); |
5105 | } |
5106 | } |
5107 | |
5108 | /* Return false when fail to represent the data dependence as a |
5109 | distance vector. A_INDEX is the index of the first reference |
5110 | (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the |
5111 | second reference. INIT_B is set to true when a component has been |
5112 | added to the distance vector DIST_V. INDEX_CARRY is then set to |
5113 | the index in DIST_V that carries the dependence. */ |
5114 | |
static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* init_v[k] is set to 1 once a distance has been recorded for the
     loop at position k; used by the subscript coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* A distance that is not a known constant cannot be represented
	 in a classic distance vector.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both evolutions must be in the same loop for the distance
	     to be meaningful.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, num: var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var: var_a, DDR_LOOP_NEST (ddr));
	  /* Track the outermost loop position carrying the dependence.  */
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, flags: 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
      else
	/* Equal non-chrec access functions: dependence exists but adds
	   no component to the distance vector.  */
	*init_b = true;
    }

  return true;
}
5201 | |
5202 | /* Return true when the DDR contains only invariant access functions wrto. loop |
5203 | number LNUM. */ |
5204 | |
5205 | static bool |
5206 | invariant_access_functions (const struct data_dependence_relation *ddr, |
5207 | int lnum) |
5208 | { |
5209 | for (subscript *sub : DDR_SUBSCRIPTS (ddr)) |
5210 | if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) |
5211 | || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) |
5212 | return false; |
5213 | |
5214 | return true; |
5215 | } |
5216 | |
5217 | /* Helper function for the case where DDR_A and DDR_B are the same |
5218 | multivariate access function with a constant step. For an example |
5219 | see pr34635-1.c. */ |
5220 | |
static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  /* C_2 is {{c_0, +, step_1}_1, +, step_2}_2: peel off the two levels
     of the chrec.  */
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two chrec variables in the analyzed loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the pair of steps by their gcd to get the smallest
     integral solution.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so that the component for the outer position
     (x_1) is non-negative.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, index: x_1);
}
5264 | |
5265 | /* Helper function for the case where DDR_A and DDR_B are the same |
5266 | access functions. */ |
5267 | |
static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Outermost loop position that carries the dependence; initialized
     past the last loop so MIN below can lower it.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for
		 single-subscript references.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, c_2: access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record unit distances for all loops outer than INDEX_CARRY, on
     top of an all-zero base vector.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index: index_carry);
}
5324 | |
/* Record in DDR a distance vector that is zero everywhere except for
   a distance of 1 in the loop at index 0 of the loop nest.  */

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}
5333 | |
5334 | /* Adds a unit distance vector to DDR when there is a 0 overlap. This |
5335 | is the case for example when access functions are the same and |
5336 | equal to a constant, as in: |
5337 | |
5338 | | loop_1 |
5339 | | A[3] = ... |
5340 | | ... = A[3] |
5341 | | endloop_1 |
5342 | |
5343 | in which case the distance vectors are (0) and (1). */ |
5344 | |
5345 | static void |
5346 | add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) |
5347 | { |
5348 | unsigned i, j; |
5349 | |
5350 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
5351 | { |
5352 | subscript_p sub = DDR_SUBSCRIPT (ddr, i); |
5353 | conflict_function *ca = SUB_CONFLICTS_IN_A (sub); |
5354 | conflict_function *cb = SUB_CONFLICTS_IN_B (sub); |
5355 | |
5356 | for (j = 0; j < ca->n; j++) |
5357 | if (affine_function_zero_p (fn: ca->fns[j])) |
5358 | { |
5359 | insert_innermost_unit_dist_vector (ddr); |
5360 | return; |
5361 | } |
5362 | |
5363 | for (j = 0; j < cb->n; j++) |
5364 | if (affine_function_zero_p (fn: cb->fns[j])) |
5365 | { |
5366 | insert_innermost_unit_dist_vector (ddr); |
5367 | return; |
5368 | } |
5369 | } |
5370 | } |
5371 | |
5372 | /* Return true when the DDR contains two data references that have the |
5373 | same access functions. */ |
5374 | |
5375 | static inline bool |
5376 | same_access_functions (const struct data_dependence_relation *ddr) |
5377 | { |
5378 | for (subscript *sub : DDR_SUBSCRIPTS (ddr)) |
5379 | if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0), |
5380 | SUB_ACCESS_FN (sub, 1))) |
5381 | return false; |
5382 | |
5383 | return true; |
5384 | } |
5385 | |
5386 | /* Compute the classic per loop distance vector. DDR is the data |
5387 | dependence relation to build a vector from. Return false when fail |
5388 | to represent the data dependence as a distance vector. */ |
5389 | |
static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation is already known (dependent,
     independent, or don't-know).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, lnum: loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  /* General case: derive the distance vector from the per-subscript
     distances computed for the pair (DDR_A, DDR_B).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, a_index: 0, b_index: 1, dist_v, init_b: &init_b, index_carry: &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (v: dist_v, DDR_NB_LOOPS (ddr)))
	{
	  /* Redo the dependence test with the references swapped and
	     keep the lexicographically positive result.  */
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, a_index: 1, b_index: 0, dist_v: save_v, init_b: &init_b,
					    index_carry: &index_carry))
	    return false;
	  save_dist_v (ddr, dist_v: save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, dist_v: save_v, index: index_carry);
	      add_outer_distances (ddr, dist_v, index: index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (vec1: dist_v, vec2: save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also compute the opposite-direction vector from
		 test_dep (B, A) and record both with their outer
		 distances.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, a_index: 1, b_index: 0, dist_v: opposite_v, init_b: &init_b,
						index_carry: &index_carry))
		return false;

	      save_dist_v (ddr, dist_v: save_v);
	      add_outer_distances (ddr, dist_v, index: index_carry);
	      add_outer_distances (ddr, dist_v: opposite_v, index: index_carry);
	    }
	  else
	    save_dist_v (ddr, dist_v: save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   index: lambda_vector_first_nz (vec1: dist_v,
						     DDR_NB_LOOPS (ddr), start: 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (stream: dump_file, format: "(build_classic_dist_vector\n" );
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (stream: dump_file, format: "  dist_vector = (" );
	  print_lambda_vector (outfile: dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (stream: dump_file, format: "  )\n" );
	}
      fprintf (stream: dump_file, format: ")\n" );
    }

  return true;
}
5536 | |
5537 | /* Return the direction for a given distance. |
5538 | FIXME: Computing dir this way is suboptimal, since dir can catch |
5539 | cases that dist is unable to represent. */ |
5540 | |
5541 | static inline enum data_dependence_direction |
5542 | dir_from_dist (int dist) |
5543 | { |
5544 | if (dist > 0) |
5545 | return dir_positive; |
5546 | else if (dist < 0) |
5547 | return dir_negative; |
5548 | else |
5549 | return dir_equal; |
5550 | } |
5551 | |
5552 | /* Compute the classic per loop direction vector. DDR is the data |
5553 | dependence relation to build a vector from. */ |
5554 | |
5555 | static void |
5556 | build_classic_dir_vector (struct data_dependence_relation *ddr) |
5557 | { |
5558 | unsigned i, j; |
5559 | lambda_vector dist_v; |
5560 | |
5561 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
5562 | { |
5563 | lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); |
5564 | |
5565 | for (j = 0; j < DDR_NB_LOOPS (ddr); j++) |
5566 | dir_v[j] = dir_from_dist (dist: dist_v[j]); |
5567 | |
5568 | save_dir_v (ddr, dir_v); |
5569 | } |
5570 | } |
5571 | |
5572 | /* Helper function. Returns true when there is a dependence between the |
5573 | data references. A_INDEX is the index of the first reference (0 for |
5574 | DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */ |
5575 | |
static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE while no verdict has been reached; chrec_dont_know or
     chrec_known otherwise.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (ix: i, ptr: &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      overlap_iterations_a: &overlaps_a, overlap_iterations_b: &overlaps_b,
				      last_conflicts: &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous analysis before
	 installing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* No subscript ruled the dependence out or failed: dependent.  */
  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, chrec: res);
  return false;
}
5633 | |
5634 | /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ |
5635 | |
static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* A true result means the references are dependent.  */
  if (subscript_dependence_tester_1 (ddr, a_index: 0, b_index: 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Derive the classic distance and direction vectors from the
     per-subscript conflict information computed above.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
5647 | |
5648 | /* Returns true when all the access functions of A are affine or |
5649 | constant with respect to LOOP_NEST. */ |
5650 | |
5651 | static bool |
5652 | access_functions_are_affine_or_constant_p (const struct data_reference *a, |
5653 | const class loop *loop_nest) |
5654 | { |
5655 | vec<tree> fns = DR_ACCESS_FNS (a); |
5656 | for (tree t : fns) |
5657 | if (!evolution_function_is_invariant_p (t, loop_nest->num) |
5658 | && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) |
5659 | return false; |
5660 | |
5661 | return true; |
5662 | } |
5663 | |
5664 | /* This computes the affine dependence relation between A and B with |
5665 | respect to LOOP_NEST. CHREC_KNOWN is used for representing the |
5666 | independence between two accesses, while CHREC_DONT_KNOW is used |
5667 | for representing the unknown relation. |
5668 | |
5669 | Note that it is possible to stop the computation of the dependence |
5670 | relation the first time we detect a CHREC_KNOWN element for a given |
5671 | subscript. */ |
5672 | |
void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "(compute_affine_dependence\n" );
      fprintf (stream: dump_file, format: "  ref_a: " );
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (stream: dump_file, format: ", stmt_a: " );
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (stream: dump_file, format: "  ref_b: " );
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (stream: dump_file, format: ", stmt_b: " );
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tests only apply when both references have
	 affine or constant access functions in the nest.  */
      if (access_functions_are_affine_or_constant_p (a: dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (a: drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (stream: dump_file, format: "Data ref a:\n" );
	      dump_data_reference (outf: dump_file, dr: dra);
	      fprintf (stream: dump_file, format: "Data ref b:\n" );
	      dump_data_reference (outf: dump_file, dr: drb);
	      fprintf (stream: dump_file, format: "affine dependence test not usable: access function not affine or constant.\n" );
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (stream: dump_file, format: ") -> no dependence\n" );
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (stream: dump_file, format: ") -> dependence analysis failed\n" );
      else
	fprintf (stream: dump_file, format: ")\n" );
    }
}
5731 | |
5732 | /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all |
5733 | the data references in DATAREFS, in the LOOP_NEST. When |
5734 | COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self |
5735 | relations. Return true when successful, i.e. data references number |
5736 | is small enough to be handled. */ |
5737 | |
bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  /* Give up early when the number of data references would make the
     quadratic all-pairs analysis below too expensive.  */
  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (obj: ddr);
      return false;
    }

  /* All ordered pairs (a, b): skip read-read pairs unless
     COMPUTE_SELF_AND_RR is set.  */
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (ix: j, ptr: &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (obj: ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest: loop_nest[0]);
	}

  /* Optionally also compute the self relations (a, a).  */
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, b: a, loop_nest);
	dependence_relations->safe_push (obj: ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest: loop_nest[0]);
      }

  return true;
}
5781 | |
5782 | /* Describes a location of a memory reference. */ |
5783 | |
struct data_ref_loc
{
  /* The memory reference tree (e.g. a MEM_REF or a DECL).  */
  tree ref;

  /* True if the memory reference is read, false if it is written.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
5797 | |
5798 | |
5799 | /* Stores the locations of memory references in STMT to REFERENCES. Returns |
5800 | true if STMT clobbers memory, false otherwise. */ |
5801 | |
static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (g: stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (gs: stmt))
	switch (gimple_call_internal_fn (gs: stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (g: stmt)->loop_father;
	      tree uid = gimple_call_arg (gs: stmt, index: 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only treat the call as harmless inside the loop the
		 simduid belongs to.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      /* A masked call is harmless only when the called
		 function is known and const.  */
	      tree orig_fndecl
		= gimple_call_addr_fndecl (fn: gimple_call_arg (gs: stmt, index: 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (asm_stmt: as_a <gasm *> (p: stmt))
	       || gimple_vuse (g: stmt)))
    clobbers_memory = true;

  /* No virtual use means the statement touches no memory.  */
  if (!gimple_vuse (g: stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (gs: stmt);
      op1 = gimple_assign_rhs1 (gs: stmt);

      /* Record the RHS as a read when it is a declaration or a memory
	 reference whose base is neither an SSA name nor invariant.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (t: op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (obj: ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (gs: stmt))
	switch (gimple_call_internal_fn (gs: stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (gs: stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Reconstruct the accessed memory as a MEM_REF with the
	       alignment carried by the call's second argument.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (gs: stmt, index: 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* A masked access may not happen at runtime.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (obj: ref);
	    return false;
	  case IFN_MASK_CALL:
	    /* Skip the function-address argument.  */
	    i = 1;
	    gcc_fallthrough ();
	  default:
	    break;
	  }

      /* Record reads from the call's reference arguments.  */
      op0 = gimple_call_lhs (gs: stmt);
      n = gimple_call_num_args (gs: stmt);
      for (; i < n; i++)
	{
	  op1 = gimple_call_arg (gs: stmt, index: i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (t: op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (obj: ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the store through the LHS, if any.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (t: op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (obj: ref);
    }
  return clobbers_memory;
}
5941 | |
5942 | |
5943 | /* Returns true if the loop-nest has any data reference. */ |
5944 | |
bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  /* Scan every statement of every basic block of the loop body;
     return as soon as one memory reference is found.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
	{
	  gimple *stmt = gsi_stmt (i: bsi);
	  get_references_in_stmt (stmt, references: &references);
	  if (references.length ())
	    {
	      free (ptr: bbs);
	      return true;
	    }
	}
    }
  free (ptr: bbs);
  return false;
}
5970 | |
5971 | /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable |
5972 | reference, returns false, otherwise returns true. NEST is the outermost |
5973 | loop of the loop nest in which the references should be analyzed. */ |
5974 | |
opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  /* A true result from get_references_in_stmt means the statement may
     clobber memory in an unanalyzable way.  */
  if (get_references_in_stmt (stmt, references: &references))
    return opt_result::failure_at (loc: stmt, fmt: "statement clobbers memory: %G" ,
				   stmt);

  /* Turn each collected reference location into a data_reference.  */
  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest: nest ? loop_preheader_edge (nest) : NULL,
			    loop: loop_containing_stmt (stmt), memref: ref.ref,
			    stmt, is_read: ref.is_read, is_conditional_in_stmt: ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (obj: dr);
    }

  return opt_result::success ();
}
5997 | |
5998 | /* Stores the data references in STMT to DATAREFS. If there is an |
5999 | unanalyzable reference, returns false, otherwise returns true. |
6000 | NEST is the outermost loop of the loop nest in which the references |
6001 | should be instantiated, LOOP is the loop in which the references |
6002 | should be analyzed. */ |
6003 | |
bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  /* Fail when the statement may clobber memory in an unanalyzable
     way.  */
  if (get_references_in_stmt (stmt, references: &references))
    return false;

  /* Turn each collected reference location into a data_reference,
     instantiated relative to edge NEST and analyzed in LOOP.  */
  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, memref: ref.ref, stmt, is_read: ref.is_read,
			    is_conditional_in_stmt: ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (obj: dr);
    }

  return ret;
}
6025 | |
/* Search the data references in basic block BB, which belongs to LOOP,
   and record the information into DATAREFS.  Returns chrec_dont_know when
   failing to analyze a difficult case, returns NULL_TREE otherwise.  */
6029 | |
6030 | tree |
6031 | find_data_references_in_bb (class loop *loop, basic_block bb, |
6032 | vec<data_reference_p> *datarefs) |
6033 | { |
6034 | gimple_stmt_iterator bsi; |
6035 | |
6036 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
6037 | { |
6038 | gimple *stmt = gsi_stmt (i: bsi); |
6039 | |
6040 | if (!find_data_references_in_stmt (nest: loop, stmt, datarefs)) |
6041 | { |
6042 | struct data_reference *res; |
6043 | res = XCNEW (struct data_reference); |
6044 | datarefs->safe_push (obj: res); |
6045 | |
6046 | return chrec_dont_know; |
6047 | } |
6048 | } |
6049 | |
6050 | return NULL_TREE; |
6051 | } |
6052 | |
6053 | /* Search the data references in LOOP, and record the information into |
6054 | DATAREFS. Returns chrec_dont_know when failing to analyze a |
6055 | difficult case, returns NULL_TREE otherwise. |
6056 | |
6057 | TODO: This function should be made smarter so that it can handle address |
6058 | arithmetic as if they were array accesses, etc. */ |
6059 | |
6060 | tree |
6061 | find_data_references_in_loop (class loop *loop, |
6062 | vec<data_reference_p> *datarefs) |
6063 | { |
6064 | basic_block bb, *bbs; |
6065 | unsigned int i; |
6066 | |
6067 | bbs = get_loop_body_in_dom_order (loop); |
6068 | |
6069 | for (i = 0; i < loop->num_nodes; i++) |
6070 | { |
6071 | bb = bbs[i]; |
6072 | |
6073 | if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) |
6074 | { |
6075 | free (ptr: bbs); |
6076 | return chrec_dont_know; |
6077 | } |
6078 | } |
6079 | free (ptr: bbs); |
6080 | |
6081 | return NULL_TREE; |
6082 | } |
6083 | |
6084 | /* Return the alignment in bytes that DRB is guaranteed to have at all |
6085 | times. */ |
6086 | |
6087 | unsigned int |
6088 | dr_alignment (innermost_loop_behavior *drb) |
6089 | { |
6090 | /* Get the alignment of BASE_ADDRESS + INIT. */ |
6091 | unsigned int alignment = drb->base_alignment; |
6092 | unsigned int misalignment = (drb->base_misalignment |
6093 | + TREE_INT_CST_LOW (drb->init)); |
6094 | if (misalignment != 0) |
6095 | alignment = MIN (alignment, misalignment & -misalignment); |
6096 | |
6097 | /* Cap it to the alignment of OFFSET. */ |
6098 | if (!integer_zerop (drb->offset)) |
6099 | alignment = MIN (alignment, drb->offset_alignment); |
6100 | |
6101 | /* Cap it to the alignment of STEP. */ |
6102 | if (!integer_zerop (drb->step)) |
6103 | alignment = MIN (alignment, drb->step_alignment); |
6104 | |
6105 | return alignment; |
6106 | } |
6107 | |
6108 | /* If BASE is a pointer-typed SSA name, try to find the object that it |
6109 | is based on. Return this object X on success and store the alignment |
6110 | in bytes of BASE - &X in *ALIGNMENT_OUT. */ |
6111 | |
static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced back to an object.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* Express the pointer as a chain of recurrences so that loop-varying
     steps can be peeled off below.  */
  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (stmt: def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  Start from the largest alignment we could ever
     guarantee for an object.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* A bit position that is not a whole number of bytes cannot be
     expressed as a byte alignment; give up.  */
  if (!base || !multiple_p (a: bitpos, BITS_PER_UNIT, multiple: &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (a: bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
6165 | |
6166 | /* Return the object whose alignment would need to be changed in order |
6167 | to increase the alignment of ADDR. Store the maximum achievable |
6168 | alignment in *MAX_ALIGNMENT. */ |
6169 | |
6170 | tree |
6171 | get_base_for_alignment (tree addr, unsigned int *max_alignment) |
6172 | { |
6173 | tree base = get_base_for_alignment_1 (base: addr, alignment_out: max_alignment); |
6174 | if (base) |
6175 | return base; |
6176 | |
6177 | if (TREE_CODE (addr) == ADDR_EXPR) |
6178 | addr = TREE_OPERAND (addr, 0); |
6179 | *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; |
6180 | return addr; |
6181 | } |
6182 | |
6183 | /* Recursive helper function. */ |
6184 | |
6185 | static bool |
6186 | find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest) |
6187 | { |
6188 | /* Inner loops of the nest should not contain siblings. Example: |
6189 | when there are two consecutive loops, |
6190 | |
6191 | | loop_0 |
6192 | | loop_1 |
6193 | | A[{0, +, 1}_1] |
6194 | | endloop_1 |
6195 | | loop_2 |
6196 | | A[{0, +, 1}_2] |
6197 | | endloop_2 |
6198 | | endloop_0 |
6199 | |
6200 | the dependence relation cannot be captured by the distance |
6201 | abstraction. */ |
6202 | if (loop->next) |
6203 | return false; |
6204 | |
6205 | loop_nest->safe_push (obj: loop); |
6206 | if (loop->inner) |
6207 | return find_loop_nest_1 (loop: loop->inner, loop_nest); |
6208 | return true; |
6209 | } |
6210 | |
6211 | /* Return false when the LOOP is not well nested. Otherwise return |
6212 | true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will |
6213 | contain the loops from the outermost to the innermost, as they will |
6214 | appear in the classic distance vector. */ |
6215 | |
6216 | bool |
6217 | find_loop_nest (class loop *loop, vec<loop_p> *loop_nest) |
6218 | { |
6219 | loop_nest->safe_push (obj: loop); |
6220 | if (loop->inner) |
6221 | return find_loop_nest_1 (loop: loop->inner, loop_nest); |
6222 | return true; |
6223 | } |
6224 | |
6225 | /* Returns true when the data dependences have been computed, false otherwise. |
6226 | Given a loop nest LOOP, the following vectors are returned: |
6227 | DATAREFS is initialized to all the array elements contained in this loop, |
6228 | DEPENDENCE_RELATIONS contains the relations between the data references. |
6229 | Compute read-read and self relations if |
6230 | COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ |
6231 | |
6232 | bool |
6233 | compute_data_dependences_for_loop (class loop *loop, |
6234 | bool compute_self_and_read_read_dependences, |
6235 | vec<loop_p> *loop_nest, |
6236 | vec<data_reference_p> *datarefs, |
6237 | vec<ddr_p> *dependence_relations) |
6238 | { |
6239 | bool res = true; |
6240 | |
6241 | memset (s: &dependence_stats, c: 0, n: sizeof (dependence_stats)); |
6242 | |
6243 | /* If the loop nest is not well formed, or one of the data references |
6244 | is not computable, give up without spending time to compute other |
6245 | dependences. */ |
6246 | if (!loop |
6247 | || !find_loop_nest (loop, loop_nest) |
6248 | || find_data_references_in_loop (loop, datarefs) == chrec_dont_know |
6249 | || !compute_all_dependences (datarefs: *datarefs, dependence_relations, loop_nest: *loop_nest, |
6250 | compute_self_and_rr: compute_self_and_read_read_dependences)) |
6251 | res = false; |
6252 | |
6253 | if (dump_file && (dump_flags & TDF_STATS)) |
6254 | { |
6255 | fprintf (stream: dump_file, format: "Dependence tester statistics:\n" ); |
6256 | |
6257 | fprintf (stream: dump_file, format: "Number of dependence tests: %d\n" , |
6258 | dependence_stats.num_dependence_tests); |
6259 | fprintf (stream: dump_file, format: "Number of dependence tests classified dependent: %d\n" , |
6260 | dependence_stats.num_dependence_dependent); |
6261 | fprintf (stream: dump_file, format: "Number of dependence tests classified independent: %d\n" , |
6262 | dependence_stats.num_dependence_independent); |
6263 | fprintf (stream: dump_file, format: "Number of undetermined dependence tests: %d\n" , |
6264 | dependence_stats.num_dependence_undetermined); |
6265 | |
6266 | fprintf (stream: dump_file, format: "Number of subscript tests: %d\n" , |
6267 | dependence_stats.num_subscript_tests); |
6268 | fprintf (stream: dump_file, format: "Number of undetermined subscript tests: %d\n" , |
6269 | dependence_stats.num_subscript_undetermined); |
6270 | fprintf (stream: dump_file, format: "Number of same subscript function: %d\n" , |
6271 | dependence_stats.num_same_subscript_function); |
6272 | |
6273 | fprintf (stream: dump_file, format: "Number of ziv tests: %d\n" , |
6274 | dependence_stats.num_ziv); |
6275 | fprintf (stream: dump_file, format: "Number of ziv tests returning dependent: %d\n" , |
6276 | dependence_stats.num_ziv_dependent); |
6277 | fprintf (stream: dump_file, format: "Number of ziv tests returning independent: %d\n" , |
6278 | dependence_stats.num_ziv_independent); |
6279 | fprintf (stream: dump_file, format: "Number of ziv tests unimplemented: %d\n" , |
6280 | dependence_stats.num_ziv_unimplemented); |
6281 | |
6282 | fprintf (stream: dump_file, format: "Number of siv tests: %d\n" , |
6283 | dependence_stats.num_siv); |
6284 | fprintf (stream: dump_file, format: "Number of siv tests returning dependent: %d\n" , |
6285 | dependence_stats.num_siv_dependent); |
6286 | fprintf (stream: dump_file, format: "Number of siv tests returning independent: %d\n" , |
6287 | dependence_stats.num_siv_independent); |
6288 | fprintf (stream: dump_file, format: "Number of siv tests unimplemented: %d\n" , |
6289 | dependence_stats.num_siv_unimplemented); |
6290 | |
6291 | fprintf (stream: dump_file, format: "Number of miv tests: %d\n" , |
6292 | dependence_stats.num_miv); |
6293 | fprintf (stream: dump_file, format: "Number of miv tests returning dependent: %d\n" , |
6294 | dependence_stats.num_miv_dependent); |
6295 | fprintf (stream: dump_file, format: "Number of miv tests returning independent: %d\n" , |
6296 | dependence_stats.num_miv_independent); |
6297 | fprintf (stream: dump_file, format: "Number of miv tests unimplemented: %d\n" , |
6298 | dependence_stats.num_miv_unimplemented); |
6299 | } |
6300 | |
6301 | return res; |
6302 | } |
6303 | |
6304 | /* Free the memory used by a data dependence relation DDR. */ |
6305 | |
6306 | void |
6307 | free_dependence_relation (struct data_dependence_relation *ddr) |
6308 | { |
6309 | if (ddr == NULL) |
6310 | return; |
6311 | |
6312 | if (DDR_SUBSCRIPTS (ddr).exists ()) |
6313 | free_subscripts (DDR_SUBSCRIPTS (ddr)); |
6314 | DDR_DIST_VECTS (ddr).release (); |
6315 | DDR_DIR_VECTS (ddr).release (); |
6316 | |
6317 | free (ptr: ddr); |
6318 | } |
6319 | |
6320 | /* Free the memory used by the data dependence relations from |
6321 | DEPENDENCE_RELATIONS. */ |
6322 | |
6323 | void |
6324 | free_dependence_relations (vec<ddr_p>& dependence_relations) |
6325 | { |
6326 | for (data_dependence_relation *ddr : dependence_relations) |
6327 | if (ddr) |
6328 | free_dependence_relation (ddr); |
6329 | |
6330 | dependence_relations.release (); |
6331 | } |
6332 | |
6333 | /* Free the memory used by the data references from DATAREFS. */ |
6334 | |
6335 | void |
6336 | free_data_refs (vec<data_reference_p>& datarefs) |
6337 | { |
6338 | for (data_reference *dr : datarefs) |
6339 | free_data_ref (dr); |
6340 | datarefs.release (); |
6341 | } |
6342 | |
6343 | /* Common routine implementing both dr_direction_indicator and |
6344 | dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known |
6345 | to be >= USEFUL_MIN and -1 if the indicator is known to be negative. |
6346 | Return the step as the indicator otherwise. */ |
6347 | |
static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  /* No recorded step: nothing useful to say.  */
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 I.e. the unscaled step must lie in [MINV, MAXV] for
	 STEP * FACTOR to fit in ssizetype.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (t: factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (t: factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no tighter range
	 is available.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (r&: vr, expr: step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range;
	 only then is the sign of the unscaled step meaningful.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (x: minv, y: widest_int::from (x: step_min, sgn))
	  && wi::ges_p (x: maxv, y: widest_int::from (x: step_max, sgn)))
	{
	  /* Known to be at least USEFUL_MIN: collapse to that value.  */
	  if (wi::ge_p (x: step_min, y: useful_min, sgn))
	    return ssize_int (useful_min);
	  /* Known negative: collapse to -1.  */
	  else if (wi::lt_p (x: step_max, y: 0, sgn))
	    return ssize_int (-1);
	  /* Sign unknown: let the caller test the unscaled step.  */
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No simplification applied; use the original step as indicator.  */
  return DR_STEP (dr);
}
6409 | |
6410 | /* Return a value that is negative iff DR has a negative step. */ |
6411 | |
tree
dr_direction_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 0: a step known to be nonnegative collapses to
     ssize_int (0), which is not negative, as required.  */
  return dr_step_indicator (dr, useful_min: 0);
}
6417 | |
6418 | /* Return a value that is zero iff DR has a zero step. */ |
6419 | |
tree
dr_zero_step_indicator (struct data_reference *dr)
{
  /* USEFUL_MIN of 1: a step known to be >= 1 collapses to
     ssize_int (1), which is nonzero, as required.  */
  return dr_step_indicator (dr, useful_min: 1);
}
6425 | |
6426 | /* Return true if DR is known to have a nonnegative (but possibly zero) |
6427 | step. */ |
6428 | |
6429 | bool |
6430 | dr_known_forward_stride_p (struct data_reference *dr) |
6431 | { |
6432 | tree indicator = dr_direction_indicator (dr); |
6433 | tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node, |
6434 | fold_convert (ssizetype, indicator), |
6435 | ssize_int (0)); |
6436 | return neg_step_val && integer_zerop (neg_step_val); |
6437 | } |
6438 | |