1/* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "predict.h"
31#include "memmodel.h"
32#include "tm_p.h"
33#include "ssa.h"
34#include "optabs-tree.h"
35#include "cgraph.h"
36#include "dumpfile.h"
37#include "alias.h"
38#include "fold-const.h"
39#include "stor-layout.h"
40#include "tree-eh.h"
41#include "gimplify.h"
42#include "gimple-iterator.h"
43#include "gimplify-me.h"
44#include "tree-ssa-loop-ivopts.h"
45#include "tree-ssa-loop-manip.h"
46#include "tree-ssa-loop.h"
47#include "cfgloop.h"
48#include "tree-scalar-evolution.h"
49#include "tree-vectorizer.h"
50#include "expr.h"
51#include "builtins.h"
52#include "params.h"
53#include "tree-cfg.h"
54#include "tree-hash-traits.h"
55
56/* Return true if load- or store-lanes optab OPTAB is implemented for
57 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
58
59static bool
60vect_lanes_optab_supported_p (const char *name, convert_optab optab,
61 tree vectype, unsigned HOST_WIDE_INT count)
62{
63 machine_mode mode;
64 scalar_int_mode array_mode;
65 bool limit_p;
66
67 mode = TYPE_MODE (vectype);
68 limit_p = !targetm.array_mode_supported_p (mode, count);
69 if (!int_mode_for_size (count * GET_MODE_BITSIZE (mode),
70 limit_p).exists (&array_mode))
71 {
72 if (dump_enabled_p ())
73 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
74 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
75 GET_MODE_NAME (mode), count);
76 return false;
77 }
78
79 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
80 {
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
83 "cannot use %s<%s><%s>\n", name,
84 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
85 return false;
86 }
87
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
91 GET_MODE_NAME (mode));
92
93 return true;
94}
95
96
97/* Return the smallest scalar part of STMT.
98 This is used to determine the vectype of the stmt. We generally set the
99 vectype according to the type of the result (lhs). For stmts whose
100 result-type is different than the type of the arguments (e.g., demotion,
101 promotion), vectype will be reset appropriately (later). Note that we have
102 to visit the smallest datatype in this function, because that determines the
103 VF. If the smallest datatype in the loop is present only as the rhs of a
104 promotion operation - we'd miss it.
105 Such a case, where a variable of this datatype does not appear in the lhs
106 anywhere in the loop, can only occur if it's an invariant: e.g.:
107 'int_x = (int) short_inv', which we'd expect to have been optimized away by
108 invariant motion. However, we cannot rely on invariant motion to always
109 take invariants out of the loop, and so in the case of promotion we also
110 have to check the rhs.
111 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
112 types. */
113
114tree
115vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
116 HOST_WIDE_INT *rhs_size_unit)
117{
118 tree scalar_type = gimple_expr_type (stmt);
119 HOST_WIDE_INT lhs, rhs;
120
121 /* During the analysis phase, this function is called on arbitrary
122 statements that might not have scalar results. */
123 if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
124 return scalar_type;
125
126 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
127
128 if (is_gimple_assign (stmt)
129 && (gimple_assign_cast_p (stmt)
130 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
131 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
132 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
133 {
134 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
135
136 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
137 if (rhs < lhs)
138 scalar_type = rhs_type;
139 }
140
141 *lhs_size_unit = lhs;
142 *rhs_size_unit = rhs;
143 return scalar_type;
144}
145
146
147/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
148 tested at run-time. Return TRUE if DDR was successfully inserted.
149 Return false if versioning is not supported. */
150
151static bool
152vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
153{
154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
155
156 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
157 return false;
158
159 if (!runtime_alias_check_p (ddr, loop,
160 optimize_loop_nest_for_speed_p (loop)))
161 return false;
162
163 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
164 return true;
165}
166
167
168/* A subroutine of vect_analyze_data_ref_dependence. Handle
169 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
170 distances. These distances are conservatively correct but they don't
171 reflect a guaranteed dependence.
172
173 Return true if this function does all the work necessary to avoid
174 an alias or false if the caller should use the dependence distances
175 to limit the vectorization factor in the usual way. LOOP_DEPTH is
176 the depth of the loop described by LOOP_VINFO and the other arguments
177 are as for vect_analyze_data_ref_dependence. */
178
179static bool
180vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
181 loop_vec_info loop_vinfo,
182 int loop_depth, int *max_vf)
183{
184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
185 lambda_vector dist_v;
186 unsigned int i;
187 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
188 {
189 int dist = dist_v[loop_depth];
190 if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
191 {
192 /* If the user asserted safelen >= DIST consecutive iterations
193 can be executed concurrently, assume independence.
194
195 ??? An alternative would be to add the alias check even
196 in this case, and vectorize the fallback loop with the
197 maximum VF set to safelen. However, if the user has
198 explicitly given a length, it's less likely that that
199 would be a win. */
200 if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
201 {
202 if (loop->safelen < *max_vf)
203 *max_vf = loop->safelen;
204 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
205 continue;
206 }
207
208 /* For dependence distances of 2 or more, we have the option
209 of limiting VF or checking for an alias at runtime.
210 Prefer to check at runtime if we can, to avoid limiting
211 the VF unnecessarily when the bases are in fact independent.
212
213 Note that the alias checks will be removed if the VF ends up
214 being small enough. */
215 return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
216 }
217 }
218 return true;
219}
220
221
222/* Function vect_analyze_data_ref_dependence.
223
224 Return TRUE if there (might) exist a dependence between a memory-reference
225 DRA and a memory-reference DRB. When versioning for alias may check a
226 dependence at run-time, return FALSE. Adjust *MAX_VF according to
227 the data dependence. */
228
229static bool
230vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
231 loop_vec_info loop_vinfo, int *max_vf)
232{
233 unsigned int i;
234 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
235 struct data_reference *dra = DDR_A (ddr);
236 struct data_reference *drb = DDR_B (ddr);
237 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
238 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
239 lambda_vector dist_v;
240 unsigned int loop_depth;
241
242 /* In loop analysis all data references should be vectorizable. */
243 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
244 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
245 gcc_unreachable ();
246
247 /* Independent data accesses. */
248 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
249 return false;
250
251 if (dra == drb
252 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
253 return false;
254
255 /* We do not have to consider dependences between accesses that belong
256 to the same group. */
257 if (GROUP_FIRST_ELEMENT (stmtinfo_a)
258 && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b))
259 return false;
260
261 /* Even if we have an anti-dependence then, as the vectorized loop covers at
262 least two scalar iterations, there is always also a true dependence.
263 As the vectorizer does not re-order loads and stores we can ignore
264 the anti-dependence if TBAA can disambiguate both DRs similar to the
265 case with known negative distance anti-dependences (positive
266 distance anti-dependences would violate TBAA constraints). */
267 if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
268 || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
269 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
270 get_alias_set (DR_REF (drb))))
271 return false;
272
273 /* Unknown data dependence. */
274 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
275 {
276 /* If user asserted safelen consecutive iterations can be
277 executed concurrently, assume independence. */
278 if (loop->safelen >= 2)
279 {
280 if (loop->safelen < *max_vf)
281 *max_vf = loop->safelen;
282 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
283 return false;
284 }
285
286 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
287 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
288 {
289 if (dump_enabled_p ())
290 {
291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
292 "versioning for alias not supported for: "
293 "can't determine dependence between ");
294 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
295 DR_REF (dra));
296 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
297 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
298 DR_REF (drb));
299 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
300 }
301 return true;
302 }
303
304 if (dump_enabled_p ())
305 {
306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
307 "versioning for alias required: "
308 "can't determine dependence between ");
309 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
310 DR_REF (dra));
311 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
312 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
313 DR_REF (drb));
314 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
315 }
316
317 /* Add to list of ddrs that need to be tested at run-time. */
318 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
319 }
320
321 /* Known data dependence. */
322 if (DDR_NUM_DIST_VECTS (ddr) == 0)
323 {
324 /* If user asserted safelen consecutive iterations can be
325 executed concurrently, assume independence. */
326 if (loop->safelen >= 2)
327 {
328 if (loop->safelen < *max_vf)
329 *max_vf = loop->safelen;
330 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
331 return false;
332 }
333
334 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
335 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
336 {
337 if (dump_enabled_p ())
338 {
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 "versioning for alias not supported for: "
341 "bad dist vector for ");
342 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
343 DR_REF (dra));
344 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
345 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
346 DR_REF (drb));
347 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
348 }
349 return true;
350 }
351
352 if (dump_enabled_p ())
353 {
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
355 "versioning for alias required: "
356 "bad dist vector for ");
357 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
358 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
359 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
360 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
361 }
362 /* Add to list of ddrs that need to be tested at run-time. */
363 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
364 }
365
366 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
367
368 if (DDR_COULD_BE_INDEPENDENT_P (ddr)
369 && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
370 loop_depth, max_vf))
371 return false;
372
373 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
374 {
375 int dist = dist_v[loop_depth];
376
377 if (dump_enabled_p ())
378 dump_printf_loc (MSG_NOTE, vect_location,
379 "dependence distance = %d.\n", dist);
380
381 if (dist == 0)
382 {
383 if (dump_enabled_p ())
384 {
385 dump_printf_loc (MSG_NOTE, vect_location,
386 "dependence distance == 0 between ");
387 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
388 dump_printf (MSG_NOTE, " and ");
389 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
390 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
391 }
392
393 /* When we perform grouped accesses and perform implicit CSE
394 by detecting equal accesses and doing disambiguation with
395 runtime alias tests like for
396 .. = a[i];
397 .. = a[i+1];
398 a[i] = ..;
399 a[i+1] = ..;
400 *p = ..;
401 .. = a[i];
402 .. = a[i+1];
403 where we will end up loading { a[i], a[i+1] } once, make
404 sure that inserting group loads before the first load and
405 stores after the last store will do the right thing.
406 Similar for groups like
407 a[i] = ...;
408 ... = a[i];
409 a[i+1] = ...;
410 where loads from the group interleave with the store. */
411 if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
412 || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
413 {
414 gimple *earlier_stmt;
415 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
416 if (DR_IS_WRITE
417 (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
418 {
419 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
421 "READ_WRITE dependence in interleaving."
422 "\n");
423 return true;
424 }
425 }
426
427 continue;
428 }
429
430 if (dist > 0 && DDR_REVERSED_P (ddr))
431 {
432 /* If DDR_REVERSED_P the order of the data-refs in DDR was
433 reversed (to make distance vector positive), and the actual
434 distance is negative. */
435 if (dump_enabled_p ())
436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
437 "dependence distance negative.\n");
438 /* Record a negative dependence distance to later limit the
439 amount of stmt copying / unrolling we can perform.
440 Only need to handle read-after-write dependence. */
441 if (DR_IS_READ (drb)
442 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
443 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
444 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
445 continue;
446 }
447
448 if (abs (dist) >= 2
449 && abs (dist) < *max_vf)
450 {
451 /* The dependence distance requires reduction of the maximal
452 vectorization factor. */
453 *max_vf = abs (dist);
454 if (dump_enabled_p ())
455 dump_printf_loc (MSG_NOTE, vect_location,
456 "adjusting maximal vectorization factor to %i\n",
457 *max_vf);
458 }
459
460 if (abs (dist) >= *max_vf)
461 {
462 /* Dependence distance does not create dependence, as far as
463 vectorization is concerned, in this case. */
464 if (dump_enabled_p ())
465 dump_printf_loc (MSG_NOTE, vect_location,
466 "dependence distance >= VF.\n");
467 continue;
468 }
469
470 if (dump_enabled_p ())
471 {
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
473 "not vectorized, possible dependence "
474 "between data-refs ");
475 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
476 dump_printf (MSG_NOTE, " and ");
477 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
478 dump_printf (MSG_NOTE, "\n");
479 }
480
481 return true;
482 }
483
484 return false;
485}
486
487/* Function vect_analyze_data_ref_dependences.
488
489 Examine all the data references in the loop, and make sure there do not
490 exist any data dependences between them. Set *MAX_VF according to
491 the maximum vectorization factor the data dependences allow. */
492
493bool
494vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
495{
496 unsigned int i;
497 struct data_dependence_relation *ddr;
498
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "=== vect_analyze_data_ref_dependences ===\n");
502
503 LOOP_VINFO_DDRS (loop_vinfo)
504 .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
505 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
506 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
507 /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS. */
508 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
509 &LOOP_VINFO_DDRS (loop_vinfo),
510 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
511 return false;
512
513 /* For epilogues we either have no aliases or alias versioning
514 was applied to original loop. Therefore we may just get max_vf
515 using VF of original loop. */
516 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
517 *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
518 else
519 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
520 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
521 return false;
522
523 return true;
524}
525
526
527/* Function vect_slp_analyze_data_ref_dependence.
528
529 Return TRUE if there (might) exist a dependence between a memory-reference
530 DRA and a memory-reference DRB. When versioning for alias may check a
531 dependence at run-time, return FALSE. Adjust *MAX_VF according to
532 the data dependence. */
533
534static bool
535vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
536{
537 struct data_reference *dra = DDR_A (ddr);
538 struct data_reference *drb = DDR_B (ddr);
539
540 /* We need to check dependences of statements marked as unvectorizable
541 as well, they still can prohibit vectorization. */
542
543 /* Independent data accesses. */
544 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
545 return false;
546
547 if (dra == drb)
548 return false;
549
550 /* Read-read is OK. */
551 if (DR_IS_READ (dra) && DR_IS_READ (drb))
552 return false;
553
554 /* If dra and drb are part of the same interleaving chain consider
555 them independent. */
556 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
557 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
558 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
559 return false;
560
561 /* Unknown data dependence. */
562 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
563 {
564 if (dump_enabled_p ())
565 {
566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
567 "can't determine dependence between ");
568 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
569 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
570 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
571 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
572 }
573 }
574 else if (dump_enabled_p ())
575 {
576 dump_printf_loc (MSG_NOTE, vect_location,
577 "determined dependence between ");
578 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
579 dump_printf (MSG_NOTE, " and ");
580 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
581 dump_printf (MSG_NOTE, "\n");
582 }
583
584 return true;
585}
586
587
/* Analyze dependences involved in the transform of SLP NODE.  STORES
   contain the vector of scalar stores of this instance if we are
   disambiguating the loads; LAST_STORE is then the last of those stores
   (the position they will be sunk to).  Return true if every scalar
   stmt of NODE can be sunk to the last stmt of its group without
   violating a memory dependence.  */

static bool
vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
				   vec<gimple *> stores, gimple *last_store)
{
  /* This walks over all stmts involved in the SLP load/store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
  for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
    {
      gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
      if (access == last_access)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
      /* Walk each stmt strictly between ACCESS and the sink position,
	 looking for a memory access that conflicts with DR_A.  */
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
	   gsi_stmt (gsi) != last_access; gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  /* Stmts without a virtual use don't touch memory; when DR_A
	     is a read, only intervening stores (vdefs) matter.  */
	  if (! gimple_vuse (stmt)
	      || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
	    continue;

	  /* If we couldn't record a (single) data reference for this
	     stmt we have to give up.  */
	  /* ??? Here and below if dependence analysis fails we can resort
	     to the alias oracle which can handle more kinds of stmts.  */
	  data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
	  if (!dr_b)
	    return false;

	  bool dependent = false;
	  /* If we run into a store of this same instance (we've just
	     marked those) then delay dependence checking until we run
	     into the last store because this is where it will have
	     been sunk to (and we verify if we can do that as well).  */
	  if (gimple_visited_p (stmt))
	    {
	      if (stmt != last_store)
		continue;
	      /* At the last store, check DR_A against every store of
		 the instance, since they all end up here.  */
	      unsigned i;
	      gimple *store;
	      FOR_EACH_VEC_ELT (stores, i, store)
		{
		  data_reference *store_dr
		    = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
		  ddr_p ddr = initialize_data_dependence_relation
				(dr_a, store_dr, vNULL);
		  dependent = vect_slp_analyze_data_ref_dependence (ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    break;
		}
	    }
	  else
	    {
	      ddr_p ddr = initialize_data_dependence_relation (dr_a,
							       dr_b, vNULL);
	      dependent = vect_slp_analyze_data_ref_dependence (ddr);
	      free_dependence_relation (ddr);
	    }
	  if (dependent)
	    return false;
	}
    }
  return true;
}
658
659
/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP instance INSTANCE and make
   sure there do not exist any data dependences between them that would
   prevent vectorizing the instance.  Return true on success.  */

bool
vect_slp_analyze_instance_dependence (slp_instance instance)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_slp_analyze_instance_dependence ===\n");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = SLP_INSTANCE_TREE (instance);
  /* A root without a data reference means this instance has no store
     group (e.g. a reduction); nothing to mark in that case.  */
  if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
    store = NULL;

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  gimple *last_store = NULL;
  if (store)
    {
      if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
	return false;

      /* Mark stores in this instance and remember the last one.  The
	 visited flag lets the node-dependence walk recognize the
	 instance's own stores and defer checking them.  */
      last_store = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  slp_tree load;
  unsigned int i;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
    if (! vect_slp_analyze_node_dependences (instance, load,
					     store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);

  return res;
}
714
715/* Record in VINFO the base alignment guarantee given by DRB. STMT is
716 the statement that contains DRB, which is useful for recording in the
717 dump file. */
718
719static void
720vect_record_base_alignment (vec_info *vinfo, gimple *stmt,
721 innermost_loop_behavior *drb)
722{
723 bool existed;
724 innermost_loop_behavior *&entry
725 = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
726 if (!existed || entry->base_alignment < drb->base_alignment)
727 {
728 entry = drb;
729 if (dump_enabled_p ())
730 {
731 dump_printf_loc (MSG_NOTE, vect_location,
732 "recording new base alignment for ");
733 dump_generic_expr (MSG_NOTE, TDF_SLIM, drb->base_address);
734 dump_printf (MSG_NOTE, "\n");
735 dump_printf_loc (MSG_NOTE, vect_location,
736 " alignment: %d\n", drb->base_alignment);
737 dump_printf_loc (MSG_NOTE, vect_location,
738 " misalignment: %d\n", drb->base_misalignment);
739 dump_printf_loc (MSG_NOTE, vect_location,
740 " based on: ");
741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
742 }
743 }
744}
745
746/* If the region we're going to vectorize is reached, all unconditional
747 data references occur at least once. We can therefore pool the base
748 alignment guarantees from each unconditional reference. Do this by
749 going through all the data references in VINFO and checking whether
750 the containing statement makes the reference unconditionally. If so,
751 record the alignment of the base address in VINFO so that it can be
752 used for all other references with the same base. */
753
754void
755vect_record_base_alignments (vec_info *vinfo)
756{
757 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
758 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
759 data_reference *dr;
760 unsigned int i;
761 FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
762 if (!DR_IS_CONDITIONAL_IN_STMT (dr))
763 {
764 gimple *stmt = DR_STMT (dr);
765 vect_record_base_alignment (vinfo, stmt, &DR_INNERMOST (dr));
766
767 /* If DR is nested in the loop that is being vectorized, we can also
768 record the alignment of the base wrt the outer loop. */
769 if (loop && nested_in_vect_loop_p (loop, stmt))
770 {
771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
772 vect_record_base_alignment
773 (vinfo, stmt, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
774 }
775 }
776}
777
778/* Return the target alignment for the vectorized form of DR. */
779
780static unsigned int
781vect_calculate_target_alignment (struct data_reference *dr)
782{
783 gimple *stmt = DR_STMT (dr);
784 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
785 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
786 return targetm.vectorize.preferred_vector_alignment (vectype);
787}
788
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   (NOTE(review): as written every path currently returns true; when the
   misalignment cannot be determined it is simply left as
   DR_MISALIGNMENT_UNKNOWN.)

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);

  innermost_loop_behavior *drb = vect_dr_behavior (dr);
  bool step_preserves_misalignment_p;

  /* Target-preferred alignment for the vectorized access, in bytes.  */
  unsigned HOST_WIDE_INT vector_alignment
    = vect_calculate_target_alignment (dr) / BITS_PER_UNIT;
  DR_TARGET_ALIGNMENT (dr) = vector_alignment;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr) % vector_alignment) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n");
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      /* One vector iteration advances by VF scalar steps, so it is the
	 per-vector-iteration step that must divide the alignment.  */
      unsigned vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= ((DR_STEP_ALIGNMENT (dr) * vf) % vector_alignment) == 0;

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n");
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  */
  innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
  if (entry && base_alignment < (*entry)->base_alignment)
    {
      base_alignment = (*entry)->base_alignment;
      base_misalignment = (*entry)->base_misalignment;
    }

  /* Bail out (leaving the misalignment unknown) if the offset or step
     prevents a compile-time answer.  */
  if (drb->offset_alignment < vector_alignment
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Unknown alignment for access: ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return true;
    }

  /* The base is under-aligned: try to force the alignment of the
     underlying decl instead.  */
  if (base_alignment < vector_alignment)
    {
      tree base = drb->base_address;
      if (TREE_CODE (base) == ADDR_EXPR)
	base = TREE_OPERAND (base, 0);
      if (!vect_can_force_dr_alignment_p (base,
					  vector_alignment * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "can't force alignment of ref: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
	      dump_printf (MSG_NOTE, "\n");
	    }
	  return true;
	}

      /* Respect an explicit user-specified alignment.  */
      if (DECL_USER_ALIGN (base))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "not forcing alignment of user-aligned "
			       "variable: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, base);
	      dump_printf (MSG_NOTE, "\n");
	    }
	  return true;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
	  dump_printf (MSG_NOTE, "\n");
	}

      DR_VECT_AUX (dr)->base_decl = base;
      DR_VECT_AUX (dr)->base_misaligned = true;
      base_misalignment = 0;
    }
  /* The misalignment of the first access is the base misalignment plus
     the constant initial offset.  */
  unsigned int misalignment = (base_misalignment
			       + TREE_INT_CST_LOW (drb->init));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_sgn (drb->step) < 0)
    /* PLUS because STEP is negative.  */
    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
		     * TREE_INT_CST_LOW (drb->step));

  SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    }

  return true;
}
970
971/* Function vect_update_misalignment_for_peel.
972 Sets DR's misalignment
973 - to 0 if it has the same alignment as DR_PEEL,
   - to the misalignment computed using NPEEL if DR's misalignment is known,
975 - to -1 (unknown) otherwise.
976
977 DR - the data reference whose misalignment is to be adjusted.
978 DR_PEEL - the data reference whose misalignment is being made
979 zero in the vector loop by the peel.
980 NPEEL - the number of iterations in the peel loop if the misalignment
981 of DR_PEEL is known at compile time. */
982
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
				   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  vec<dr_p> same_aligned_drs;
  struct data_reference *current_dr;
  /* Misalignment granularity: the size of one scalar access.  */
  int dr_size = vect_get_scalar_dr_size (dr);
  int dr_peel_size = vect_get_scalar_dr_size (dr_peel);
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_aligned_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
    {
      if (current_dr != dr)
	continue;
      /* DR has the same alignment as DR_PEEL, so the peel that aligns
	 DR_PEEL aligns DR too.  Sanity-check that their element-granular
	 misalignments agree whenever both are known.  */
      gcc_assert (!known_alignment_for_access_p (dr)
		  || !known_alignment_for_access_p (dr_peel)
		  || (DR_MISALIGNMENT (dr) / dr_size
		      == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      /* Both misalignments are known: shift DR's misalignment by the
	 NPEEL scalar iterations removed by the peel.  A negative step
	 means the access moves backwards through memory, so the
	 adjustment is subtracted instead of added.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      /* Reduce modulo the target alignment (a power of two).  */
      misal &= DR_TARGET_ALIGNMENT (dr) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  /* Otherwise the peel leaves DR's misalignment unknown.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
		     "to unknown (-1).\n");
  SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
}
1034
1035
1036/* Function verify_data_ref_alignment
1037
1038 Return TRUE if DR can be handled with respect to alignment. */
1039
1040static bool
1041verify_data_ref_alignment (data_reference_p dr)
1042{
1043 enum dr_alignment_support supportable_dr_alignment
1044 = vect_supportable_dr_alignment (dr, false);
1045 if (!supportable_dr_alignment)
1046 {
1047 if (dump_enabled_p ())
1048 {
1049 if (DR_IS_READ (dr))
1050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1051 "not vectorized: unsupported unaligned load.");
1052 else
1053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1054 "not vectorized: unsupported unaligned "
1055 "store.");
1056
1057 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1058 DR_REF (dr));
1059 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1060 }
1061 return false;
1062 }
1063
1064 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "Vectorizing an unaligned access.\n");
1067
1068 return true;
1069}
1070
1071/* Function vect_verify_datarefs_alignment
1072
1073 Return TRUE if all data references in the loop can be
1074 handled with respect to alignment. */
1075
1076bool
1077vect_verify_datarefs_alignment (loop_vec_info vinfo)
1078{
1079 vec<data_reference_p> datarefs = vinfo->datarefs;
1080 struct data_reference *dr;
1081 unsigned int i;
1082
1083 FOR_EACH_VEC_ELT (datarefs, i, dr)
1084 {
1085 gimple *stmt = DR_STMT (dr);
1086 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1087
1088 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1089 continue;
1090
1091 /* For interleaving, only the alignment of the first access matters. */
1092 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1093 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1094 continue;
1095
1096 /* Strided accesses perform only component accesses, alignment is
1097 irrelevant for them. */
1098 if (STMT_VINFO_STRIDED_P (stmt_info)
1099 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1100 continue;
1101
1102 if (! verify_data_ref_alignment (dr))
1103 return false;
1104 }
1105
1106 return true;
1107}
1108
/* Given a memory reference EXP, return whether its alignment is less
   than its size.  */
1111
1112static bool
1113not_size_aligned (tree exp)
1114{
1115 if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1116 return true;
1117
1118 return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1119 > get_object_alignment (exp));
1120}
1121
1122/* Function vector_alignment_reachable_p
1123
1124 Return true if vector alignment for DR is reachable by peeling
1125 a few loop iterations. Return false otherwise. */
1126
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
	 the prolog loop ({VF - misalignment}), is a multiple of the
	 number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
	return false;

      /* Convert the byte misalignment into a count of vector elements.  */
      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      /* The prolog peels NELEMENTS - MIS_IN_ELEMENTS iterations; it must
	 be a whole number of groups.  */
      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
	return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
	                   "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
	  dump_printf (MSG_NOTE,
	               ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
	}
      /* A misalignment that is not a multiple of the element size can
	 never be removed by peeling whole scalar iterations.  */
      if (DR_MISALIGNMENT (dr) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                     "data size does not divide the misalignment.\n");
	  return false;
	}
    }

  if (!known_alignment_for_access_p (dr))
    {
      /* Unknown misalignment: let the target decide whether alignment
	 is reachable, based on whether the access looks packed.  */
      tree type = TREE_TYPE (DR_REF (dr));
      bool is_packed = not_size_aligned (DR_REF (dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                 "Unknown misalignment, %snaturally aligned\n",
	                 is_packed ? "not " : "");
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}
1188
1189
1190/* Calculate the cost of the memory access represented by DR. */
1191
1192static void
1193vect_get_data_access_cost (struct data_reference *dr,
1194 unsigned int *inside_cost,
1195 unsigned int *outside_cost,
1196 stmt_vector_for_cost *body_cost_vec)
1197{
1198 gimple *stmt = DR_STMT (dr);
1199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1200 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1201 int ncopies;
1202
1203 if (PURE_SLP_STMT (stmt_info))
1204 ncopies = 1;
1205 else
1206 ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
1207
1208 if (DR_IS_READ (dr))
1209 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1210 NULL, body_cost_vec, false);
1211 else
1212 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1213
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_get_data_access_cost: inside_cost = %d, "
1217 "outside_cost = %d.\n", *inside_cost, *outside_cost);
1218}
1219
1220
/* One candidate peeling amount: peeling NPEEL scalar iterations aligns
   data reference DR.  COUNT records how many data references share this
   peeling amount (vect_peeling_hash_insert bumps it, and adds a large
   penalty for unsupportable unaligned refs under the unlimited cost
   model).  */
typedef struct _vect_peel_info
{
  struct data_reference *dr;
  int npeel;
  unsigned int count;
} *vect_peel_info;

/* A peeling candidate extended with its estimated costs, used when the
   cost model compares candidates.  */
typedef struct _vect_peel_extended_info
{
  struct _vect_peel_info peel_info;
  unsigned int inside_cost;
  unsigned int outside_cost;
} *vect_peel_extended_info;
1234
1235
/* Peeling hashtable helpers.  Candidates are keyed by their NPEEL value
   only, so all data refs needing the same peeling amount share one
   entry.  */

struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};

/* Hash a peeling candidate by its peeling amount.  */

inline hashval_t
peel_info_hasher::hash (const _vect_peel_info *peel_info)
{
  return (hashval_t) peel_info->npeel;
}

/* Two candidates are equal iff they peel the same number of
   iterations.  */

inline bool
peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
{
  return (a->npeel == b->npeel);
}
1255
1256
1257/* Insert DR into peeling hash table with NPEEL as key. */
1258
1259static void
1260vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1261 loop_vec_info loop_vinfo, struct data_reference *dr,
1262 int npeel)
1263{
1264 struct _vect_peel_info elem, *slot;
1265 _vect_peel_info **new_slot;
1266 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1267
1268 elem.npeel = npeel;
1269 slot = peeling_htab->find (&elem);
1270 if (slot)
1271 slot->count++;
1272 else
1273 {
1274 slot = XNEW (struct _vect_peel_info);
1275 slot->npeel = npeel;
1276 slot->dr = dr;
1277 slot->count = 1;
1278 new_slot = peeling_htab->find_slot (slot, INSERT);
1279 *new_slot = slot;
1280 }
1281
1282 if (!supportable_dr_alignment
1283 && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1284 slot->count += VECT_MAX_COST;
1285}
1286
1287
1288/* Traverse peeling hash table to find peeling option that aligns maximum
1289 number of data accesses. */
1290
1291int
1292vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1293 _vect_peel_extended_info *max)
1294{
1295 vect_peel_info elem = *slot;
1296
1297 if (elem->count > max->peel_info.count
1298 || (elem->count == max->peel_info.count
1299 && max->peel_info.npeel > elem->npeel))
1300 {
1301 max->peel_info.npeel = elem->npeel;
1302 max->peel_info.count = elem->count;
1303 max->peel_info.dr = elem->dr;
1304 }
1305
1306 return 1;
1307}
1308
1309/* Get the costs of peeling NPEEL iterations checking data access costs
1310 for all data refs. If UNKNOWN_MISALIGNMENT is true, we assume DR0's
1311 misalignment will be zero after peeling. */
1312
static void
vect_get_peeling_costs_all_drs (vec<data_reference_p> datarefs,
				struct data_reference *dr0,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				unsigned int npeel,
				bool unknown_misalignment)
{
  unsigned i;
  data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple *stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      if (!STMT_VINFO_RELEVANT_P (stmt_info))
	continue;

      /* For interleaving, only the alignment of the first access
	 matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
	continue;

      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;

      /* Temporarily give DR the misalignment it would have after the
	 peel, accumulate its access cost, then restore the saved value:
	 - NPEEL == 0: leave the misalignment untouched;
	 - DR0 with unknown misalignment: assume the peel aligns it;
	 - otherwise: adjust the misalignment by NPEEL iterations.  */
      int save_misalignment;
      save_misalignment = DR_MISALIGNMENT (dr);
      if (npeel == 0)
	;
      else if (unknown_misalignment && dr == dr0)
	SET_DR_MISALIGNMENT (dr, 0);
      else
	vect_update_misalignment_for_peel (dr, dr0, npeel);
      vect_get_data_access_cost (dr, inside_cost, outside_cost,
				 body_cost_vec);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }
}
1357
1358/* Traverse peeling hash table and calculate cost for each peeling option.
1359 Find the one with the lowest cost. */
1360
1361int
1362vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1363 _vect_peel_extended_info *min)
1364{
1365 vect_peel_info elem = *slot;
1366 int dummy;
1367 unsigned int inside_cost = 0, outside_cost = 0;
1368 gimple *stmt = DR_STMT (elem->dr);
1369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1370 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1371 stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
1372 epilogue_cost_vec;
1373
1374 prologue_cost_vec.create (2);
1375 body_cost_vec.create (2);
1376 epilogue_cost_vec.create (2);
1377
1378 vect_get_peeling_costs_all_drs (LOOP_VINFO_DATAREFS (loop_vinfo),
1379 elem->dr, &inside_cost, &outside_cost,
1380 &body_cost_vec, elem->npeel, false);
1381
1382 body_cost_vec.release ();
1383
1384 outside_cost += vect_get_known_peeling_cost
1385 (loop_vinfo, elem->npeel, &dummy,
1386 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1387 &prologue_cost_vec, &epilogue_cost_vec);
1388
1389 /* Prologue and epilogue costs are added to the target model later.
1390 These costs depend only on the scalar iteration cost, the
1391 number of peeling iterations finally chosen, and the number of
1392 misaligned statements. So discard the information found here. */
1393 prologue_cost_vec.release ();
1394 epilogue_cost_vec.release ();
1395
1396 if (inside_cost < min->inside_cost
1397 || (inside_cost == min->inside_cost
1398 && outside_cost < min->outside_cost))
1399 {
1400 min->inside_cost = inside_cost;
1401 min->outside_cost = outside_cost;
1402 min->peel_info.dr = elem->dr;
1403 min->peel_info.npeel = elem->npeel;
1404 min->peel_info.count = elem->count;
1405 }
1406
1407 return 1;
1408}
1409
1410
1411/* Choose best peeling option by traversing peeling hash table and either
1412 choosing an option with the lowest cost (if cost model is enabled) or the
1413 option that aligns as many accesses as possible. */
1414
1415static struct _vect_peel_extended_info
1416vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
1417 loop_vec_info loop_vinfo)
1418{
1419 struct _vect_peel_extended_info res;
1420
1421 res.peel_info.dr = NULL;
1422
1423 if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1424 {
1425 res.inside_cost = INT_MAX;
1426 res.outside_cost = INT_MAX;
1427 peeling_htab->traverse <_vect_peel_extended_info *,
1428 vect_peeling_hash_get_lowest_cost> (&res);
1429 }
1430 else
1431 {
1432 res.peel_info.count = 0;
1433 peeling_htab->traverse <_vect_peel_extended_info *,
1434 vect_peeling_hash_get_most_frequent> (&res);
1435 res.inside_cost = 0;
1436 res.outside_cost = 0;
1437 }
1438
1439 return res;
1440}
1441
1442/* Return true if the new peeling NPEEL is supported. */
1443
1444static bool
1445vect_peeling_supportable (loop_vec_info loop_vinfo, struct data_reference *dr0,
1446 unsigned npeel)
1447{
1448 unsigned i;
1449 struct data_reference *dr = NULL;
1450 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1451 gimple *stmt;
1452 stmt_vec_info stmt_info;
1453 enum dr_alignment_support supportable_dr_alignment;
1454
1455 /* Ensure that all data refs can be vectorized after the peel. */
1456 FOR_EACH_VEC_ELT (datarefs, i, dr)
1457 {
1458 int save_misalignment;
1459
1460 if (dr == dr0)
1461 continue;
1462
1463 stmt = DR_STMT (dr);
1464 stmt_info = vinfo_for_stmt (stmt);
1465 /* For interleaving, only the alignment of the first access
1466 matters. */
1467 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1468 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1469 continue;
1470
1471 /* Strided accesses perform only component accesses, alignment is
1472 irrelevant for them. */
1473 if (STMT_VINFO_STRIDED_P (stmt_info)
1474 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1475 continue;
1476
1477 save_misalignment = DR_MISALIGNMENT (dr);
1478 vect_update_misalignment_for_peel (dr, dr0, npeel);
1479 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1480 SET_DR_MISALIGNMENT (dr, save_misalignment);
1481
1482 if (!supportable_dr_alignment)
1483 return false;
1484 }
1485
1486 return true;
1487}
1488
1489/* Function vect_enhance_data_refs_alignment
1490
1491 This pass will use loop versioning and loop peeling in order to enhance
1492 the alignment of data references in the loop.
1493
1494 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1495 original loop is to be vectorized. Any other loops that are created by
1496 the transformations performed in this pass - are not supposed to be
1497 vectorized. This restriction will be relaxed.
1498
1499 This pass will require a cost model to guide it whether to apply peeling
1500 or versioning or a combination of the two. For example, the scheme that
1501 intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
1503 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1504 other accesses are not necessarily aligned, or (2) use loop versioning to
1505 generate one loop in which all accesses are aligned, and another loop in
1506 which only 'p' is necessarily aligned.
1507
1508 ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1510 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1511
1512 Devising a cost model is the most critical aspect of this work. It will
1513 guide us on which access to peel for, whether to use loop versioning, how
1514 many versions to create, etc. The cost model will probably consist of
1515 generic considerations as well as target specific considerations (on
1516 powerpc for example, misaligned stores are more painful than misaligned
1517 loads).
1518
1519 Here are the general steps involved in alignment enhancements:
1520
1521 -- original loop, before alignment analysis:
1522 for (i=0; i<N; i++){
1523 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1524 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1525 }
1526
1527 -- After vect_compute_data_refs_alignment:
1528 for (i=0; i<N; i++){
1529 x = q[i]; # DR_MISALIGNMENT(q) = 3
1530 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1531 }
1532
1533 -- Possibility 1: we do loop versioning:
1534 if (p is aligned) {
1535 for (i=0; i<N; i++){ # loop 1A
1536 x = q[i]; # DR_MISALIGNMENT(q) = 3
1537 p[i] = y; # DR_MISALIGNMENT(p) = 0
1538 }
1539 }
1540 else {
1541 for (i=0; i<N; i++){ # loop 1B
1542 x = q[i]; # DR_MISALIGNMENT(q) = 3
1543 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1544 }
1545 }
1546
1547 -- Possibility 2: we do loop peeling:
1548 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1549 x = q[i];
1550 p[i] = y;
1551 }
1552 for (i = 3; i < N; i++){ # loop 2A
1553 x = q[i]; # DR_MISALIGNMENT(q) = 0
1554 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1555 }
1556
1557 -- Possibility 3: combination of loop peeling and versioning:
1558 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1559 x = q[i];
1560 p[i] = y;
1561 }
1562 if (p is aligned) {
1563 for (i = 3; i<N; i++){ # loop 3A
1564 x = q[i]; # DR_MISALIGNMENT(q) = 0
1565 p[i] = y; # DR_MISALIGNMENT(p) = 0
1566 }
1567 }
1568 else {
1569 for (i = 3; i<N; i++){ # loop 3B
1570 x = q[i]; # DR_MISALIGNMENT(q) = 0
1571 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1572 }
1573 }
1574
1575 These loops are later passed to loop_transform to be vectorized. The
1576 vectorizer will use the alignment information to guide the transformation
1577 (whether to generate regular loads/stores, or with special handling for
1578 misalignment). */
1579
1580bool
1581vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1582{
1583 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1584 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1585 enum dr_alignment_support supportable_dr_alignment;
1586 struct data_reference *dr0 = NULL, *first_store = NULL;
1587 struct data_reference *dr;
1588 unsigned int i, j;
1589 bool do_peeling = false;
1590 bool do_versioning = false;
1591 bool stat;
1592 gimple *stmt;
1593 stmt_vec_info stmt_info;
1594 unsigned int npeel = 0;
1595 bool one_misalignment_known = false;
1596 bool one_misalignment_unknown = false;
1597 bool one_dr_unsupportable = false;
1598 struct data_reference *unsupportable_dr = NULL;
1599 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1600 unsigned possible_npeel_number = 1;
1601 tree vectype;
1602 unsigned int nelements, mis, same_align_drs_max = 0;
1603 hash_table<peel_info_hasher> peeling_htab (1);
1604
1605 if (dump_enabled_p ())
1606 dump_printf_loc (MSG_NOTE, vect_location,
1607 "=== vect_enhance_data_refs_alignment ===\n");
1608
1609 /* Reset data so we can safely be called multiple times. */
1610 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1611 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
1612
1613 /* While cost model enhancements are expected in the future, the high level
1614 view of the code at this time is as follows:
1615
1616 A) If there is a misaligned access then see if peeling to align
1617 this access can make all data references satisfy
1618 vect_supportable_dr_alignment. If so, update data structures
1619 as needed and return true.
1620
1621 B) If peeling wasn't possible and there is a data reference with an
1622 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1623 then see if loop versioning checks can be used to make all data
1624 references satisfy vect_supportable_dr_alignment. If so, update
1625 data structures as needed and return true.
1626
1627 C) If neither peeling nor versioning were successful then return false if
1628 any data reference does not satisfy vect_supportable_dr_alignment.
1629
1630 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1631
1632 Note, Possibility 3 above (which is peeling and versioning together) is not
1633 being done at this time. */
1634
1635 /* (1) Peeling to force alignment. */
1636
1637 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1638 Considerations:
1639 + How many accesses will become aligned due to the peeling
1640 - How many accesses will become unaligned due to the peeling,
1641 and the cost of misaligned accesses.
1642 - The cost of peeling (the extra runtime checks, the increase
1643 in code size). */
1644
1645 FOR_EACH_VEC_ELT (datarefs, i, dr)
1646 {
1647 stmt = DR_STMT (dr);
1648 stmt_info = vinfo_for_stmt (stmt);
1649
1650 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1651 continue;
1652
1653 /* For interleaving, only the alignment of the first access
1654 matters. */
1655 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1656 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1657 continue;
1658
1659 /* For invariant accesses there is nothing to enhance. */
1660 if (integer_zerop (DR_STEP (dr)))
1661 continue;
1662
1663 /* Strided accesses perform only component accesses, alignment is
1664 irrelevant for them. */
1665 if (STMT_VINFO_STRIDED_P (stmt_info)
1666 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1667 continue;
1668
1669 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1670 do_peeling = vector_alignment_reachable_p (dr);
1671 if (do_peeling)
1672 {
1673 if (known_alignment_for_access_p (dr))
1674 {
1675 unsigned int npeel_tmp = 0;
1676 bool negative = tree_int_cst_compare (DR_STEP (dr),
1677 size_zero_node) < 0;
1678
1679 vectype = STMT_VINFO_VECTYPE (stmt_info);
1680 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1681 unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
1682 unsigned int dr_size = vect_get_scalar_dr_size (dr);
1683 mis = (negative ? DR_MISALIGNMENT (dr) : -DR_MISALIGNMENT (dr));
1684 if (DR_MISALIGNMENT (dr) != 0)
1685 npeel_tmp = (mis & (target_align - 1)) / dr_size;
1686
1687 /* For multiple types, it is possible that the bigger type access
1688 will have more than one peeling option. E.g., a loop with two
1689 types: one of size (vector size / 4), and the other one of
1690 size (vector size / 8). Vectorization factor will 8. If both
1691 accesses are misaligned by 3, the first one needs one scalar
1692 iteration to be aligned, and the second one needs 5. But the
1693 first one will be aligned also by peeling 5 scalar
1694 iterations, and in that case both accesses will be aligned.
1695 Hence, except for the immediate peeling amount, we also want
1696 to try to add full vector size, while we don't exceed
1697 vectorization factor.
1698 We do this automatically for cost model, since we calculate
1699 cost for every peeling option. */
1700 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1701 {
1702 if (STMT_SLP_TYPE (stmt_info))
1703 possible_npeel_number
1704 = (vf * GROUP_SIZE (stmt_info)) / nelements;
1705 else
1706 possible_npeel_number = vf / nelements;
1707
1708 /* NPEEL_TMP is 0 when there is no misalignment, but also
1709 allow peeling NELEMENTS. */
1710 if (DR_MISALIGNMENT (dr) == 0)
1711 possible_npeel_number++;
1712 }
1713
1714 /* Save info about DR in the hash table. Also include peeling
1715 amounts according to the explanation above. */
1716 for (j = 0; j < possible_npeel_number; j++)
1717 {
1718 vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
1719 dr, npeel_tmp);
1720 npeel_tmp += target_align / dr_size;
1721 }
1722
1723 one_misalignment_known = true;
1724 }
1725 else
1726 {
1727 /* If we don't know any misalignment values, we prefer
1728 peeling for data-ref that has the maximum number of data-refs
1729 with the same alignment, unless the target prefers to align
1730 stores over load. */
1731 unsigned same_align_drs
1732 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1733 if (!dr0
1734 || same_align_drs_max < same_align_drs)
1735 {
1736 same_align_drs_max = same_align_drs;
1737 dr0 = dr;
1738 }
1739 /* For data-refs with the same number of related
1740 accesses prefer the one where the misalign
1741 computation will be invariant in the outermost loop. */
1742 else if (same_align_drs_max == same_align_drs)
1743 {
1744 struct loop *ivloop0, *ivloop;
1745 ivloop0 = outermost_invariant_loop_for_expr
1746 (loop, DR_BASE_ADDRESS (dr0));
1747 ivloop = outermost_invariant_loop_for_expr
1748 (loop, DR_BASE_ADDRESS (dr));
1749 if ((ivloop && !ivloop0)
1750 || (ivloop && ivloop0
1751 && flow_loop_nested_p (ivloop, ivloop0)))
1752 dr0 = dr;
1753 }
1754
1755 one_misalignment_unknown = true;
1756
1757 /* Check for data refs with unsupportable alignment that
1758 can be peeled. */
1759 if (!supportable_dr_alignment)
1760 {
1761 one_dr_unsupportable = true;
1762 unsupportable_dr = dr;
1763 }
1764
1765 if (!first_store && DR_IS_WRITE (dr))
1766 first_store = dr;
1767 }
1768 }
1769 else
1770 {
1771 if (!aligned_access_p (dr))
1772 {
1773 if (dump_enabled_p ())
1774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1775 "vector alignment may not be reachable\n");
1776 break;
1777 }
1778 }
1779 }
1780
1781 /* Check if we can possibly peel the loop. */
1782 if (!vect_can_advance_ivs_p (loop_vinfo)
1783 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
1784 || loop->inner)
1785 do_peeling = false;
1786
1787 struct _vect_peel_extended_info peel_for_known_alignment;
1788 struct _vect_peel_extended_info peel_for_unknown_alignment;
1789 struct _vect_peel_extended_info best_peel;
1790
1791 peel_for_unknown_alignment.inside_cost = INT_MAX;
1792 peel_for_unknown_alignment.outside_cost = INT_MAX;
1793 peel_for_unknown_alignment.peel_info.count = 0;
1794
1795 if (do_peeling
1796 && one_misalignment_unknown)
1797 {
1798 /* Check if the target requires to prefer stores over loads, i.e., if
1799 misaligned stores are more expensive than misaligned loads (taking
1800 drs with same alignment into account). */
1801 unsigned int load_inside_cost = 0;
1802 unsigned int load_outside_cost = 0;
1803 unsigned int store_inside_cost = 0;
1804 unsigned int store_outside_cost = 0;
1805
1806 stmt_vector_for_cost dummy;
1807 dummy.create (2);
1808 vect_get_peeling_costs_all_drs (datarefs, dr0,
1809 &load_inside_cost,
1810 &load_outside_cost,
1811 &dummy, vf / 2, true);
1812 dummy.release ();
1813
1814 if (first_store)
1815 {
1816 dummy.create (2);
1817 vect_get_peeling_costs_all_drs (datarefs, first_store,
1818 &store_inside_cost,
1819 &store_outside_cost,
1820 &dummy, vf / 2, true);
1821 dummy.release ();
1822 }
1823 else
1824 {
1825 store_inside_cost = INT_MAX;
1826 store_outside_cost = INT_MAX;
1827 }
1828
1829 if (load_inside_cost > store_inside_cost
1830 || (load_inside_cost == store_inside_cost
1831 && load_outside_cost > store_outside_cost))
1832 {
1833 dr0 = first_store;
1834 peel_for_unknown_alignment.inside_cost = store_inside_cost;
1835 peel_for_unknown_alignment.outside_cost = store_outside_cost;
1836 }
1837 else
1838 {
1839 peel_for_unknown_alignment.inside_cost = load_inside_cost;
1840 peel_for_unknown_alignment.outside_cost = load_outside_cost;
1841 }
1842
1843 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1844 prologue_cost_vec.create (2);
1845 epilogue_cost_vec.create (2);
1846
1847 int dummy2;
1848 peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
1849 (loop_vinfo, vf / 2, &dummy2,
1850 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1851 &prologue_cost_vec, &epilogue_cost_vec);
1852
1853 prologue_cost_vec.release ();
1854 epilogue_cost_vec.release ();
1855
1856 peel_for_unknown_alignment.peel_info.count = 1
1857 + STMT_VINFO_SAME_ALIGN_REFS
1858 (vinfo_for_stmt (DR_STMT (dr0))).length ();
1859 }
1860
1861 peel_for_unknown_alignment.peel_info.npeel = 0;
1862 peel_for_unknown_alignment.peel_info.dr = dr0;
1863
1864 best_peel = peel_for_unknown_alignment;
1865
1866 peel_for_known_alignment.inside_cost = INT_MAX;
1867 peel_for_known_alignment.outside_cost = INT_MAX;
1868 peel_for_known_alignment.peel_info.count = 0;
1869 peel_for_known_alignment.peel_info.dr = NULL;
1870
1871 if (do_peeling && one_misalignment_known)
1872 {
1873 /* Peeling is possible, but there is no data access that is not supported
1874 unless aligned. So we try to choose the best possible peeling from
1875 the hash table. */
1876 peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
1877 (&peeling_htab, loop_vinfo);
1878 }
1879
1880 /* Compare costs of peeling for known and unknown alignment. */
1881 if (peel_for_known_alignment.peel_info.dr != NULL
1882 && peel_for_unknown_alignment.inside_cost
1883 >= peel_for_known_alignment.inside_cost)
1884 {
1885 best_peel = peel_for_known_alignment;
1886
1887 /* If the best peeling for known alignment has NPEEL == 0, perform no
1888 peeling at all except if there is an unsupportable dr that we can
1889 align. */
1890 if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
1891 do_peeling = false;
1892 }
1893
1894 /* If there is an unsupportable data ref, prefer this over all choices so far
1895 since we'd have to discard a chosen peeling except when it accidentally
1896 aligned the unsupportable data ref. */
1897 if (one_dr_unsupportable)
1898 dr0 = unsupportable_dr;
1899 else if (do_peeling)
1900 {
1901 /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
1902 TODO: Use nopeel_outside_cost or get rid of it? */
1903 unsigned nopeel_inside_cost = 0;
1904 unsigned nopeel_outside_cost = 0;
1905
1906 stmt_vector_for_cost dummy;
1907 dummy.create (2);
1908 vect_get_peeling_costs_all_drs (datarefs, NULL, &nopeel_inside_cost,
1909 &nopeel_outside_cost, &dummy, 0, false);
1910 dummy.release ();
1911
1912 /* Add epilogue costs. As we do not peel for alignment here, no prologue
1913 costs will be recorded. */
1914 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1915 prologue_cost_vec.create (2);
1916 epilogue_cost_vec.create (2);
1917
1918 int dummy2;
1919 nopeel_outside_cost += vect_get_known_peeling_cost
1920 (loop_vinfo, 0, &dummy2,
1921 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1922 &prologue_cost_vec, &epilogue_cost_vec);
1923
1924 prologue_cost_vec.release ();
1925 epilogue_cost_vec.release ();
1926
1927 npeel = best_peel.peel_info.npeel;
1928 dr0 = best_peel.peel_info.dr;
1929
1930 /* If no peeling is not more expensive than the best peeling we
1931 have so far, don't perform any peeling. */
1932 if (nopeel_inside_cost <= best_peel.inside_cost)
1933 do_peeling = false;
1934 }
1935
1936 if (do_peeling)
1937 {
1938 stmt = DR_STMT (dr0);
1939 stmt_info = vinfo_for_stmt (stmt);
1940 vectype = STMT_VINFO_VECTYPE (stmt_info);
1941
1942 if (known_alignment_for_access_p (dr0))
1943 {
1944 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1945 size_zero_node) < 0;
1946 if (!npeel)
1947 {
1948 /* Since it's known at compile time, compute the number of
1949 iterations in the peeled loop (the peeling factor) for use in
1950 updating DR_MISALIGNMENT values. The peeling factor is the
1951 vectorization factor minus the misalignment as an element
1952 count. */
1953 mis = negative ? DR_MISALIGNMENT (dr0) : -DR_MISALIGNMENT (dr0);
1954 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
1955 npeel = ((mis & (target_align - 1))
1956 / vect_get_scalar_dr_size (dr0));
1957 }
1958
1959 /* For interleaved data access every iteration accesses all the
1960 members of the group, therefore we divide the number of iterations
1961 by the group size. */
1962 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1964 npeel /= GROUP_SIZE (stmt_info);
1965
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_NOTE, vect_location,
1968 "Try peeling by %d\n", npeel);
1969 }
1970
1971 /* Ensure that all datarefs can be vectorized after the peel. */
1972 if (!vect_peeling_supportable (loop_vinfo, dr0, npeel))
1973 do_peeling = false;
1974
1975 /* Check if all datarefs are supportable and log. */
1976 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1977 {
1978 stat = vect_verify_datarefs_alignment (loop_vinfo);
1979 if (!stat)
1980 do_peeling = false;
1981 else
1982 return stat;
1983 }
1984
1985 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
1986 if (do_peeling)
1987 {
1988 unsigned max_allowed_peel
1989 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1990 if (max_allowed_peel != (unsigned)-1)
1991 {
1992 unsigned max_peel = npeel;
1993 if (max_peel == 0)
1994 {
1995 unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
1996 max_peel = target_align / vect_get_scalar_dr_size (dr0) - 1;
1997 }
1998 if (max_peel > max_allowed_peel)
1999 {
2000 do_peeling = false;
2001 if (dump_enabled_p ())
2002 dump_printf_loc (MSG_NOTE, vect_location,
2003 "Disable peeling, max peels reached: %d\n", max_peel);
2004 }
2005 }
2006 }
2007
2008 /* Cost model #2 - if peeling may result in a remaining loop not
2009 iterating enough to be vectorized then do not peel. */
2010 if (do_peeling
2011 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2012 {
2013 unsigned max_peel
2014 = npeel == 0 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 : npeel;
2015 if (LOOP_VINFO_INT_NITERS (loop_vinfo)
2016 < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + max_peel)
2017 do_peeling = false;
2018 }
2019
2020 if (do_peeling)
2021 {
2022 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2023 If the misalignment of DR_i is identical to that of dr0 then set
2024 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
2025 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2026 by the peeling factor times the element size of DR_i (MOD the
2027 vectorization factor times the size). Otherwise, the
2028 misalignment of DR_i must be set to unknown. */
2029 FOR_EACH_VEC_ELT (datarefs, i, dr)
2030 if (dr != dr0)
2031 {
2032 /* Strided accesses perform only component accesses, alignment
2033 is irrelevant for them. */
2034 stmt_info = vinfo_for_stmt (DR_STMT (dr));
2035 if (STMT_VINFO_STRIDED_P (stmt_info)
2036 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2037 continue;
2038
2039 vect_update_misalignment_for_peel (dr, dr0, npeel);
2040 }
2041
2042 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
2043 if (npeel)
2044 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2045 else
2046 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2047 = DR_MISALIGNMENT (dr0);
2048 SET_DR_MISALIGNMENT (dr0, 0);
2049 if (dump_enabled_p ())
2050 {
2051 dump_printf_loc (MSG_NOTE, vect_location,
2052 "Alignment of access forced using peeling.\n");
2053 dump_printf_loc (MSG_NOTE, vect_location,
2054 "Peeling for alignment will be applied.\n");
2055 }
2056
2057 /* The inside-loop cost will be accounted for in vectorizable_load
2058 and vectorizable_store correctly with adjusted alignments.
2059 Drop the body_cst_vec on the floor here. */
2060 stat = vect_verify_datarefs_alignment (loop_vinfo);
2061 gcc_assert (stat);
2062 return stat;
2063 }
2064 }
2065
2066 /* (2) Versioning to force alignment. */
2067
2068 /* Try versioning if:
2069 1) optimize loop for speed
2070 2) there is at least one unsupported misaligned data ref with an unknown
2071 misalignment, and
2072 3) all misaligned data refs with a known misalignment are supported, and
2073 4) the number of runtime alignment checks is within reason. */
2074
2075 do_versioning =
2076 optimize_loop_nest_for_speed_p (loop)
2077 && (!loop->inner); /* FORNOW */
2078
2079 if (do_versioning)
2080 {
2081 FOR_EACH_VEC_ELT (datarefs, i, dr)
2082 {
2083 stmt = DR_STMT (dr);
2084 stmt_info = vinfo_for_stmt (stmt);
2085
2086 /* For interleaving, only the alignment of the first access
2087 matters. */
2088 if (aligned_access_p (dr)
2089 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2090 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2091 continue;
2092
2093 if (STMT_VINFO_STRIDED_P (stmt_info))
2094 {
2095 /* Strided loads perform only component accesses, alignment is
2096 irrelevant for them. */
2097 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2098 continue;
2099 do_versioning = false;
2100 break;
2101 }
2102
2103 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2104
2105 if (!supportable_dr_alignment)
2106 {
2107 gimple *stmt;
2108 int mask;
2109 tree vectype;
2110
2111 if (known_alignment_for_access_p (dr)
2112 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2113 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2114 {
2115 do_versioning = false;
2116 break;
2117 }
2118
2119 stmt = DR_STMT (dr);
2120 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2121 gcc_assert (vectype);
2122
2123 /* The rightmost bits of an aligned address must be zeros.
2124 Construct the mask needed for this test. For example,
2125 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2126 mask must be 15 = 0xf. */
2127 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2128
2129 /* FORNOW: use the same mask to test all potentially unaligned
2130 references in the loop. The vectorizer currently supports
2131 a single vector size, see the reference to
2132 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2133 vectorization factor is computed. */
2134 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2135 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2136 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2137 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2138 DR_STMT (dr));
2139 }
2140 }
2141
2142 /* Versioning requires at least one misaligned data reference. */
2143 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2144 do_versioning = false;
2145 else if (!do_versioning)
2146 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2147 }
2148
2149 if (do_versioning)
2150 {
2151 vec<gimple *> may_misalign_stmts
2152 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2153 gimple *stmt;
2154
2155 /* It can now be assumed that the data references in the statements
2156 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2157 of the loop being vectorized. */
2158 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2159 {
2160 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2161 dr = STMT_VINFO_DATA_REF (stmt_info);
2162 SET_DR_MISALIGNMENT (dr, 0);
2163 if (dump_enabled_p ())
2164 dump_printf_loc (MSG_NOTE, vect_location,
2165 "Alignment of access forced using versioning.\n");
2166 }
2167
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_NOTE, vect_location,
2170 "Versioning for alignment will be applied.\n");
2171
2172 /* Peeling and versioning can't be done together at this time. */
2173 gcc_assert (! (do_peeling && do_versioning));
2174
2175 stat = vect_verify_datarefs_alignment (loop_vinfo);
2176 gcc_assert (stat);
2177 return stat;
2178 }
2179
2180 /* This point is reached if neither peeling nor versioning is being done. */
2181 gcc_assert (! (do_peeling || do_versioning));
2182
2183 stat = vect_verify_datarefs_alignment (loop_vinfo);
2184 return stat;
2185}
2186
2187
2188/* Function vect_find_same_alignment_drs.
2189
2190 Update group and alignment relations according to the chosen
2191 vectorization factor. */
2192
2193static void
2194vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
2195{
2196 struct data_reference *dra = DDR_A (ddr);
2197 struct data_reference *drb = DDR_B (ddr);
2198 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2199 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2200
2201 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2202 return;
2203
2204 if (dra == drb)
2205 return;
2206
2207 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
2208 || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
2209 || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2210 return;
2211
2212 /* Two references with distance zero have the same alignment. */
2213 offset_int diff = (wi::to_offset (DR_INIT (dra))
2214 - wi::to_offset (DR_INIT (drb)));
2215 if (diff != 0)
2216 {
2217 /* Get the wider of the two alignments. */
2218 unsigned int align_a = (vect_calculate_target_alignment (dra)
2219 / BITS_PER_UNIT);
2220 unsigned int align_b = (vect_calculate_target_alignment (drb)
2221 / BITS_PER_UNIT);
2222 unsigned int max_align = MAX (align_a, align_b);
2223
2224 /* Require the gap to be a multiple of the larger vector alignment. */
2225 if (!wi::multiple_of_p (diff, max_align, SIGNED))
2226 return;
2227 }
2228
2229 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2230 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2231 if (dump_enabled_p ())
2232 {
2233 dump_printf_loc (MSG_NOTE, vect_location,
2234 "accesses have the same alignment: ");
2235 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2236 dump_printf (MSG_NOTE, " and ");
2237 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2238 dump_printf (MSG_NOTE, "\n");
2239 }
2240}
2241
2242
2243/* Function vect_analyze_data_refs_alignment
2244
2245 Analyze the alignment of the data-references in the loop.
2246 Return FALSE if a data reference is found that cannot be vectorized. */
2247
2248bool
2249vect_analyze_data_refs_alignment (loop_vec_info vinfo)
2250{
2251 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_NOTE, vect_location,
2253 "=== vect_analyze_data_refs_alignment ===\n");
2254
2255 /* Mark groups of data references with same alignment using
2256 data dependence information. */
2257 vec<ddr_p> ddrs = vinfo->ddrs;
2258 struct data_dependence_relation *ddr;
2259 unsigned int i;
2260
2261 FOR_EACH_VEC_ELT (ddrs, i, ddr)
2262 vect_find_same_alignment_drs (ddr);
2263
2264 vec<data_reference_p> datarefs = vinfo->datarefs;
2265 struct data_reference *dr;
2266
2267 vect_record_base_alignments (vinfo);
2268 FOR_EACH_VEC_ELT (datarefs, i, dr)
2269 {
2270 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2271 if (STMT_VINFO_VECTORIZABLE (stmt_info)
2272 && !vect_compute_data_ref_alignment (dr))
2273 {
2274 /* Strided accesses perform only component accesses, misalignment
2275 information is irrelevant for them. */
2276 if (STMT_VINFO_STRIDED_P (stmt_info)
2277 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2278 continue;
2279
2280 if (dump_enabled_p ())
2281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2282 "not vectorized: can't calculate alignment "
2283 "for data ref.\n");
2284
2285 return false;
2286 }
2287 }
2288
2289 return true;
2290}
2291
2292
2293/* Analyze alignment of DRs of stmts in NODE. */
2294
2295static bool
2296vect_slp_analyze_and_verify_node_alignment (slp_tree node)
2297{
2298 /* We vectorize from the first scalar stmt in the node unless
2299 the node is permuted in which case we start from the first
2300 element in the group. */
2301 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2302 data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2303 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
2304 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
2305
2306 data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2307 if (! vect_compute_data_ref_alignment (dr)
2308 /* For creating the data-ref pointer we need alignment of the
2309 first element anyway. */
2310 || (dr != first_dr
2311 && ! vect_compute_data_ref_alignment (first_dr))
2312 || ! verify_data_ref_alignment (dr))
2313 {
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2316 "not vectorized: bad data alignment in basic "
2317 "block.\n");
2318 return false;
2319 }
2320
2321 return true;
2322}
2323
2324/* Function vect_slp_analyze_instance_alignment
2325
2326 Analyze the alignment of the data-references in the SLP instance.
2327 Return FALSE if a data reference is found that cannot be vectorized. */
2328
2329bool
2330vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
2331{
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_NOTE, vect_location,
2334 "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
2335
2336 slp_tree node;
2337 unsigned i;
2338 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2339 if (! vect_slp_analyze_and_verify_node_alignment (node))
2340 return false;
2341
2342 node = SLP_INSTANCE_TREE (instance);
2343 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
2344 && ! vect_slp_analyze_and_verify_node_alignment
2345 (SLP_INSTANCE_TREE (instance)))
2346 return false;
2347
2348 return true;
2349}
2350
2351
2352/* Analyze groups of accesses: check that DR belongs to a group of
2353 accesses of legal size, step, etc. Detect gaps, single element
2354 interleaving, and other special cases. Set grouped access info.
2355 Collect groups of strided stores for further use in SLP analysis.
2356 Worker for vect_analyze_group_access. */
2357
2358static bool
2359vect_analyze_group_access_1 (struct data_reference *dr)
2360{
2361 tree step = DR_STEP (dr);
2362 tree scalar_type = TREE_TYPE (DR_REF (dr));
2363 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2364 gimple *stmt = DR_STMT (dr);
2365 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2366 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2367 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2368 HOST_WIDE_INT dr_step = -1;
2369 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2370 bool slp_impossible = false;
2371
2372 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2373 size of the interleaving group (including gaps). */
2374 if (tree_fits_shwi_p (step))
2375 {
2376 dr_step = tree_to_shwi (step);
2377 /* Check that STEP is a multiple of type size. Otherwise there is
2378 a non-element-sized gap at the end of the group which we
2379 cannot represent in GROUP_GAP or GROUP_SIZE.
2380 ??? As we can handle non-constant step fine here we should
2381 simply remove uses of GROUP_GAP between the last and first
2382 element and instead rely on DR_STEP. GROUP_SIZE then would
2383 simply not include that gap. */
2384 if ((dr_step % type_size) != 0)
2385 {
2386 if (dump_enabled_p ())
2387 {
2388 dump_printf_loc (MSG_NOTE, vect_location,
2389 "Step ");
2390 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2391 dump_printf (MSG_NOTE,
2392 " is not a multiple of the element size for ");
2393 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2394 dump_printf (MSG_NOTE, "\n");
2395 }
2396 return false;
2397 }
2398 groupsize = absu_hwi (dr_step) / type_size;
2399 }
2400 else
2401 groupsize = 0;
2402
2403 /* Not consecutive access is possible only if it is a part of interleaving. */
2404 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2405 {
2406 /* Check if it this DR is a part of interleaving, and is a single
2407 element of the group that is accessed in the loop. */
2408
2409 /* Gaps are supported only for loads. STEP must be a multiple of the type
2410 size. The size of the group must be a power of 2. */
2411 if (DR_IS_READ (dr)
2412 && (dr_step % type_size) == 0
2413 && groupsize > 0
2414 && pow2p_hwi (groupsize))
2415 {
2416 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2417 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2418 GROUP_GAP (stmt_info) = groupsize - 1;
2419 if (dump_enabled_p ())
2420 {
2421 dump_printf_loc (MSG_NOTE, vect_location,
2422 "Detected single element interleaving ");
2423 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2424 dump_printf (MSG_NOTE, " step ");
2425 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2426 dump_printf (MSG_NOTE, "\n");
2427 }
2428
2429 return true;
2430 }
2431
2432 if (dump_enabled_p ())
2433 {
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "not consecutive access ");
2436 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2437 }
2438
2439 if (bb_vinfo)
2440 {
2441 /* Mark the statement as unvectorizable. */
2442 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2443 return true;
2444 }
2445
2446 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
2447 STMT_VINFO_STRIDED_P (stmt_info) = true;
2448 return true;
2449 }
2450
2451 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2452 {
2453 /* First stmt in the interleaving chain. Check the chain. */
2454 gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2455 struct data_reference *data_ref = dr;
2456 unsigned int count = 1;
2457 tree prev_init = DR_INIT (data_ref);
2458 gimple *prev = stmt;
2459 HOST_WIDE_INT diff, gaps = 0;
2460
2461 while (next)
2462 {
2463 /* Skip same data-refs. In case that two or more stmts share
2464 data-ref (supported only for loads), we vectorize only the first
2465 stmt, and the rest get their vectorized loads from the first
2466 one. */
2467 if (!tree_int_cst_compare (DR_INIT (data_ref),
2468 DR_INIT (STMT_VINFO_DATA_REF (
2469 vinfo_for_stmt (next)))))
2470 {
2471 if (DR_IS_WRITE (data_ref))
2472 {
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "Two store stmts share the same dr.\n");
2476 return false;
2477 }
2478
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2481 "Two or more load stmts share the same dr.\n");
2482
2483 /* For load use the same data-ref load. */
2484 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2485
2486 prev = next;
2487 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2488 continue;
2489 }
2490
2491 prev = next;
2492 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2493
2494 /* All group members have the same STEP by construction. */
2495 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2496
2497 /* Check that the distance between two accesses is equal to the type
2498 size. Otherwise, we have gaps. */
2499 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2500 - TREE_INT_CST_LOW (prev_init)) / type_size;
2501 if (diff != 1)
2502 {
2503 /* FORNOW: SLP of accesses with gaps is not supported. */
2504 slp_impossible = true;
2505 if (DR_IS_WRITE (data_ref))
2506 {
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509 "interleaved store with gaps\n");
2510 return false;
2511 }
2512
2513 gaps += diff - 1;
2514 }
2515
2516 last_accessed_element += diff;
2517
2518 /* Store the gap from the previous member of the group. If there is no
2519 gap in the access, GROUP_GAP is always 1. */
2520 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2521
2522 prev_init = DR_INIT (data_ref);
2523 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2524 /* Count the number of data-refs in the chain. */
2525 count++;
2526 }
2527
2528 if (groupsize == 0)
2529 groupsize = count + gaps;
2530
2531 /* This could be UINT_MAX but as we are generating code in a very
2532 inefficient way we have to cap earlier. See PR78699 for example. */
2533 if (groupsize > 4096)
2534 {
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2537 "group is too large\n");
2538 return false;
2539 }
2540
2541 /* Check that the size of the interleaving is equal to count for stores,
2542 i.e., that there are no gaps. */
2543 if (groupsize != count
2544 && !DR_IS_READ (dr))
2545 {
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2548 "interleaved store with gaps\n");
2549 return false;
2550 }
2551
2552 /* If there is a gap after the last load in the group it is the
2553 difference between the groupsize and the last accessed
2554 element.
2555 When there is no gap, this difference should be 0. */
2556 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
2557
2558 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2559 if (dump_enabled_p ())
2560 {
2561 dump_printf_loc (MSG_NOTE, vect_location,
2562 "Detected interleaving ");
2563 if (DR_IS_READ (dr))
2564 dump_printf (MSG_NOTE, "load ");
2565 else
2566 dump_printf (MSG_NOTE, "store ");
2567 dump_printf (MSG_NOTE, "of size %u starting with ",
2568 (unsigned)groupsize);
2569 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2570 if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2571 dump_printf_loc (MSG_NOTE, vect_location,
2572 "There is a gap of %u elements after the group\n",
2573 GROUP_GAP (vinfo_for_stmt (stmt)));
2574 }
2575
2576 /* SLP: create an SLP data structure for every interleaving group of
2577 stores for further analysis in vect_analyse_slp. */
2578 if (DR_IS_WRITE (dr) && !slp_impossible)
2579 {
2580 if (loop_vinfo)
2581 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2582 if (bb_vinfo)
2583 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2584 }
2585 }
2586
2587 return true;
2588}
2589
2590/* Analyze groups of accesses: check that DR belongs to a group of
2591 accesses of legal size, step, etc. Detect gaps, single element
2592 interleaving, and other special cases. Set grouped access info.
2593 Collect groups of strided stores for further use in SLP analysis. */
2594
2595static bool
2596vect_analyze_group_access (struct data_reference *dr)
2597{
2598 if (!vect_analyze_group_access_1 (dr))
2599 {
2600 /* Dissolve the group if present. */
2601 gimple *next;
2602 gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
2603 while (stmt)
2604 {
2605 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2606 next = GROUP_NEXT_ELEMENT (vinfo);
2607 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2608 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2609 stmt = next;
2610 }
2611 return false;
2612 }
2613 return true;
2614}
2615
/* Analyze the access pattern of the data-reference DR.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of accesses.  Returns true iff the access pattern of DR
   is supported for vectorization.

   NOTE(review): the order of the checks below (missing step, zero step,
   nested loop, consecutive, grouped) is deliberate; several branches
   return early and later ones rely on the earlier ones having been
   excluded.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;

  /* LOOP is NULL for basic-block vectorization.  */
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* In loop vectorization a missing step means the evolution of the
     access could not be analyzed.  */
  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                 "bad data-ref access in loop\n");
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
      /* An invariant access: only loads are supported.  */
      if (!nested_in_vect_loop_p (loop, stmt))
	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only - it guarantees absence of
	 loop-carried dependencies between inner loop iterations.  */
      if (!loop->force_vectorize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in inner loop of nest\n");
	  return false;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      /* Interleaved accesses are not yet supported within outer-loop
	vectorization for references in the inner-loop.  */
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "zero step in outer loop.\n");
	  /* Invariant in the outer loop: only loads are supported.  */
	  return DR_IS_READ (dr);
	}
    }

  /* Consecutive?  The step must equal the element size (or its negation
     for a backward access).  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
	  || (dr_step < 0
	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
	{
	  /* Mark that it is not interleaving.  */
	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
	  return true;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
	                 "grouped access in outer loop.\n");
      return false;
    }


  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (dr)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (dr);
}
2708
2709/* Compare two data-references DRA and DRB to group them into chunks
2710 suitable for grouping. */
2711
2712static int
2713dr_group_sort_cmp (const void *dra_, const void *drb_)
2714{
2715 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2716 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2717 int cmp;
2718
2719 /* Stabilize sort. */
2720 if (dra == drb)
2721 return 0;
2722
2723 /* DRs in different loops never belong to the same group. */
2724 loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
2725 loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
2726 if (loopa != loopb)
2727 return loopa->num < loopb->num ? -1 : 1;
2728
2729 /* Ordering of DRs according to base. */
2730 cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2731 DR_BASE_ADDRESS (drb));
2732 if (cmp != 0)
2733 return cmp;
2734
2735 /* And according to DR_OFFSET. */
2736 cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2737 if (cmp != 0)
2738 return cmp;
2739
2740 /* Put reads before writes. */
2741 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2742 return DR_IS_READ (dra) ? -1 : 1;
2743
2744 /* Then sort after access size. */
2745 cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2746 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2747 if (cmp != 0)
2748 return cmp;
2749
2750 /* And after step. */
2751 cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2752 if (cmp != 0)
2753 return cmp;
2754
2755 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2756 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2757 if (cmp == 0)
2758 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2759 return cmp;
2760}
2761
2762/* Function vect_analyze_data_ref_accesses.
2763
2764 Analyze the access pattern of all the data references in the loop.
2765
2766 FORNOW: the only access pattern that is considered vectorizable is a
2767 simple step 1 (consecutive) access.
2768
2769 FORNOW: handle only arrays and pointer accesses. */
2770
2771bool
2772vect_analyze_data_ref_accesses (vec_info *vinfo)
2773{
2774 unsigned int i;
2775 vec<data_reference_p> datarefs = vinfo->datarefs;
2776 struct data_reference *dr;
2777
2778 if (dump_enabled_p ())
2779 dump_printf_loc (MSG_NOTE, vect_location,
2780 "=== vect_analyze_data_ref_accesses ===\n");
2781
2782 if (datarefs.is_empty ())
2783 return true;
2784
2785 /* Sort the array of datarefs to make building the interleaving chains
2786 linear. Don't modify the original vector's order, it is needed for
2787 determining what dependencies are reversed. */
2788 vec<data_reference_p> datarefs_copy = datarefs.copy ();
2789 datarefs_copy.qsort (dr_group_sort_cmp);
2790
2791 /* Build the interleaving chains. */
2792 for (i = 0; i < datarefs_copy.length () - 1;)
2793 {
2794 data_reference_p dra = datarefs_copy[i];
2795 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2796 stmt_vec_info lastinfo = NULL;
2797 if (! STMT_VINFO_VECTORIZABLE (stmtinfo_a))
2798 {
2799 ++i;
2800 continue;
2801 }
2802 for (i = i + 1; i < datarefs_copy.length (); ++i)
2803 {
2804 data_reference_p drb = datarefs_copy[i];
2805 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2806 if (! STMT_VINFO_VECTORIZABLE (stmtinfo_b))
2807 break;
2808
2809 /* ??? Imperfect sorting (non-compatible types, non-modulo
2810 accesses, same accesses) can lead to a group to be artificially
2811 split here as we don't just skip over those. If it really
2812 matters we can push those to a worklist and re-iterate
2813 over them. The we can just skip ahead to the next DR here. */
2814
2815 /* DRs in a different loop should not be put into the same
2816 interleaving group. */
2817 if (gimple_bb (DR_STMT (dra))->loop_father
2818 != gimple_bb (DR_STMT (drb))->loop_father)
2819 break;
2820
2821 /* Check that the data-refs have same first location (except init)
2822 and they are both either store or load (not load and store,
2823 not masked loads or stores). */
2824 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2825 || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2826 DR_BASE_ADDRESS (drb)) != 0
2827 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
2828 || !gimple_assign_single_p (DR_STMT (dra))
2829 || !gimple_assign_single_p (DR_STMT (drb)))
2830 break;
2831
2832 /* Check that the data-refs have the same constant size. */
2833 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2834 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2835 if (!tree_fits_uhwi_p (sza)
2836 || !tree_fits_uhwi_p (szb)
2837 || !tree_int_cst_equal (sza, szb))
2838 break;
2839
2840 /* Check that the data-refs have the same step. */
2841 if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
2842 break;
2843
2844 /* Check the types are compatible.
2845 ??? We don't distinguish this during sorting. */
2846 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2847 TREE_TYPE (DR_REF (drb))))
2848 break;
2849
2850 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2851 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2852 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2853 HOST_WIDE_INT init_prev
2854 = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
2855 gcc_assert (init_a <= init_b
2856 && init_a <= init_prev
2857 && init_prev <= init_b);
2858
2859 /* Do not place the same access in the interleaving chain twice. */
2860 if (init_b == init_prev)
2861 {
2862 gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
2863 < gimple_uid (DR_STMT (drb)));
2864 /* ??? For now we simply "drop" the later reference which is
2865 otherwise the same rather than finishing off this group.
2866 In the end we'd want to re-process duplicates forming
2867 multiple groups from the refs, likely by just collecting
2868 all candidates (including duplicates and split points
2869 below) in a vector and then process them together. */
2870 continue;
2871 }
2872
2873 /* If init_b == init_a + the size of the type * k, we have an
2874 interleaving, and DRA is accessed before DRB. */
2875 HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
2876 if (type_size_a == 0
2877 || (init_b - init_a) % type_size_a != 0)
2878 break;
2879
2880 /* If we have a store, the accesses are adjacent. This splits
2881 groups into chunks we support (we don't support vectorization
2882 of stores with gaps). */
2883 if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
2884 break;
2885
2886 /* If the step (if not zero or non-constant) is greater than the
2887 difference between data-refs' inits this splits groups into
2888 suitable sizes. */
2889 if (tree_fits_shwi_p (DR_STEP (dra)))
2890 {
2891 HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
2892 if (step != 0 && step <= (init_b - init_a))
2893 break;
2894 }
2895
2896 if (dump_enabled_p ())
2897 {
2898 dump_printf_loc (MSG_NOTE, vect_location,
2899 "Detected interleaving ");
2900 if (DR_IS_READ (dra))
2901 dump_printf (MSG_NOTE, "load ");
2902 else
2903 dump_printf (MSG_NOTE, "store ");
2904 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2905 dump_printf (MSG_NOTE, " and ");
2906 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2907 dump_printf (MSG_NOTE, "\n");
2908 }
2909
2910 /* Link the found element into the group list. */
2911 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2912 {
2913 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2914 lastinfo = stmtinfo_a;
2915 }
2916 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2917 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2918 lastinfo = stmtinfo_b;
2919 }
2920 }
2921
2922 FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
2923 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2924 && !vect_analyze_data_ref_access (dr))
2925 {
2926 if (dump_enabled_p ())
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2928 "not vectorized: complicated access pattern.\n");
2929
2930 if (is_a <bb_vec_info> (vinfo))
2931 {
2932 /* Mark the statement as not vectorizable. */
2933 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2934 continue;
2935 }
2936 else
2937 {
2938 datarefs_copy.release ();
2939 return false;
2940 }
2941 }
2942
2943 datarefs_copy.release ();
2944 return true;
2945}
2946
2947/* Function vect_vfa_segment_size.
2948
2949 Create an expression that computes the size of segment
2950 that will be accessed for a data reference. The functions takes into
2951 account that realignment loads may access one more vector.
2952
2953 Input:
2954 DR: The data reference.
2955 LENGTH_FACTOR: segment length to consider.
2956
2957 Return an expression whose value is the size of segment which will be
2958 accessed by DR. */
2959
2960static tree
2961vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2962{
2963 tree segment_length;
2964
2965 if (integer_zerop (DR_STEP (dr)))
2966 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2967 else
2968 segment_length = size_binop (MULT_EXPR,
2969 fold_convert (sizetype, DR_STEP (dr)),
2970 fold_convert (sizetype, length_factor));
2971
2972 if (vect_supportable_dr_alignment (dr, false)
2973 == dr_explicit_realign_optimized)
2974 {
2975 tree vector_size = TYPE_SIZE_UNIT
2976 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2977
2978 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2979 }
2980 return segment_length;
2981}
2982
2983/* Function vect_no_alias_p.
2984
2985 Given data references A and B with equal base and offset, the alias
2986 relation can be decided at compilation time, return TRUE if they do
2987 not alias to each other; return FALSE otherwise. SEGMENT_LENGTH_A
2988 and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
2989 respectively. */
2990
2991static bool
2992vect_no_alias_p (struct data_reference *a, struct data_reference *b,
2993 tree segment_length_a, tree segment_length_b)
2994{
2995 gcc_assert (TREE_CODE (DR_INIT (a)) == INTEGER_CST
2996 && TREE_CODE (DR_INIT (b)) == INTEGER_CST);
2997 if (tree_int_cst_equal (DR_INIT (a), DR_INIT (b)))
2998 return false;
2999
3000 tree seg_a_min = DR_INIT (a);
3001 tree seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_min),
3002 seg_a_min, segment_length_a);
3003 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
3004 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
3005 [a, a+12) */
3006 if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
3007 {
3008 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3009 seg_a_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_a_max),
3010 seg_a_max, unit_size);
3011 seg_a_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (a)),
3012 DR_INIT (a), unit_size);
3013 }
3014 tree seg_b_min = DR_INIT (b);
3015 tree seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_min),
3016 seg_b_min, segment_length_b);
3017 if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
3018 {
3019 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3020 seg_b_min = fold_build2 (PLUS_EXPR, TREE_TYPE (seg_b_max),
3021 seg_b_max, unit_size);
3022 seg_b_max = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_INIT (b)),
3023 DR_INIT (b), unit_size);
3024 }
3025
3026 if (tree_int_cst_le (seg_a_max, seg_b_min)
3027 || tree_int_cst_le (seg_b_max, seg_a_min))
3028 return true;
3029
3030 return false;
3031}
3032
3033/* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3034 in DDR is >= VF. */
3035
3036static bool
3037dependence_distance_ge_vf (data_dependence_relation *ddr,
3038 unsigned int loop_depth, unsigned HOST_WIDE_INT vf)
3039{
3040 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3041 || DDR_NUM_DIST_VECTS (ddr) == 0)
3042 return false;
3043
3044 /* If the dependence is exact, we should have limited the VF instead. */
3045 gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3046
3047 unsigned int i;
3048 lambda_vector dist_v;
3049 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3050 {
3051 HOST_WIDE_INT dist = dist_v[loop_depth];
3052 if (dist != 0
3053 && !(dist > 0 && DDR_REVERSED_P (ddr))
3054 && (unsigned HOST_WIDE_INT) abs_hwi (dist) < vf)
3055 return false;
3056 }
3057
3058 if (dump_enabled_p ())
3059 {
3060 dump_printf_loc (MSG_NOTE, vect_location,
3061 "dependence distance between ");
3062 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
3063 dump_printf (MSG_NOTE, " and ");
3064 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
3065 dump_printf (MSG_NOTE, " is >= VF\n");
3066 }
3067
3068 return true;
3069}
3070
3071/* Function vect_prune_runtime_alias_test_list.
3072
3073 Prune a list of ddrs to be tested at run-time by versioning for alias.
3074 Merge several alias checks into one if possible.
3075 Return FALSE if resulting list of ddrs is longer then allowed by
3076 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
3077
3078bool
3079vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
3080{
3081 typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
3082 hash_set <tree_pair_hash> compared_objects;
3083
3084 vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
3085 vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
3086 = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3087 vec<vec_object_pair> &check_unequal_addrs
3088 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3089 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3090 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
3091
3092 ddr_p ddr;
3093 unsigned int i;
3094 tree length_factor;
3095
3096 if (dump_enabled_p ())
3097 dump_printf_loc (MSG_NOTE, vect_location,
3098 "=== vect_prune_runtime_alias_test_list ===\n");
3099
3100 if (may_alias_ddrs.is_empty ())
3101 return true;
3102
3103 comp_alias_ddrs.create (may_alias_ddrs.length ());
3104
3105 unsigned int loop_depth
3106 = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
3107 LOOP_VINFO_LOOP_NEST (loop_vinfo));
3108
3109 /* First, we collect all data ref pairs for aliasing checks. */
3110 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
3111 {
3112 int comp_res;
3113 struct data_reference *dr_a, *dr_b;
3114 gimple *dr_group_first_a, *dr_group_first_b;
3115 tree segment_length_a, segment_length_b;
3116 gimple *stmt_a, *stmt_b;
3117
3118 /* Ignore the alias if the VF we chose ended up being no greater
3119 than the dependence distance. */
3120 if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
3121 continue;
3122
3123 if (DDR_OBJECT_A (ddr))
3124 {
3125 vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
3126 if (!compared_objects.add (new_pair))
3127 {
3128 if (dump_enabled_p ())
3129 {
3130 dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
3131 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
3132 dump_printf (MSG_NOTE, " and ");
3133 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
3134 dump_printf (MSG_NOTE, " have different addresses\n");
3135 }
3136 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
3137 }
3138 continue;
3139 }
3140
3141 dr_a = DDR_A (ddr);
3142 stmt_a = DR_STMT (DDR_A (ddr));
3143 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
3144 if (dr_group_first_a)
3145 {
3146 stmt_a = dr_group_first_a;
3147 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
3148 }
3149
3150 dr_b = DDR_B (ddr);
3151 stmt_b = DR_STMT (DDR_B (ddr));
3152 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
3153 if (dr_group_first_b)
3154 {
3155 stmt_b = dr_group_first_b;
3156 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
3157 }
3158
3159 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
3160 length_factor = scalar_loop_iters;
3161 else
3162 length_factor = size_int (vect_factor);
3163 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
3164 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
3165
3166 comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
3167 DR_BASE_ADDRESS (dr_b));
3168 if (comp_res == 0)
3169 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a),
3170 DR_OFFSET (dr_b));
3171
3172 /* Alias is known at compilation time. */
3173 if (comp_res == 0
3174 && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
3175 && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
3176 && TREE_CODE (segment_length_a) == INTEGER_CST
3177 && TREE_CODE (segment_length_b) == INTEGER_CST)
3178 {
3179 if (vect_no_alias_p (dr_a, dr_b, segment_length_a, segment_length_b))
3180 continue;
3181
3182 if (dump_enabled_p ())
3183 dump_printf_loc (MSG_NOTE, vect_location,
3184 "not vectorized: compilation time alias.\n");
3185
3186 return false;
3187 }
3188
3189 dr_with_seg_len_pair_t dr_with_seg_len_pair
3190 (dr_with_seg_len (dr_a, segment_length_a),
3191 dr_with_seg_len (dr_b, segment_length_b));
3192
3193 /* Canonicalize pairs by sorting the two DR members. */
3194 if (comp_res > 0)
3195 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
3196
3197 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3198 }
3199
3200 prune_runtime_alias_test_list (&comp_alias_ddrs,
3201 (unsigned HOST_WIDE_INT) vect_factor);
3202
3203 unsigned int count = (comp_alias_ddrs.length ()
3204 + check_unequal_addrs.length ());
3205 dump_printf_loc (MSG_NOTE, vect_location,
3206 "improved number of alias checks from %d to %d\n",
3207 may_alias_ddrs.length (), count);
3208 if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
3209 {
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3212 "number of versioning for alias "
3213 "run-time tests exceeds %d "
3214 "(--param vect-max-version-for-alias-checks)\n",
3215 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
3216 return false;
3217 }
3218
3219 return true;
3220}
3221
/* Return true if a non-affine read or write in STMT is suitable for a
   gather load or scatter store.  Describe the operation in *INFO if so.
   LOOP_VINFO describes the loop containing STMT; the target must supply
   a matching builtin (targetm.vectorize.builtin_gather/builtin_scatter)
   or we return false.  */

bool
vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
			   gather_scatter_info *info)
{
  HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl, base, off;
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (is_gimple_call (stmt)
      && gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
      && TREE_CODE (base) == MEM_REF
      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
      && integer_zerop (TREE_OPERAND (base, 1))
      && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    }

  /* The gather and scatter builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
			      &punsignedp, &reversep, &pvolatilep);
  gcc_assert (base && (pbitpos % BITS_PER_UNIT) == 0 && !reversep);

  if (TREE_CODE (base) == MEM_REF)
    {
      /* Fold a nonzero MEM_REF offset into OFF and continue with the
	 address operand as BASE.  */
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  if (off == NULL_TREE)
	    {
	      offset_int moff = mem_ref_offset (base);
	      off = wide_int_to_tree (sizetype, moff);
	    }
	  else
	    off = size_binop (PLUS_EXPR, off,
			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* If base is not loop invariant, either off is 0, then we start with just
     the constant offset in the loop invariant BASE and continue with base
     as OFF, otherwise give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and use that as off, but for now punt.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!integer_zerop (off))
	return false;
      off = base;
      base = size_int (pbitpos / BITS_PER_UNIT);
    }
  /* Otherwise put base + constant offset into the loop invariant BASE
     and continue with OFF.  */
  else
    {
      base = fold_convert (sizetype, base);
      base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
    }

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      /* Determine the defining operation of OFF, either from its
	 definition statement or from the expression itself.  */
      if (TREE_CODE (off) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);

	  /* A loop-invariant OFF cannot supply the vector part.  */
	  if (expr_invariant_in_loop_p (loop, off))
	    return false;

	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	    break;

	  op0 = gimple_assign_rhs1 (def_stmt);
	  code = gimple_assign_rhs_code (def_stmt);
	  op1 = gimple_assign_rhs2 (def_stmt);
	}
      else
	{
	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
	    return false;
	  code = TREE_CODE (off);
	  extract_ops_from_tree (off, &code, &op0, &op1);
	}
      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	  /* Fold a loop-invariant addend (scaled by SCALE) into BASE and
	     continue with the variant operand as OFF.  */
	  if (expr_invariant_in_loop_p (loop, op0))
	    {
	      add = op0;
	      off = op1;
	    do_add:
	      add = fold_convert (sizetype, add);
	      if (scale != 1)
		add = size_binop (MULT_EXPR, add, size_int (scale));
	      base = size_binop (PLUS_EXPR, base, add);
	      continue;
	    }
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = op1;
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MINUS_EXPR:
	  /* Fold a loop-invariant subtrahend into BASE as a negated
	     addend.  */
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = fold_convert (sizetype, op1);
	      add = size_binop (MINUS_EXPR, size_zero_node, add);
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MULT_EXPR:
	  /* Record a constant multiplier as the gather/scatter SCALE
	     (only one multiplication is supported).  */
	  if (scale == 1 && tree_fits_shwi_p (op1))
	    {
	      scale = tree_to_shwi (op1);
	      off = op0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* Plain copy; continue with the copied name.  */
	  off = op0;
	  continue;
	CASE_CONVERT:
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;
	  /* Look through same-precision conversions; a widening
	     conversion fixes OFFTYPE (the type the builtin extends
	     from) and terminates the peeling loop.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      continue;
	    }
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      < TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      offtype = TREE_TYPE (off);
	      STRIP_NOPS (off);
	      continue;
	    }
	  break;
	default:
	  break;
	}
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return false;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  /* Ask the target for a builtin implementing the access; fail if it
     has none for this vectype/offset-type/scale combination.  */
  if (DR_IS_READ (dr))
    decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
					     offtype, scale);
  else
    decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
					      offtype, scale);

  if (decl == NULL_TREE)
    return false;

  info->decl = decl;
  info->base = base;
  info->offset = off;
  info->offset_dt = vect_unknown_def_type;
  info->offset_vectype = NULL_TREE;
  info->scale = scale;
  return true;
}
3437
3438/* Function vect_analyze_data_refs.
3439
3440 Find all the data references in the loop or basic block.
3441
3442 The general structure of the analysis of data refs in the vectorizer is as
3443 follows:
3444 1- vect_analyze_data_refs(loop/bb): call
3445 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3446 in the loop/bb and their dependences.
3447 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3448 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3449 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3450
3451*/
3452
3453bool
3454vect_analyze_data_refs (vec_info *vinfo, int *min_vf)
3455{
3456 struct loop *loop = NULL;
3457 unsigned int i;
3458 struct data_reference *dr;
3459 tree scalar_type;
3460
3461 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_NOTE, vect_location,
3463 "=== vect_analyze_data_refs ===\n");
3464
3465 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3466 loop = LOOP_VINFO_LOOP (loop_vinfo);
3467
3468 /* Go through the data-refs, check that the analysis succeeded. Update
3469 pointer from stmt_vec_info struct to DR and vectype. */
3470
3471 vec<data_reference_p> datarefs = vinfo->datarefs;
3472 FOR_EACH_VEC_ELT (datarefs, i, dr)
3473 {
3474 gimple *stmt;
3475 stmt_vec_info stmt_info;
3476 tree base, offset, init;
3477 enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
3478 bool simd_lane_access = false;
3479 int vf;
3480
3481again:
3482 if (!dr || !DR_REF (dr))
3483 {
3484 if (dump_enabled_p ())
3485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3486 "not vectorized: unhandled data-ref\n");
3487 return false;
3488 }
3489
3490 stmt = DR_STMT (dr);
3491 stmt_info = vinfo_for_stmt (stmt);
3492
3493 /* Discard clobbers from the dataref vector. We will remove
3494 clobber stmts during vectorization. */
3495 if (gimple_clobber_p (stmt))
3496 {
3497 free_data_ref (dr);
3498 if (i == datarefs.length () - 1)
3499 {
3500 datarefs.pop ();
3501 break;
3502 }
3503 datarefs.ordered_remove (i);
3504 dr = datarefs[i];
3505 goto again;
3506 }
3507
3508 /* Check that analysis of the data-ref succeeded. */
3509 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3510 || !DR_STEP (dr))
3511 {
3512 bool maybe_gather
3513 = DR_IS_READ (dr)
3514 && !TREE_THIS_VOLATILE (DR_REF (dr))
3515 && targetm.vectorize.builtin_gather != NULL;
3516 bool maybe_scatter
3517 = DR_IS_WRITE (dr)
3518 && !TREE_THIS_VOLATILE (DR_REF (dr))
3519 && targetm.vectorize.builtin_scatter != NULL;
3520 bool maybe_simd_lane_access
3521 = is_a <loop_vec_info> (vinfo) && loop->simduid;
3522
3523 /* If target supports vector gather loads or scatter stores, or if
3524 this might be a SIMD lane access, see if they can't be used. */
3525 if (is_a <loop_vec_info> (vinfo)
3526 && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
3527 && !nested_in_vect_loop_p (loop, stmt))
3528 {
3529 struct data_reference *newdr
3530 = create_data_ref (NULL, loop_containing_stmt (stmt),
3531 DR_REF (dr), stmt, !maybe_scatter,
3532 DR_IS_CONDITIONAL_IN_STMT (dr));
3533 gcc_assert (newdr != NULL && DR_REF (newdr));
3534 if (DR_BASE_ADDRESS (newdr)
3535 && DR_OFFSET (newdr)
3536 && DR_INIT (newdr)
3537 && DR_STEP (newdr)
3538 && integer_zerop (DR_STEP (newdr)))
3539 {
3540 if (maybe_simd_lane_access)
3541 {
3542 tree off = DR_OFFSET (newdr);
3543 STRIP_NOPS (off);
3544 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3545 && TREE_CODE (off) == MULT_EXPR
3546 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3547 {
3548 tree step = TREE_OPERAND (off, 1);
3549 off = TREE_OPERAND (off, 0);
3550 STRIP_NOPS (off);
3551 if (CONVERT_EXPR_P (off)
3552 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3553 0)))
3554 < TYPE_PRECISION (TREE_TYPE (off)))
3555 off = TREE_OPERAND (off, 0);
3556 if (TREE_CODE (off) == SSA_NAME)
3557 {
3558 gimple *def = SSA_NAME_DEF_STMT (off);
3559 tree reft = TREE_TYPE (DR_REF (newdr));
3560 if (is_gimple_call (def)
3561 && gimple_call_internal_p (def)
3562 && (gimple_call_internal_fn (def)
3563 == IFN_GOMP_SIMD_LANE))
3564 {
3565 tree arg = gimple_call_arg (def, 0);
3566 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3567 arg = SSA_NAME_VAR (arg);
3568 if (arg == loop->simduid
3569 /* For now. */
3570 && tree_int_cst_equal
3571 (TYPE_SIZE_UNIT (reft),
3572 step))
3573 {
3574 DR_OFFSET (newdr) = ssize_int (0);
3575 DR_STEP (newdr) = step;
3576 DR_OFFSET_ALIGNMENT (newdr)
3577 = BIGGEST_ALIGNMENT;
3578 DR_STEP_ALIGNMENT (newdr)
3579 = highest_pow2_factor (step);
3580 dr = newdr;
3581 simd_lane_access = true;
3582 }
3583 }
3584 }
3585 }
3586 }
3587 if (!simd_lane_access && (maybe_gather || maybe_scatter))
3588 {
3589 dr = newdr;
3590 if (maybe_gather)
3591 gatherscatter = GATHER;
3592 else
3593 gatherscatter = SCATTER;
3594 }
3595 }
3596 if (gatherscatter == SG_NONE && !simd_lane_access)
3597 free_data_ref (newdr);
3598 }
3599
3600 if (gatherscatter == SG_NONE && !simd_lane_access)
3601 {
3602 if (dump_enabled_p ())
3603 {
3604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3605 "not vectorized: data ref analysis "
3606 "failed ");
3607 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3608 }
3609
3610 if (is_a <bb_vec_info> (vinfo))
3611 break;
3612
3613 return false;
3614 }
3615 }
3616
3617 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3618 {
3619 if (dump_enabled_p ())
3620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3621 "not vectorized: base addr of dr is a "
3622 "constant\n");
3623
3624 if (is_a <bb_vec_info> (vinfo))
3625 break;
3626
3627 if (gatherscatter != SG_NONE || simd_lane_access)
3628 free_data_ref (dr);
3629 return false;
3630 }
3631
3632 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3633 {
3634 if (dump_enabled_p ())
3635 {
3636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3637 "not vectorized: volatile type ");
3638 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3639 }
3640
3641 if (is_a <bb_vec_info> (vinfo))
3642 break;
3643
3644 return false;
3645 }
3646
3647 if (stmt_can_throw_internal (stmt))
3648 {
3649 if (dump_enabled_p ())
3650 {
3651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3652 "not vectorized: statement can throw an "
3653 "exception ");
3654 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3655 }
3656
3657 if (is_a <bb_vec_info> (vinfo))
3658 break;
3659
3660 if (gatherscatter != SG_NONE || simd_lane_access)
3661 free_data_ref (dr);
3662 return false;
3663 }
3664
3665 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3666 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3667 {
3668 if (dump_enabled_p ())
3669 {
3670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3671 "not vectorized: statement is bitfield "
3672 "access ");
3673 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3674 }
3675
3676 if (is_a <bb_vec_info> (vinfo))
3677 break;
3678
3679 if (gatherscatter != SG_NONE || simd_lane_access)
3680 free_data_ref (dr);
3681 return false;
3682 }
3683
3684 base = unshare_expr (DR_BASE_ADDRESS (dr));
3685 offset = unshare_expr (DR_OFFSET (dr));
3686 init = unshare_expr (DR_INIT (dr));
3687
3688 if (is_gimple_call (stmt)
3689 && (!gimple_call_internal_p (stmt)
3690 || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
3691 && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
3692 {
3693 if (dump_enabled_p ())
3694 {
3695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3696 "not vectorized: dr in a call ");
3697 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3698 }
3699
3700 if (is_a <bb_vec_info> (vinfo))
3701 break;
3702
3703 if (gatherscatter != SG_NONE || simd_lane_access)
3704 free_data_ref (dr);
3705 return false;
3706 }
3707
3708 /* Update DR field in stmt_vec_info struct. */
3709
3710 /* If the dataref is in an inner-loop of the loop that is considered for
3711 for vectorization, we also want to analyze the access relative to
3712 the outer-loop (DR contains information only relative to the
3713 inner-most enclosing loop). We do that by building a reference to the
3714 first location accessed by the inner-loop, and analyze it relative to
3715 the outer-loop. */
3716 if (loop && nested_in_vect_loop_p (loop, stmt))
3717 {
3718 /* Build a reference to the first location accessed by the
3719 inner loop: *(BASE + INIT + OFFSET). By construction,
3720 this address must be invariant in the inner loop, so we
3721 can consider it as being used in the outer loop. */
3722 tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
3723 init, offset);
3724 tree init_addr = fold_build_pointer_plus (base, init_offset);
3725 tree init_ref = build_fold_indirect_ref (init_addr);
3726
3727 if (dump_enabled_p ())
3728 {
3729 dump_printf_loc (MSG_NOTE, vect_location,
3730 "analyze in outer loop: ");
3731 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
3732 dump_printf (MSG_NOTE, "\n");
3733 }
3734
3735 if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
3736 init_ref, loop))
3737 /* dr_analyze_innermost already explained the failure. */
3738 return false;
3739
3740 if (dump_enabled_p ())
3741 {
3742 dump_printf_loc (MSG_NOTE, vect_location,
3743 "\touter base_address: ");
3744 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3745 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3746 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3747 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3748 STMT_VINFO_DR_OFFSET (stmt_info));
3749 dump_printf (MSG_NOTE,
3750 "\n\touter constant offset from base address: ");
3751 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3752 STMT_VINFO_DR_INIT (stmt_info));
3753 dump_printf (MSG_NOTE, "\n\touter step: ");
3754 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3755 STMT_VINFO_DR_STEP (stmt_info));
3756 dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
3757 STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
3758 dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
3759 STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
3760 dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
3761 STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
3762 dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
3763 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
3764 }
3765 }
3766
3767 if (STMT_VINFO_DATA_REF (stmt_info))
3768 {
3769 if (dump_enabled_p ())
3770 {
3771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3772 "not vectorized: more than one data ref "
3773 "in stmt: ");
3774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3775 }
3776
3777 if (is_a <bb_vec_info> (vinfo))
3778 break;
3779
3780 if (gatherscatter != SG_NONE || simd_lane_access)
3781 free_data_ref (dr);
3782 return false;
3783 }
3784
3785 STMT_VINFO_DATA_REF (stmt_info) = dr;
3786 if (simd_lane_access)
3787 {
3788 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3789 free_data_ref (datarefs[i]);
3790 datarefs[i] = dr;
3791 }
3792
3793 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == ADDR_EXPR
3794 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0))
3795 && DECL_NONALIASED (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0)))
3796 {
3797 if (dump_enabled_p ())
3798 {
3799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3800 "not vectorized: base object not addressable "
3801 "for stmt: ");
3802 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3803 }
3804 if (is_a <bb_vec_info> (vinfo))
3805 {
3806 /* In BB vectorization the ref can still participate
3807 in dependence analysis, we just can't vectorize it. */
3808 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3809 continue;
3810 }
3811 return false;
3812 }
3813
3814 /* Set vectype for STMT. */
3815 scalar_type = TREE_TYPE (DR_REF (dr));
3816 STMT_VINFO_VECTYPE (stmt_info)
3817 = get_vectype_for_scalar_type (scalar_type);
3818 if (!STMT_VINFO_VECTYPE (stmt_info))
3819 {
3820 if (dump_enabled_p ())
3821 {
3822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3823 "not vectorized: no vectype for stmt: ");
3824 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3825 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3826 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3827 scalar_type);
3828 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3829 }
3830
3831 if (is_a <bb_vec_info> (vinfo))
3832 {
3833 /* No vector type is fine, the ref can still participate
3834 in dependence analysis, we just can't vectorize it. */
3835 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3836 continue;
3837 }
3838
3839 if (gatherscatter != SG_NONE || simd_lane_access)
3840 {
3841 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3842 if (gatherscatter != SG_NONE)
3843 free_data_ref (dr);
3844 }
3845 return false;
3846 }
3847 else
3848 {
3849 if (dump_enabled_p ())
3850 {
3851 dump_printf_loc (MSG_NOTE, vect_location,
3852 "got vectype for stmt: ");
3853 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3854 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3855 STMT_VINFO_VECTYPE (stmt_info));
3856 dump_printf (MSG_NOTE, "\n");
3857 }
3858 }
3859
3860 /* Adjust the minimal vectorization factor according to the
3861 vector type. */
3862 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3863 if (vf > *min_vf)
3864 *min_vf = vf;
3865
3866 if (gatherscatter != SG_NONE)
3867 {
3868 gather_scatter_info gs_info;
3869 if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
3870 &gs_info)
3871 || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
3872 {
3873 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3874 free_data_ref (dr);
3875 if (dump_enabled_p ())
3876 {
3877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3878 (gatherscatter == GATHER) ?
3879 "not vectorized: not suitable for gather "
3880 "load " :
3881 "not vectorized: not suitable for scatter "
3882 "store ");
3883 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3884 }
3885 return false;
3886 }
3887
3888 free_data_ref (datarefs[i]);
3889 datarefs[i] = dr;
3890 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
3891 }
3892
3893 else if (is_a <loop_vec_info> (vinfo)
3894 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3895 {
3896 if (nested_in_vect_loop_p (loop, stmt))
3897 {
3898 if (dump_enabled_p ())
3899 {
3900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3901 "not vectorized: not suitable for strided "
3902 "load ");
3903 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3904 }
3905 return false;
3906 }
3907 STMT_VINFO_STRIDED_P (stmt_info) = true;
3908 }
3909 }
3910
3911 /* If we stopped analysis at the first dataref we could not analyze
3912 when trying to vectorize a basic-block mark the rest of the datarefs
3913 as not vectorizable and truncate the vector of datarefs. That
3914 avoids spending useless time in analyzing their dependence. */
3915 if (i != datarefs.length ())
3916 {
3917 gcc_assert (is_a <bb_vec_info> (vinfo));
3918 for (unsigned j = i; j < datarefs.length (); ++j)
3919 {
3920 data_reference_p dr = datarefs[j];
3921 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3922 free_data_ref (dr);
3923 }
3924 datarefs.truncate (i);
3925 }
3926
3927 return true;
3928}
3929
3930
3931/* Function vect_get_new_vect_var.
3932
3933 Returns a name for a new variable. The current naming scheme appends the
3934 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3935 the name of vectorizer generated variables, and appends that to NAME if
3936 provided. */
3937
3938tree
3939vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3940{
3941 const char *prefix;
3942 tree new_vect_var;
3943
3944 switch (var_kind)
3945 {
3946 case vect_simple_var:
3947 prefix = "vect";
3948 break;
3949 case vect_scalar_var:
3950 prefix = "stmp";
3951 break;
3952 case vect_mask_var:
3953 prefix = "mask";
3954 break;
3955 case vect_pointer_var:
3956 prefix = "vectp";
3957 break;
3958 default:
3959 gcc_unreachable ();
3960 }
3961
3962 if (name)
3963 {
3964 char* tmp = concat (prefix, "_", name, NULL);
3965 new_vect_var = create_tmp_reg (type, tmp);
3966 free (tmp);
3967 }
3968 else
3969 new_vect_var = create_tmp_reg (type, prefix);
3970
3971 return new_vect_var;
3972}
3973
3974/* Like vect_get_new_vect_var but return an SSA name. */
3975
3976tree
3977vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
3978{
3979 const char *prefix;
3980 tree new_vect_var;
3981
3982 switch (var_kind)
3983 {
3984 case vect_simple_var:
3985 prefix = "vect";
3986 break;
3987 case vect_scalar_var:
3988 prefix = "stmp";
3989 break;
3990 case vect_pointer_var:
3991 prefix = "vectp";
3992 break;
3993 default:
3994 gcc_unreachable ();
3995 }
3996
3997 if (name)
3998 {
3999 char* tmp = concat (prefix, "_", name, NULL);
4000 new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4001 free (tmp);
4002 }
4003 else
4004 new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4005
4006 return new_vect_var;
4007}
4008
4009/* Duplicate ptr info and set alignment/misaligment on NAME from DR. */
4010
4011static void
4012vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr)
4013{
4014 duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
4015 int misalign = DR_MISALIGNMENT (dr);
4016 if (misalign == DR_MISALIGNMENT_UNKNOWN)
4017 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4018 else
4019 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
4020 DR_TARGET_ALIGNMENT (dr), misalign);
4021}
4022
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specify relative to which loop-nest should the address be computed.
            For example, when the dataref is in an inner-loop nested in an
	    outer-loop that is now being vectorized, LOOP can be either the
	    outer-loop, or the inner-loop.  The first memory location accessed
	    by the following dataref ('in' points to short):

		for (i=0; i<N; i++)
		   for (j=0; j<M; j++)
		     s += in[i+j]

	    is as follows:
	    if LOOP=i_loop:	&in		(relative to i_loop)
	    if LOOP=j_loop:	&in+i*2B	(relative to j_loop)
   BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
	    initial address.  Unlike OFFSET, which is number of elements to
	    be added, BYTE_OFFSET is measured in bytes.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

tree
vect_create_addr_base_for_vector_ref (gimple *stmt,
				      gimple_seq *new_stmt_list,
				      tree offset,
				      tree byte_offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  const char *base_name;
  tree addr_base;
  tree dest;
  gimple_seq seq = NULL;
  tree vect_ptr_type;
  /* Element size in bytes; OFFSET is counted in elements and is scaled
     by this step below.  */
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  innermost_loop_behavior *drb = vect_dr_behavior (dr);

  tree data_ref_base = unshare_expr (drb->base_address);
  tree base_offset = unshare_expr (drb->offset);
  tree init = unshare_expr (drb->init);

  if (loop_vinfo)
    base_name = get_name (data_ref_base);
  else
    {
      /* Basic-block vectorization: the address is taken directly from the
	 scalar reference further below, so drop the offset/init parts.  */
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = get_name (DR_REF (dr));
    }

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR,
			    fold_convert (sizetype, base_offset),
			    fold_convert (sizetype, init));

  if (offset)
    {
      /* OFFSET is in elements; convert to bytes before adding.  */
      offset = fold_build2 (MULT_EXPR, sizetype,
			    fold_convert (sizetype, offset), step);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, offset);
    }
  if (byte_offset)
    {
      /* BYTE_OFFSET is already in bytes; add it unscaled.  */
      byte_offset = fold_convert (sizetype, byte_offset);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, byte_offset);
    }

  /* base + base_offset */
  if (loop_vinfo)
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
  else
    {
      addr_base = build1 (ADDR_EXPR,
			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
			  unshare_expr (DR_REF (dr)));
    }

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
  dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
  /* Gimplify the address computation; any defining statements are
     collected in SEQ and appended to NEW_STMT_LIST for the caller to
     insert.  */
  addr_base = force_gimple_operand (addr_base, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (addr_base) == SSA_NAME
      && !SSA_NAME_PTR_INFO (addr_base))
    {
      vect_duplicate_ssa_name_ptr_info (addr_base, dr);
      /* The extra offsets may have changed the alignment recorded in the
	 data-ref's points-to info, so drop the alignment part.  */
      if (offset || byte_offset)
	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "created ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
      dump_printf (MSG_NOTE, "\n");
    }

  return addr_base;
}
4139
4140
/* Function vect_create_data_ref_ptr.

   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   location accessed in the loop by STMT, along with the def-use update
   chain to appropriately advance the pointer through the loop iterations.
   Also set aliasing information for the pointer.  This pointer is used by
   the callers to this function to create a memory reference expression for
   vector load/store access.

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
         GIMPLE_ASSIGN <name, data-ref> or
	 GIMPLE_ASSIGN <data-ref, name>.
   2. AGGR_TYPE: the type of the reference, which should be either a vector
        or an array.
   3. AT_LOOP: the loop where the vector memref is to be created.
   4. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   5. BSI: location where the new stmts are to be placed if there is no loop
   6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
        pointing to the initial address.
   7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
	to the initial address accessed by the data-ref in STMT.  This is
	similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
	in bytes.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *ap;
      ap = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];
      if BYTE_OFFSET is supplied:
	 initial_address = &a[init] + BYTE_OFFSET;

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */

tree
vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
			  tree offset, tree *initial_address,
			  gimple_stmt_iterator *gsi, gimple **ptr_incr,
			  bool only_init, bool *inv_p, tree byte_offset)
{
  const char *base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = NULL;
  tree aggr_ptr_type;
  tree aggr_ptr;
  tree new_temp;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree aggr_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree aptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple *incr;
  tree step;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
	      || TREE_CODE (aggr_type) == VECTOR_TYPE);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      containing_loop = (gimple_bb (stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      /* Basic-block vectorization: there is no loop to advance the
	 pointer in, so only the initial pointer is created.  */
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Check the step (evolution) of the load in LOOP, and record
     whether it's invariant.  */
  step = vect_dr_behavior (dr)->step;
  if (integer_zerop (step))
    *inv_p = true;
  else
    *inv_p = false;

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = get_name (DR_BASE_ADDRESS (dr));

  if (dump_enabled_p ())
    {
      tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
      dump_printf_loc (MSG_NOTE, vect_location,
                       "create %s-pointer variable to type: ",
		       get_tree_code_name (TREE_CODE (aggr_type)));
      dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
      if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
      else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
      else
        dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
      dump_printf (MSG_NOTE, "\n");
    }

  /* (1) Create the new aggregate-pointer variable.
     Vector and array types inherit the alias set of their component
     type by default so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregated data
     reference because it is not addressable.  */
  bool need_ref_all = false;
  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
			      get_alias_set (DR_REF (dr))))
    need_ref_all = true;
  /* Likewise for any of the data references in the stmt group.  */
  else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
    {
      gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
      do
	{
	  stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
				      get_alias_set (DR_REF (sdr))))
	    {
	      need_ref_all = true;
	      break;
	    }
	  orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
	}
      while (orig_stmt);
    }
  aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
					       need_ref_all);
  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);


  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
		...
		vp2 = vp1 + step
		goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

	vp0 = &base_addr;
	LOOP:   vp1 = phi(vp0,vp2)
		...
        inner:     vp3 = phi(vp1,vp4)
	           vp4 = vp3 + inner_step
	           if () goto inner
		...
		vp2 = vp1 + step
		if () goto LOOP   */

  /* (2) Calculate the initial address of the aggregate-pointer, and set
     the aggregate-pointer to point to it before the loop.  */

  /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset, byte_offset);
  if (new_stmt_list)
    {
      if (pe)
	{
	  /* Loop case: materialize the address computation on the
	     preheader edge so it dominates the loop body; the insertion
	     must not have split the edge (hence the assert).  */
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;
  aggr_ptr_init = new_temp;

  /* (3) Handle the updating of the aggregate-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    aptr = aggr_ptr_init;
  else
    {
      /* The step of the aggregate pointer is the type size.  */
      tree iv_step = TYPE_SIZE_UNIT (aggr_type);
      /* One exception to the above is when the scalar step of the load in
	 LOOP is zero. In this case the step here is also zero.  */
      if (*inv_p)
	iv_step = size_zero_node;
      else if (tree_int_cst_sgn (step) == -1)
	iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (aggr_ptr_init,
		 fold_convert (aggr_ptr_type, iv_step),
		 aggr_ptr, loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      /* The new increment stmt needs vectorizer bookkeeping like any
	 other stmt in the loop.  */
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      aptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return aptr;


  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if exists.  */

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      /* Second IV update, relative to the inner-most loop containing the
	 dataref, stepping by the scalar DR_STEP.  */
      standard_iv_increment_position (containing_loop, &incr_gsi,
				      &insert_after);
      create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
		 &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      return indx_before_incr;
    }
  else
    gcc_unreachable ();
}
4425
4426
4427/* Function bump_vector_ptr
4428
4429 Increment a pointer (to a vector type) by vector-size. If requested,
4430 i.e. if PTR-INCR is given, then also connect the new increment stmt
4431 to the existing def-use update-chain of the pointer, by modifying
4432 the PTR_INCR as illustrated below:
4433
4434 The pointer def-use update-chain before this function:
4435 DATAREF_PTR = phi (p_0, p_2)
4436 ....
4437 PTR_INCR: p_2 = DATAREF_PTR + step
4438
4439 The pointer def-use update-chain after this function:
4440 DATAREF_PTR = phi (p_0, p_2)
4441 ....
4442 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4443 ....
4444 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4445
4446 Input:
4447 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4448 in the loop.
4449 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4450 the loop. The increment amount across iterations is expected
4451 to be vector_size.
4452 BSI - location where the new update stmt is to be placed.
4453 STMT - the original scalar memory-access stmt that is being vectorized.
4454 BUMP - optional. The offset by which to bump the pointer. If not given,
4455 the offset is assumed to be vector_size.
4456
4457 Output: Return NEW_DATAREF_PTR as illustrated above.
4458
4459*/
4460
4461tree
4462bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
4463 gimple *stmt, tree bump)
4464{
4465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4466 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4467 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4468 tree update = TYPE_SIZE_UNIT (vectype);
4469 gassign *incr_stmt;
4470 ssa_op_iter iter;
4471 use_operand_p use_p;
4472 tree new_dataref_ptr;
4473
4474 if (bump)
4475 update = bump;
4476
4477 if (TREE_CODE (dataref_ptr) == SSA_NAME)
4478 new_dataref_ptr = copy_ssa_name (dataref_ptr);
4479 else
4480 new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
4481 incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
4482 dataref_ptr, update);
4483 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4484
4485 /* Copy the points-to information if it exists. */
4486 if (DR_PTR_INFO (dr))
4487 {
4488 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4489 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4490 }
4491
4492 if (!ptr_incr)
4493 return new_dataref_ptr;
4494
4495 /* Update the vector-pointer's cross-iteration increment. */
4496 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4497 {
4498 tree use = USE_FROM_PTR (use_p);
4499
4500 if (use == dataref_ptr)
4501 SET_USE (use_p, new_dataref_ptr);
4502 else
4503 gcc_assert (tree_int_cst_compare (use, update) == 0);
4504 }
4505
4506 return new_dataref_ptr;
4507}
4508
4509
4510/* Function vect_create_destination_var.
4511
4512 Create a new temporary of type VECTYPE. */
4513
4514tree
4515vect_create_destination_var (tree scalar_dest, tree vectype)
4516{
4517 tree vec_dest;
4518 const char *name;
4519 char *new_name;
4520 tree type;
4521 enum vect_var_kind kind;
4522
4523 kind = vectype
4524 ? VECTOR_BOOLEAN_TYPE_P (vectype)
4525 ? vect_mask_var
4526 : vect_simple_var
4527 : vect_scalar_var;
4528 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4529
4530 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4531
4532 name = get_name (scalar_dest);
4533 if (name)
4534 new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4535 else
4536 new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
4537 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4538 free (new_name);
4539
4540 return vec_dest;
4541}
4542
4543/* Function vect_grouped_store_supported.
4544
4545 Returns TRUE if interleave high and interleave low permutations
4546 are supported, and FALSE otherwise. */
4547
4548bool
4549vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4550{
4551 machine_mode mode = TYPE_MODE (vectype);
4552
4553 /* vect_permute_store_chain requires the group size to be equal to 3 or
4554 be a power of two. */
4555 if (count != 3 && exact_log2 (count) == -1)
4556 {
4557 if (dump_enabled_p ())
4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559 "the size of the group of accesses"
4560 " is not a power of 2 or not eqaul to 3\n");
4561 return false;
4562 }
4563
4564 /* Check that the permutation is supported. */
4565 if (VECTOR_MODE_P (mode))
4566 {
4567 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4568 auto_vec_perm_indices sel (nelt);
4569 sel.quick_grow (nelt);
4570
4571 if (count == 3)
4572 {
4573 unsigned int j0 = 0, j1 = 0, j2 = 0;
4574 unsigned int i, j;
4575
4576 for (j = 0; j < 3; j++)
4577 {
4578 int nelt0 = ((3 - j) * nelt) % 3;
4579 int nelt1 = ((3 - j) * nelt + 1) % 3;
4580 int nelt2 = ((3 - j) * nelt + 2) % 3;
4581 for (i = 0; i < nelt; i++)
4582 {
4583 if (3 * i + nelt0 < nelt)
4584 sel[3 * i + nelt0] = j0++;
4585 if (3 * i + nelt1 < nelt)
4586 sel[3 * i + nelt1] = nelt + j1++;
4587 if (3 * i + nelt2 < nelt)
4588 sel[3 * i + nelt2] = 0;
4589 }
4590 if (!can_vec_perm_p (mode, false, &sel))
4591 {
4592 if (dump_enabled_p ())
4593 dump_printf (MSG_MISSED_OPTIMIZATION,
4594 "permutaion op not supported by target.\n");
4595 return false;
4596 }
4597
4598 for (