1/* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "ssa.h"
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
35#include "dumpfile.h"
36#include "alias.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "tree-eh.h"
40#include "gimplify.h"
41#include "gimple-iterator.h"
42#include "gimplify-me.h"
43#include "tree-cfg.h"
44#include "tree-ssa-loop-manip.h"
45#include "cfgloop.h"
46#include "tree-ssa-loop.h"
47#include "tree-scalar-evolution.h"
48#include "tree-vectorizer.h"
49#include "builtins.h"
50#include "internal-fn.h"
51#include "tree-vector-builder.h"
52
53/* For lang_hooks.types.type_for_mode. */
54#include "langhooks.h"
55
56/* Says whether a statement is a load, a store of a vectorized statement
57 result, or a store of an invariant value. */
58enum vec_load_store_type {
59 VLS_LOAD,
60 VLS_STORE,
61 VLS_STORE_INVARIANT
62};
63
64/* Return the vectorized type for the given statement. */
65
66tree
67stmt_vectype (struct _stmt_vec_info *stmt_info)
68{
69 return STMT_VINFO_VECTYPE (stmt_info);
70}
71
72/* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74bool
75stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76{
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
80 struct loop* loop;
81
82 if (!loop_vinfo)
83 return false;
84
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
86
87 return (bb->loop_father == loop->inner);
88}
89
90/* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
93
94unsigned
95record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
97 int misalign, enum vect_cost_model_location where)
98{
99 if ((kind == vector_load || kind == unaligned_load)
100 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
104 kind = vector_scatter_store;
105 if (body_cost_vec)
106 {
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 stmt_info_for_cost si = { count, kind,
109 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
110 misalign };
111 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 }
115 else
116 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
117 count, kind, stmt_info, misalign, where);
118}
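
/* Illustrative usage sketch (an example added for exposition, not part of
   the original sources): a typical body-cost call from one of the
   vect_model_* routines below records NCOPIES copies of a plain vector
   statement:

     unsigned inside_cost
       = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
			   stmt_info, 0, vect_body);

   When BODY_COST_VEC is NULL, the cost is instead passed straight to the
   target hook via add_stmt_cost.  */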
119
120/* Return a variable of type ELEM_TYPE[NELEMS]. */
121
122static tree
123create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
124{
125 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
126 "vect_array");
127}
128
129/* ARRAY is an array of vectors created by create_vector_array.
130 Return an SSA_NAME for the vector in index N. The reference
131 is part of the vectorization of STMT and the vector is associated
132 with scalar destination SCALAR_DEST. */
133
134static tree
135read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
136 tree array, unsigned HOST_WIDE_INT n)
137{
138 tree vect_type, vect, vect_name, array_ref;
139 gimple *new_stmt;
140
141 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
142 vect_type = TREE_TYPE (TREE_TYPE (array));
143 vect = vect_create_destination_var (scalar_dest, vect_type);
144 array_ref = build4 (ARRAY_REF, vect_type, array,
145 build_int_cst (size_type_node, n),
146 NULL_TREE, NULL_TREE);
147
148 new_stmt = gimple_build_assign (vect, array_ref);
149 vect_name = make_ssa_name (vect, new_stmt);
150 gimple_assign_set_lhs (new_stmt, vect_name);
151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
152
153 return vect_name;
154}
155
156/* ARRAY is an array of vectors created by create_vector_array.
157 Emit code to store SSA_NAME VECT in index N of the array.
158 The store is part of the vectorization of STMT. */
159
160static void
161write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
162 tree array, unsigned HOST_WIDE_INT n)
163{
164 tree array_ref;
165 gimple *new_stmt;
166
167 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
168 build_int_cst (size_type_node, n),
169 NULL_TREE, NULL_TREE);
170
171 new_stmt = gimple_build_assign (array_ref, vect);
172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
173}
174
175/* PTR is a pointer to an array of type TYPE.  Return a representation
176   of *PTR, using ALIAS_PTR_TYPE as the alias pointer type.  The memory
177   reference replaces those in the first data reference of the group.  */
178
179static tree
180create_array_ref (tree type, tree ptr, tree alias_ptr_type)
181{
182 tree mem_ref;
183
184 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
185 /* Arrays have the same alignment as their type. */
186 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
187 return mem_ref;
188}
189
190/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
191
192/* Function vect_mark_relevant.
193
194 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
195
196static void
197vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
198 enum vect_relevant relevant, bool live_p)
199{
200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
203 gimple *pattern_stmt;
204
205 if (dump_enabled_p ())
206 {
207 dump_printf_loc (MSG_NOTE, vect_location,
208 "mark relevant %d, live %d: ", relevant, live_p);
209 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
210 }
211
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214     may have their own uses that are not in any pattern; in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 {
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
222
223 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_info = vinfo_for_stmt (pattern_stmt);
230 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
231 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
232 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
233 stmt = pattern_stmt;
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt);
250}
251
252
253/* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT is simple and all uses of it are invariant. */
256
257bool
258is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
259{
260 tree op;
261 gimple *def_stmt;
262 ssa_op_iter iter;
263
264 if (!is_gimple_assign (stmt))
265 return false;
266
267 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
268 {
269 enum vect_def_type dt = vect_uninitialized_def;
270
271 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
275 "use not simple.\n");
276 return false;
277 }
278
279 if (dt != vect_external_def && dt != vect_constant_def)
280 return false;
281 }
282 return true;
283}
284
285/* Function vect_stmt_relevant_p.
286
287   Return true if STMT, in the loop represented by LOOP_VINFO, is
288   "relevant for vectorization".
289
290 A stmt is considered "relevant for vectorization" if:
291 - it has uses outside the loop.
292 - it has vdefs (it alters memory).
293    - it is a control stmt in the loop (other than the loop exit condition).
294
295 CHECKME: what other side effects would the vectorizer allow? */
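
/* A purely illustrative (hypothetical) example of the criteria above:

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i];      <-- alters memory (has a vdef): relevant
	 t = b[i] + 1;     <-- result used after the loop: live
       }
     ... = t;

   The store to a[i] is marked vect_used_in_scope because it has a vdef,
   while the statement computing t is marked live because its value is
   used outside the loop (through the loop exit phi).  */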
296
297static bool
298vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
299 enum vect_relevant *relevant, bool *live_p)
300{
301 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
302 ssa_op_iter op_iter;
303 imm_use_iterator imm_iter;
304 use_operand_p use_p;
305 def_operand_p def_p;
306
307 *relevant = vect_unused_in_scope;
308 *live_p = false;
309
310 /* cond stmt other than loop exit cond. */
311 if (is_ctrl_stmt (stmt)
312 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
313 != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt)
319 && !gimple_clobber_p (stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (dump_enabled_p ())
336 dump_printf_loc (MSG_NOTE, vect_location,
337 "vec_stmt_relevant_p: used out of loop.\n");
338
339 if (is_gimple_debug (USE_STMT (use_p)))
340 continue;
341
342	      /* We expect all such uses to be in the loop exit phis
343		 (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362}
363
364
365/* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
369
370static bool
371exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
372{
373 tree operand;
374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
375
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info))
380 return true;
381
382  /* STMT has a data_ref.  FORNOW this means that it is one of
383 the following forms:
384 -1- ARRAY_REF = var
385 -2- var = ARRAY_REF
386 (This should have been verified in analyze_data_refs).
387
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
390 for array indexing.
391
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
394
395 if (!gimple_assign_copy_p (stmt))
396 {
397 if (is_gimple_call (stmt)
398 && gimple_call_internal_p (stmt))
399 switch (gimple_call_internal_fn (stmt))
400 {
401 case IFN_MASK_STORE:
402 operand = gimple_call_arg (stmt, 3);
403 if (operand == use)
404 return true;
405 /* FALLTHRU */
406 case IFN_MASK_LOAD:
407 operand = gimple_call_arg (stmt, 2);
408 if (operand == use)
409 return true;
410 break;
411 default:
412 break;
413 }
414 return false;
415 }
416
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
422
423 if (operand == use)
424 return true;
425
426 return false;
427}
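
/* For example (an illustrative case, not from the original sources): for
   a scalar store of the form

     a[i_1] = x_2;

   the use of x_2 supplies the stored value, so the function above returns
   true for x_2, whereas i_1 is only used to compute the address and the
   function returns false for it.  */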
428
429
430/*
431 Function process_use.
432
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438   - FORCE is true if the exist_non_indexing_operands_for_use_p check should
439     not be performed.
440
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447   - case 1: If USE is used only for address computations (e.g. array indexing),
448     which do not need to be directly vectorized, then the liveness/relevance
449     of the respective DEF_STMT is left unchanged.
450   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451     skip DEF_STMT because it has already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
454
455 Return true if everything is as expected. Return false otherwise. */
456
457static bool
458process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
459 enum vect_relevant relevant, vec<gimple *> *worklist,
460 bool force)
461{
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 gimple *def_stmt;
467 enum vect_def_type dt;
468
469 /* case 1: we are only interested in uses that need to be vectorized. Uses
470 that are used for address computation are not considered relevant. */
471 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
472 return true;
473
474 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
475 {
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
478 "not vectorized: unsupported use in stmt.\n");
479 return false;
480 }
481
482 if (!def_stmt || gimple_nop_p (def_stmt))
483 return true;
484
485 def_bb = gimple_bb (def_stmt);
486 if (!flow_bb_inside_loop_p (loop, def_bb))
487 {
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
490 return true;
491 }
492
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo = vinfo_for_stmt (def_stmt);
499 bb = gimple_bb (stmt);
500 if (gimple_code (stmt) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
502 && gimple_code (def_stmt) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
504 && bb->loop_father == def_bb->loop_father)
505 {
506 if (dump_enabled_p ())
507 dump_printf_loc (MSG_NOTE, vect_location,
508 "reduc-stmt defining reduc-phi in the same nest.\n");
509 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
510 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
511 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
512 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
513 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
514 return true;
515 }
516
517 /* case 3a: outer-loop stmt defining an inner-loop stmt:
518 outer-loop-header-bb:
519 d = def_stmt
520 inner-loop:
521 stmt # use (d)
522 outer-loop-tail-bb:
523 ... */
524 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
525 {
526 if (dump_enabled_p ())
527 dump_printf_loc (MSG_NOTE, vect_location,
528 "outer-loop def-stmt defining inner-loop stmt.\n");
529
530 switch (relevant)
531 {
532 case vect_unused_in_scope:
533 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
534 vect_used_in_scope : vect_unused_in_scope;
535 break;
536
537 case vect_used_in_outer_by_reduction:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
539 relevant = vect_used_by_reduction;
540 break;
541
542 case vect_used_in_outer:
543 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
544 relevant = vect_used_in_scope;
545 break;
546
547 case vect_used_in_scope:
548 break;
549
550 default:
551 gcc_unreachable ();
552 }
553 }
554
555 /* case 3b: inner-loop stmt defining an outer-loop stmt:
556 outer-loop-header-bb:
557 ...
558 inner-loop:
559 d = def_stmt
560 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
561 stmt # use (d) */
562 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
563 {
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_NOTE, vect_location,
566 "inner-loop def-stmt defining outer-loop stmt.\n");
567
568 switch (relevant)
569 {
570 case vect_unused_in_scope:
571 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
572 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
573 vect_used_in_outer_by_reduction : vect_unused_in_scope;
574 break;
575
576 case vect_used_by_reduction:
577 case vect_used_only_live:
578 relevant = vect_used_in_outer_by_reduction;
579 break;
580
581 case vect_used_in_scope:
582 relevant = vect_used_in_outer;
583 break;
584
585 default:
586 gcc_unreachable ();
587 }
588 }
589 /* We are also not interested in uses on loop PHI backedges that are
590 inductions. Otherwise we'll needlessly vectorize the IV increment
591 and cause hybrid SLP for SLP inductions. Unless the PHI is live
592 of course. */
593 else if (gimple_code (stmt) == GIMPLE_PHI
594 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
595 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
596 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
597 == use))
598 {
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "induction value on backedge.\n");
602 return true;
603 }
604
605
606 vect_mark_relevant (worklist, def_stmt, relevant, false);
607 return true;
608}
609
610
611/* Function vect_mark_stmts_to_be_vectorized.
612
613 Not all stmts in the loop need to be vectorized. For example:
614
615 for i...
616 for j...
617 1. T0 = i + j
618 2. T1 = a[T0]
619
620 3. j = j + 1
621
622   Stmts 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
624
625 This pass detects such stmts. */
626
627bool
628vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
629{
630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
632 unsigned int nbbs = loop->num_nodes;
633 gimple_stmt_iterator si;
634 gimple *stmt;
635 unsigned int i;
636 stmt_vec_info stmt_vinfo;
637 basic_block bb;
638 gimple *phi;
639 bool live_p;
640 enum vect_relevant relevant;
641
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "=== vect_mark_stmts_to_be_vectorized ===\n");
645
646 auto_vec<gimple *, 64> worklist;
647
648 /* 1. Init worklist. */
649 for (i = 0; i < nbbs; i++)
650 {
651 bb = bbs[i];
652 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
653 {
654 phi = gsi_stmt (si);
655 if (dump_enabled_p ())
656 {
657 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
659 }
660
661 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
662 vect_mark_relevant (&worklist, phi, relevant, live_p);
663 }
664 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
665 {
666 stmt = gsi_stmt (si);
667 if (dump_enabled_p ())
668 {
669 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
670 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
671 }
672
673 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
674 vect_mark_relevant (&worklist, stmt, relevant, live_p);
675 }
676 }
677
678 /* 2. Process_worklist */
679 while (worklist.length () > 0)
680 {
681 use_operand_p use_p;
682 ssa_op_iter iter;
683
684 stmt = worklist.pop ();
685 if (dump_enabled_p ())
686 {
687 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
688 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
689 }
690
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant according to the relevance property
693 of STMT. */
694 stmt_vinfo = vinfo_for_stmt (stmt);
695 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696
697 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
698 propagated as is to the DEF_STMTs of its USEs.
699
700 One exception is when STMT has been identified as defining a reduction
701 variable; in this case we set the relevance to vect_used_by_reduction.
702 This is because we distinguish between two kinds of relevant stmts -
703 those that are used by a reduction computation, and those that are
704 (also) used by a regular computation. This allows us later on to
705 identify stmts that are used solely by a reduction, and therefore the
706 order of the results that they produce does not have to be kept. */
707
708 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
709 {
710 case vect_reduction_def:
711 gcc_assert (relevant != vect_unused_in_scope);
712 if (relevant != vect_unused_in_scope
713 && relevant != vect_used_in_scope
714 && relevant != vect_used_by_reduction
715 && relevant != vect_used_only_live)
716 {
717 if (dump_enabled_p ())
718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
719 "unsupported use of reduction.\n");
720 return false;
721 }
722 break;
723
724 case vect_nested_cycle:
725 if (relevant != vect_unused_in_scope
726 && relevant != vect_used_in_outer_by_reduction
727 && relevant != vect_used_in_outer)
728 {
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
731 "unsupported use of nested cycle.\n");
732
733 return false;
734 }
735 break;
736
737 case vect_double_reduction_def:
738 if (relevant != vect_unused_in_scope
739 && relevant != vect_used_by_reduction
740 && relevant != vect_used_only_live)
741 {
742 if (dump_enabled_p ())
743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
744 "unsupported use of double reduction.\n");
745
746 return false;
747 }
748 break;
749
750 default:
751 break;
752 }
753
754 if (is_pattern_stmt_p (stmt_vinfo))
755 {
756 /* Pattern statements are not inserted into the code, so
757 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
758 have to scan the RHS or function arguments instead. */
759 if (is_gimple_assign (stmt))
760 {
761 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
762 tree op = gimple_assign_rhs1 (stmt);
763
764 i = 1;
765 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
766 {
767 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
768 relevant, &worklist, false)
769 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
770 relevant, &worklist, false))
771 return false;
772 i = 2;
773 }
774 for (; i < gimple_num_ops (stmt); i++)
775 {
776 op = gimple_op (stmt, i);
777 if (TREE_CODE (op) == SSA_NAME
778 && !process_use (stmt, op, loop_vinfo, relevant,
779 &worklist, false))
780 return false;
781 }
782 }
783 else if (is_gimple_call (stmt))
784 {
785 for (i = 0; i < gimple_call_num_args (stmt); i++)
786 {
787 tree arg = gimple_call_arg (stmt, i);
788 if (!process_use (stmt, arg, loop_vinfo, relevant,
789 &worklist, false))
790 return false;
791 }
792 }
793 }
794 else
795 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
796 {
797 tree op = USE_FROM_PTR (use_p);
798 if (!process_use (stmt, op, loop_vinfo, relevant,
799 &worklist, false))
800 return false;
801 }
802
803 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
804 {
805 gather_scatter_info gs_info;
806 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
807 gcc_unreachable ();
808 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
809 &worklist, true))
810 return false;
811 }
812 } /* while worklist */
813
814 return true;
815}
816
817
818/* Function vect_model_simple_cost.
819
820 Models cost for simple operations, i.e. those that only emit ncopies of a
821 single op. Right now, this does not account for multiple insns that could
822 be generated for the single vector op. We will handle that shortly. */
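
/* An illustrative example (hypothetical numbers): with NCOPIES == 2 and
   one constant operand among the NDTS operand definitions, the routine
   below records a single scalar_to_vec prologue cost for broadcasting the
   constant plus NCOPIES (here two) vector_stmt statements in the loop
   body.  */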
823
824void
825vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
826 enum vect_def_type *dt,
827 int ndts,
828 stmt_vector_for_cost *prologue_cost_vec,
829 stmt_vector_for_cost *body_cost_vec)
830{
831 int i;
832 int inside_cost = 0, prologue_cost = 0;
833
834 /* The SLP costs were already calculated during SLP tree build. */
835 if (PURE_SLP_STMT (stmt_info))
836 return;
837
838  /* Cost the "broadcast" of a scalar operand into a vector operand.
839 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
840 cost model. */
841 for (i = 0; i < ndts; i++)
842 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
843 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
844 stmt_info, 0, vect_prologue);
845
846 /* Pass the inside-of-loop statements to the target-specific cost model. */
847 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
848 stmt_info, 0, vect_body);
849
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_NOTE, vect_location,
852 "vect_model_simple_cost: inside_cost = %d, "
853 "prologue_cost = %d .\n", inside_cost, prologue_cost);
854}
855
856
857/* Model cost for type demotion and promotion operations. PWR is normally
858 zero for single-step promotions and demotions. It will be one if
859 two-step promotion/demotion is required, and so on. Each additional
860 step doubles the number of instructions required. */
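
/* Illustrative example (assuming vect_pow2 (x) computes 2**x): a two-step
   promotion (PWR == 1) is costed below as 2 + 4 = 6 vec_promote_demote
   statements, whereas a two-step demotion is costed as 1 + 2 = 3, since
   each promotion step produces twice as many result vectors.  */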
861
862static void
863vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
864 enum vect_def_type *dt, int pwr)
865{
866 int i, tmp;
867 int inside_cost = 0, prologue_cost = 0;
868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
869 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
870 void *target_cost_data;
871
872 /* The SLP costs were already calculated during SLP tree build. */
873 if (PURE_SLP_STMT (stmt_info))
874 return;
875
876 if (loop_vinfo)
877 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
878 else
879 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
880
881 for (i = 0; i < pwr + 1; i++)
882 {
883 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
884 (i + 1) : i;
885 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
886 vec_promote_demote, stmt_info, 0,
887 vect_body);
888 }
889
890  /* FORNOW: Assuming maximum 2 args per stmt.  */
891 for (i = 0; i < 2; i++)
892 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
893 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
894 stmt_info, 0, vect_prologue);
895
896 if (dump_enabled_p ())
897 dump_printf_loc (MSG_NOTE, vect_location,
898 "vect_model_promotion_demotion_cost: inside_cost = %d, "
899 "prologue_cost = %d .\n", inside_cost, prologue_cost);
900}
901
902/* Function vect_model_store_cost
903
904 Models cost for stores. In the case of grouped accesses, one access
905 has the overhead of the grouped access attributed to it. */
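
/* A worked example (illustrative only): for a grouped store with
   GROUP_SIZE == 4 that is implemented as a permute-and-store
   (VMAT_CONTIGUOUS_PERMUTE) with NCOPIES == 2, the code below records
   2 * ceil_log2 (4) * 4 == 16 vec_perm statements in addition to the
   costs of the stores themselves.  */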
906
907void
908vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909 vect_memory_access_type memory_access_type,
910 enum vect_def_type dt, slp_tree slp_node,
911 stmt_vector_for_cost *prologue_cost_vec,
912 stmt_vector_for_cost *body_cost_vec)
913{
914 unsigned int inside_cost = 0, prologue_cost = 0;
915 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
916 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
917 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
918
919 if (dt == vect_constant_def || dt == vect_external_def)
920 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
921 stmt_info, 0, vect_prologue);
922
923 /* Grouped stores update all elements in the group at once,
924 so we want the DR for the first statement. */
925 if (!slp_node && grouped_access_p)
926 {
927 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
928 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
929 }
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943      /* Uses high and low interleave or shuffle operations for each
944	 needed permute.  */
945 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 /* N scalar stores plus extracting the elements. */
961 inside_cost += record_stmt_cost (body_cost_vec,
962 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
963 scalar_store, stmt_info, 0, vect_body);
964 else
965 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
966
967 if (memory_access_type == VMAT_ELEMENTWISE
968 || memory_access_type == VMAT_STRIDED_SLP)
969 inside_cost += record_stmt_cost (body_cost_vec,
970 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
971 vec_to_scalar, stmt_info, 0, vect_body);
972
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
977}
978
979
980/* Calculate cost of DR's memory access. */
981void
982vect_get_store_cost (struct data_reference *dr, int ncopies,
983 unsigned int *inside_cost,
984 stmt_vector_for_cost *body_cost_vec)
985{
986 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
987 gimple *stmt = DR_STMT (dr);
988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
989
990 switch (alignment_support_scheme)
991 {
992 case dr_aligned:
993 {
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 vector_store, stmt_info, 0,
996 vect_body);
997
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: aligned.\n");
1001 break;
1002 }
1003
1004 case dr_unaligned_supported:
1005 {
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1008 unaligned_store, stmt_info,
1009 DR_MISALIGNMENT (dr), vect_body);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE, vect_location,
1012 "vect_model_store_cost: unaligned supported by "
1013 "hardware.\n");
1014 break;
1015 }
1016
1017 case dr_unaligned_unsupported:
1018 {
1019 *inside_cost = VECT_MAX_COST;
1020
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1023 "vect_model_store_cost: unsupported access.\n");
1024 break;
1025 }
1026
1027 default:
1028 gcc_unreachable ();
1029 }
1030}
1031
1032
1033/* Function vect_model_load_cost
1034
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1039
1040void
1041vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1042 vect_memory_access_type memory_access_type,
1043 slp_tree slp_node,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec)
1046{
1047 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1049 unsigned int inside_cost = 0, prologue_cost = 0;
1050 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1051
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node && grouped_access_p)
1055 {
1056 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1057 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1058 }
1059
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1064
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1069 if (first_stmt_p
1070 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1071 {
1072      /* Uses even and odd extract operations or shuffle operations
1073	 for each needed permute.  */
1074 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1075 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1076 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1078
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1082 group_size);
1083 }
1084
1085 /* The loads themselves. */
1086 if (memory_access_type == VMAT_ELEMENTWISE
1087 || memory_access_type == VMAT_GATHER_SCATTER)
1088 {
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 inside_cost += record_stmt_cost (body_cost_vec,
1092 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1093 scalar_load, stmt_info, 0, vect_body);
1094 }
1095 else
1096 vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 &inside_cost, &prologue_cost,
1098 prologue_cost_vec, body_cost_vec, true);
1099 if (memory_access_type == VMAT_ELEMENTWISE
1100 || memory_access_type == VMAT_STRIDED_SLP)
1101 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 stmt_info, 0, vect_body);
1103
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE, vect_location,
1106 "vect_model_load_cost: inside_cost = %d, "
1107 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108}
1109
1110
1111/* Calculate cost of DR's memory access. */
1112void
1113vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 bool add_realign_cost, unsigned int *inside_cost,
1115 unsigned int *prologue_cost,
1116 stmt_vector_for_cost *prologue_cost_vec,
1117 stmt_vector_for_cost *body_cost_vec,
1118 bool record_prologue_costs)
1119{
1120 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121 gimple *stmt = DR_STMT (dr);
1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123
1124 switch (alignment_support_scheme)
1125 {
1126 case dr_aligned:
1127 {
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 stmt_info, 0, vect_body);
1130
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: aligned.\n");
1134
1135 break;
1136 }
1137 case dr_unaligned_supported:
1138 {
1139 /* Here, we assign an additional cost for the unaligned load. */
1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 unaligned_load, stmt_info,
1142 DR_MISALIGNMENT (dr), vect_body);
1143
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned supported by "
1147 "hardware.\n");
1148
1149 break;
1150 }
1151 case dr_explicit_realign:
1152 {
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 vector_load, stmt_info, 0, vect_body);
1155 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 vec_perm, stmt_info, 0, vect_body);
1157
1158 /* FIXME: If the misalignment remains fixed across the iterations of
1159 the containing loop, the following cost should be added to the
1160 prologue costs. */
1161 if (targetm.vectorize.builtin_mask_for_load)
1162 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 stmt_info, 0, vect_body);
1164
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: explicit realign\n");
1168
1169 break;
1170 }
1171 case dr_explicit_realign_optimized:
1172 {
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: unaligned software "
1176 "pipelined.\n");
1177
1178 /* Unaligned software pipeline has a load of an address, an initial
1179 load, and possibly a mask operation to "prime" the loop. However,
1180 if this is an access in a group of loads, which provide grouped
1181 access, then the above cost should only be considered for one
1182 access in the group. Inside the loop, there is a load op
1183 and a realignment op. */
1184
1185 if (add_realign_cost && record_prologue_costs)
1186 {
1187 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 vector_stmt, stmt_info,
1189 0, vect_prologue);
1190 if (targetm.vectorize.builtin_mask_for_load)
1191 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 vector_stmt, stmt_info,
1193 0, vect_prologue);
1194 }
1195
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 stmt_info, 0, vect_body);
1198 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 stmt_info, 0, vect_body);
1200
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE, vect_location,
1203 "vect_model_load_cost: explicit realign optimized"
1204 "\n");
1205
1206 break;
1207 }
1208
1209 case dr_unaligned_unsupported:
1210 {
1211 *inside_cost = VECT_MAX_COST;
1212
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215 "vect_model_load_cost: unsupported access.\n");
1216 break;
1217 }
1218
1219 default:
1220 gcc_unreachable ();
1221 }
1222}
1223
1224/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225 the loop preheader for the vectorized stmt STMT. */
1226
1227static void
1228vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229{
1230 if (gsi)
1231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232 else
1233 {
1234 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236
1237 if (loop_vinfo)
1238 {
1239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 basic_block new_bb;
1241 edge pe;
1242
1243 if (nested_in_vect_loop_p (loop, stmt))
1244 loop = loop->inner;
1245
1246 pe = loop_preheader_edge (loop);
1247 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248 gcc_assert (!new_bb);
1249 }
1250 else
1251 {
1252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253 basic_block bb;
1254 gimple_stmt_iterator gsi_bb_start;
1255
1256 gcc_assert (bb_vinfo);
1257 bb = BB_VINFO_BB (bb_vinfo);
1258 gsi_bb_start = gsi_after_labels (bb);
1259 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260 }
1261 }
1262
1263 if (dump_enabled_p ())
1264 {
1265 dump_printf_loc (MSG_NOTE, vect_location,
1266 "created new init_stmt: ");
1267 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268 }
1269}
1270
1271/* Function vect_init_vector.
1272
1273 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1275   a vector type, a vector with all elements equal to VAL is created first.
1276   Place the initialization at GSI if it is not NULL.  Otherwise, place the
1277   initialization at the loop preheader.
1278 Return the DEF of INIT_STMT.
1279 It will be used in the vectorization of STMT. */
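
/* For example (illustrative, with a made-up SSA name): called with
   VAL == 5 and a four-element integer vector TYPE, the function builds
   the equivalent of

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (or at GSI if it is non-NULL) and returns the
   new SSA name cst_1.  */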
1280
1281tree
1282vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283{
1284 gimple *init_stmt;
1285 tree new_temp;
1286
1287  /* We abuse this function to push something to an SSA name with the
       initial value 'val'.  */
1288 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289 {
1290 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 {
1293	      /* A scalar boolean value should be transformed into an
1294		 all-zeros or all-ones value before building a vector.  */
1295 if (VECTOR_BOOLEAN_TYPE_P (type))
1296 {
1297 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 tree false_val = build_zero_cst (TREE_TYPE (type));
1299
1300 if (CONSTANT_CLASS_P (val))
1301 val = integer_zerop (val) ? false_val : true_val;
1302 else
1303 {
1304 new_temp = make_ssa_name (TREE_TYPE (type));
1305 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 val, true_val, false_val);
1307 vect_init_vector_1 (stmt, init_stmt, gsi);
1308 val = new_temp;
1309 }
1310 }
1311 else if (CONSTANT_CLASS_P (val))
1312 val = fold_convert (TREE_TYPE (type), val);
1313 else
1314 {
1315 new_temp = make_ssa_name (TREE_TYPE (type));
1316 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 init_stmt = gimple_build_assign (new_temp,
1318 fold_build1 (VIEW_CONVERT_EXPR,
1319 TREE_TYPE (type),
1320 val));
1321 else
1322 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 val = new_temp;
1325 }
1326 }
1327 val = build_vector_from_val (type, val);
1328 }
1329
1330 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331 init_stmt = gimple_build_assign (new_temp, val);
1332 vect_init_vector_1 (stmt, init_stmt, gsi);
1333 return new_temp;
1334}
1335
1336/* Function vect_get_vec_def_for_operand_1.
1337
1338 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339 DT that will be used in the vectorized stmt. */
1340
1341tree
1342vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343{
1344 tree vec_oprnd;
1345 gimple *vec_stmt;
1346 stmt_vec_info def_stmt_info = NULL;
1347
1348 switch (dt)
1349 {
1350 /* operand is a constant or a loop invariant. */
1351 case vect_constant_def:
1352 case vect_external_def:
1353 /* Code should use vect_get_vec_def_for_operand. */
1354 gcc_unreachable ();
1355
1356 /* operand is defined inside the loop. */
1357 case vect_internal_def:
1358 {
1359 /* Get the def from the vectorized stmt. */
1360 def_stmt_info = vinfo_for_stmt (def_stmt);
1361
1362 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363 /* Get vectorized pattern statement. */
1364 if (!vec_stmt
1365 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366 && !STMT_VINFO_RELEVANT (def_stmt_info))
1367 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369 gcc_assert (vec_stmt);
1370 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 vec_oprnd = PHI_RESULT (vec_stmt);
1372 else if (is_gimple_call (vec_stmt))
1373 vec_oprnd = gimple_call_lhs (vec_stmt);
1374 else
1375 vec_oprnd = gimple_assign_lhs (vec_stmt);
1376 return vec_oprnd;
1377 }
1378
1379 /* operand is defined by a loop header phi. */
1380 case vect_reduction_def:
1381 case vect_double_reduction_def:
1382 case vect_nested_cycle:
1383 case vect_induction_def:
1384 {
1385 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386
1387 /* Get the def from the vectorized stmt. */
1388 def_stmt_info = vinfo_for_stmt (def_stmt);
1389 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else
1393 vec_oprnd = gimple_get_lhs (vec_stmt);
1394 return vec_oprnd;
1395 }
1396
1397 default:
1398 gcc_unreachable ();
1399 }
1400}
1401
1402
1403/* Function vect_get_vec_def_for_operand.
1404
1405 OP is an operand in STMT. This function returns a (vector) def that will be
1406 used in the vectorized stmt for STMT.
1407
1408 In the case that OP is an SSA_NAME which is defined in the loop, then
1409 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410
1411 In case OP is an invariant or constant, a new stmt that creates a vector def
1412 needs to be introduced. VECTYPE may be used to specify a required type for
1413   the vector invariant.  */
1414
1415tree
1416vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417{
1418 gimple *def_stmt;
1419 enum vect_def_type dt;
1420 bool is_simple_use;
1421 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423
1424 if (dump_enabled_p ())
1425 {
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "vect_get_vec_def_for_operand: ");
1428 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429 dump_printf (MSG_NOTE, "\n");
1430 }
1431
1432 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433 gcc_assert (is_simple_use);
1434 if (def_stmt && dump_enabled_p ())
1435 {
1436 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1437 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438 }
1439
1440 if (dt == vect_constant_def || dt == vect_external_def)
1441 {
1442 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443 tree vector_type;
1444
1445 if (vectype)
1446 vector_type = vectype;
1447 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450 else
1451 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452
1453 gcc_assert (vector_type);
1454 return vect_init_vector (stmt, op, vector_type, NULL);
1455 }
1456 else
1457 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458}
1459
1460
1461/* Function vect_get_vec_def_for_stmt_copy
1462
1463 Return a vector-def for an operand. This function is used when the
1464 vectorized stmt to be created (by the caller to this function) is a "copy"
1465 created in case the vectorized result cannot fit in one vector, and several
1466 copies of the vector-stmt are required. In this case the vector-def is
1467 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468 of the stmt that defines VEC_OPRND.
1469 DT is the type of the vector def VEC_OPRND.
1470
1471 Context:
1472 In case the vectorization factor (VF) is bigger than the number
1473 of elements that can fit in a vectype (nunits), we have to generate
1474 more than one vector stmt to vectorize the scalar stmt. This situation
1475 arises when there are multiple data-types operated upon in the loop; the
1476 smallest data-type determines the VF, and as a result, when vectorizing
1477 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478 vector stmt (each computing a vector of 'nunits' results, and together
1479 computing 'VF' results in each iteration). This function is called when
1480 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481 which VF=16 and nunits=4, so the number of copies required is 4):
1482
1483 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1484
1485 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1486 VS1.1: vx.1 = memref1 VS1.2
1487 VS1.2: vx.2 = memref2 VS1.3
1488 VS1.3: vx.3 = memref3
1489
1490 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1491 VSnew.1: vz1 = vx.1 + ... VSnew.2
1492 VSnew.2: vz2 = vx.2 + ... VSnew.3
1493 VSnew.3: vz3 = vx.3 + ...
1494
1495 The vectorization of S1 is explained in vectorizable_load.
1496 The vectorization of S2:
1497 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498 the function 'vect_get_vec_def_for_operand' is called to
1499 get the relevant vector-def for each operand of S2. For operand x it
1500 returns the vector-def 'vx.0'.
1501
1502 To create the remaining copies of the vector-stmt (VSnew.j), this
1503 function is called to get the relevant vector-def for each operand. It is
1504 obtained from the respective VS1.j stmt, which is recorded in the
1505 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506
1507 For example, to obtain the vector-def 'vx.1' in order to create the
1508 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1510 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511 and return its def ('vx.1').
1512 Overall, to create the above sequence this function will be called 3 times:
1513 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516
1517tree
1518vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519{
1520 gimple *vec_stmt_for_operand;
1521 stmt_vec_info def_stmt_info;
1522
1523 /* Do nothing; can reuse same def. */
1524   if (dt == vect_external_def || dt == vect_constant_def)
1525 return vec_oprnd;
1526
1527 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529 gcc_assert (def_stmt_info);
1530 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531 gcc_assert (vec_stmt_for_operand);
1532 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534 else
1535 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536 return vec_oprnd;
1537}
1538
1539
1540/* Get vectorized definitions for the operands to create a copy of an original
1541 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1542
1543void
1544vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 vec<tree> *vec_oprnds0,
1546 vec<tree> *vec_oprnds1)
1547{
1548 tree vec_oprnd = vec_oprnds0->pop ();
1549
1550 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551 vec_oprnds0->quick_push (vec_oprnd);
1552
1553 if (vec_oprnds1 && vec_oprnds1->length ())
1554 {
1555 vec_oprnd = vec_oprnds1->pop ();
1556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557 vec_oprnds1->quick_push (vec_oprnd);
1558 }
1559}
1560
1561
1562/* Get vectorized definitions for OP0 and OP1. */
1563
1564void
1565vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 vec<tree> *vec_oprnds0,
1567 vec<tree> *vec_oprnds1,
1568 slp_tree slp_node)
1569{
1570 if (slp_node)
1571 {
1572 int nops = (op1 == NULL_TREE) ? 1 : 2;
1573 auto_vec<tree> ops (nops);
1574 auto_vec<vec<tree> > vec_defs (nops);
1575
1576 ops.quick_push (op0);
1577 if (op1)
1578 ops.quick_push (op1);
1579
1580 vect_get_slp_defs (ops, slp_node, &vec_defs);
1581
1582 *vec_oprnds0 = vec_defs[0];
1583 if (op1)
1584 *vec_oprnds1 = vec_defs[1];
1585 }
1586 else
1587 {
1588 tree vec_oprnd;
1589
1590 vec_oprnds0->create (1);
1591 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592 vec_oprnds0->quick_push (vec_oprnd);
1593
1594 if (op1)
1595 {
1596 vec_oprnds1->create (1);
1597 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 vec_oprnds1->quick_push (vec_oprnd);
1599 }
1600 }
1601}
1602
1603
1604/* Function vect_finish_stmt_generation.
1605
1606 Insert a new stmt. */
1607
1608void
1609vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1610 gimple_stmt_iterator *gsi)
1611{
1612 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1613 vec_info *vinfo = stmt_info->vinfo;
1614
1615 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1616
1617 if (!gsi_end_p (*gsi)
1618 && gimple_has_mem_ops (vec_stmt))
1619 {
1620 gimple *at_stmt = gsi_stmt (*gsi);
1621 tree vuse = gimple_vuse (at_stmt);
1622 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1623 {
1624 tree vdef = gimple_vdef (at_stmt);
1625 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1626 /* If we have an SSA vuse and insert a store, update virtual
1627 SSA form to avoid triggering the renamer. Do so only
1628 if we can easily see all uses - which is what almost always
1629 happens with the way vectorized stmts are inserted. */
1630 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1631 && ((is_gimple_assign (vec_stmt)
1632 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1633 || (is_gimple_call (vec_stmt)
1634 && !(gimple_call_flags (vec_stmt)
1635 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1636 {
1637 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1638 gimple_set_vdef (vec_stmt, new_vdef);
1639 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1640 }
1641 }
1642 }
1643 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1644
1645 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1646
1647 if (dump_enabled_p ())
1648 {
1649 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1651 }
1652
1653 gimple_set_location (vec_stmt, gimple_location (stmt));
1654
1655 /* While EH edges will generally prevent vectorization, stmt might
1656 e.g. be in a must-not-throw region. Ensure newly created stmts
1657 that could throw are part of the same region. */
1658 int lp_nr = lookup_stmt_eh_lp (stmt);
1659 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1660 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1661}
1662
1663/* We want to vectorize a call to combined function CFN with function
1664 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1665 as the types of all inputs. Check whether this is possible using
1666 an internal function, returning its code if so or IFN_LAST if not. */
1667
1668static internal_fn
1669vectorizable_internal_function (combined_fn cfn, tree fndecl,
1670 tree vectype_out, tree vectype_in)
1671{
1672 internal_fn ifn;
1673 if (internal_fn_p (cfn))
1674 ifn = as_internal_fn (cfn);
1675 else
1676 ifn = associated_internal_fn (fndecl);
1677 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1678 {
1679 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1680 if (info.vectorizable)
1681 {
1682 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1683 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1684 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1685 OPTIMIZE_FOR_SPEED))
1686 return ifn;
1687 }
1688 }
1689 return IFN_LAST;
1690}
1691
1692
1693static tree permute_vec_elements (tree, tree, tree, gimple *,
1694 gimple_stmt_iterator *);
1695
1696/* STMT is a non-strided load or store, meaning that it accesses
1697 elements with a known constant step. Return -1 if that step
1698 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1699
1700static int
1701compare_step_with_zero (gimple *stmt)
1702{
1703 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1704 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1705 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1706 size_zero_node);
1707}
1708
1709/* If the target supports a permute mask that reverses the elements in
1710 a vector of type VECTYPE, return that mask, otherwise return null. */
1711
1712static tree
1713perm_mask_for_reverse (tree vectype)
1714{
1715 int i, nunits;
1716
1717 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1718
1719 auto_vec_perm_indices sel (nunits);
1720 for (i = 0; i < nunits; ++i)
1721 sel.quick_push (nunits - 1 - i);
1722
1723 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
1724 return NULL_TREE;
1725 return vect_gen_perm_mask_checked (vectype, sel);
1726}
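
/* For instance (illustrative): for a four-element vector type the
   selector built above is { 3, 2, 1, 0 }, so element i of the permuted
   vector is taken from element NUNITS - 1 - i of the input, provided
   the target supports that permutation.  */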
1727
1728/* A subroutine of get_load_store_type, with a subset of the same
1729 arguments. Handle the case where STMT is part of a grouped load
1730 or store.
1731
1732 For stores, the statements in the group are all consecutive
1733 and there is no gap at the end. For loads, the statements in the
1734 group might not be consecutive; there can be gaps between statements
1735 as well as at the end. */
1736
1737static bool
1738get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1739 vec_load_store_type vls_type,
1740 vect_memory_access_type *memory_access_type)
1741{
1742 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1743 vec_info *vinfo = stmt_info->vinfo;
1744 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1745 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1746 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1747 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1748 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1749 bool single_element_p = (stmt == first_stmt
1750 && !GROUP_NEXT_ELEMENT (stmt_info));
1751 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1752 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1753
1754 /* True if the vectorized statements would access beyond the element
1755 accessed by the last statement in the group. */
1756 bool overrun_p = false;
1757
1758 /* True if we can cope with such overrun by peeling for gaps, so that
1759 there is at least one final scalar iteration after the vector loop. */
1760 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1761
1762 /* There can only be a gap at the end of the group if the stride is
1763 known at compile time. */
1764 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1765
1766 /* Stores can't yet have gaps. */
1767 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1768
1769 if (slp)
1770 {
1771 if (STMT_VINFO_STRIDED_P (stmt_info))
1772 {
1773 /* Try to use consecutive accesses of GROUP_SIZE elements,
1774 separated by the stride, until we have a complete vector.
1775 Fall back to scalar accesses if that isn't possible. */
1776 if (nunits % group_size == 0)
1777 *memory_access_type = VMAT_STRIDED_SLP;
1778 else
1779 *memory_access_type = VMAT_ELEMENTWISE;
1780 }
1781 else
1782 {
1783 overrun_p = loop_vinfo && gap != 0;
1784 if (overrun_p && vls_type != VLS_LOAD)
1785 {
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "Grouped store with gaps requires"
1788 " non-consecutive accesses\n");
1789 return false;
1790 }
1791 /* An overrun is fine if the trailing elements are smaller
1792 than the alignment boundary B. Every vector access will
1793 be a multiple of B and so we are guaranteed to access a
1794 non-gap element in the same B-sized block. */
1795 if (overrun_p
1796 && gap < (vect_known_alignment_in_bytes (first_dr)
1797 / vect_get_scalar_dr_size (first_dr)))
1798 overrun_p = false;
1799 if (overrun_p && !can_overrun_p)
1800 {
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1803 "Peeling for outer loop is not supported\n");
1804 return false;
1805 }
1806 *memory_access_type = VMAT_CONTIGUOUS;
1807 }
1808 }
1809 else
1810 {
1811 /* We can always handle this case using elementwise accesses,
1812 but see if something more efficient is available. */
1813 *memory_access_type = VMAT_ELEMENTWISE;
1814
1815 /* If there is a gap at the end of the group then these optimizations
1816 would access excess elements in the last iteration. */
1817 bool would_overrun_p = (gap != 0);
1818 /* An overrun is fine if the trailing elements are smaller than the
1819 alignment boundary B. Every vector access will be a multiple of B
1820 and so we are guaranteed to access a non-gap element in the
1821 same B-sized block. */
1822 if (would_overrun_p
1823 && gap < (vect_known_alignment_in_bytes (first_dr)
1824 / vect_get_scalar_dr_size (first_dr)))
1825 would_overrun_p = false;
1826
1827 if (!STMT_VINFO_STRIDED_P (stmt_info)
1828 && (can_overrun_p || !would_overrun_p)
1829 && compare_step_with_zero (stmt) > 0)
1830 {
1831 /* First try using LOAD/STORE_LANES. */
1832 if (vls_type == VLS_LOAD
1833 ? vect_load_lanes_supported (vectype, group_size)
1834 : vect_store_lanes_supported (vectype, group_size))
1835 {
1836 *memory_access_type = VMAT_LOAD_STORE_LANES;
1837 overrun_p = would_overrun_p;
1838 }
1839
1840 /* If that fails, try using permuting loads. */
1841 if (*memory_access_type == VMAT_ELEMENTWISE
1842 && (vls_type == VLS_LOAD
1843 ? vect_grouped_load_supported (vectype, single_element_p,
1844 group_size)
1845 : vect_grouped_store_supported (vectype, group_size)))
1846 {
1847 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1848 overrun_p = would_overrun_p;
1849 }
1850 }
1851 }
1852
1853 if (vls_type != VLS_LOAD && first_stmt == stmt)
1854 {
1855 /* STMT is the leader of the group. Check the operands of all the
1856 stmts of the group. */
1857 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1858 while (next_stmt)
1859 {
1860 gcc_assert (gimple_assign_single_p (next_stmt));
1861 tree op = gimple_assign_rhs1 (next_stmt);
1862 gimple *def_stmt;
1863 enum vect_def_type dt;
1864 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1865 {
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1868 "use not simple.\n");
1869 return false;
1870 }
1871 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1872 }
1873 }
1874
1875 if (overrun_p)
1876 {
1877 gcc_assert (can_overrun_p);
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1880 "Data access with gaps requires scalar "
1881 "epilogue loop\n");
1882 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1883 }
1884
1885 return true;
1886}
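
/* Worked example (illustrative, not from the original source): three loads
   a[4*i], a[4*i+1] and a[4*i+2] form a group with GROUP_SIZE == 4 and a
   trailing gap of 1.  Vectorizing them with contiguous V4SI loads touches
   the unused fourth element, so the final vector iteration may read past
   the data the scalar loop accesses and WOULD_OVERRUN_P starts out true.
   If the first access is known to be 16-byte aligned, 16 / 4 == 4 elements
   share each aligned block and a gap of 1 < 4 makes the extra read
   harmless; otherwise the overrun is only acceptable when peeling for gaps
   is possible, in which case LOOP_VINFO_PEELING_FOR_GAPS is set at the end
   of this function.  */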
1887
1888/* A subroutine of get_load_store_type, with a subset of the same
1889 arguments. Handle the case where STMT is a load or store that
1890 accesses consecutive elements with a negative step. */
1891
1892static vect_memory_access_type
1893get_negative_load_store_type (gimple *stmt, tree vectype,
1894 vec_load_store_type vls_type,
1895 unsigned int ncopies)
1896{
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1899 dr_alignment_support alignment_support_scheme;
1900
1901 if (ncopies > 1)
1902 {
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1905 "multiple types with negative step.\n");
1906 return VMAT_ELEMENTWISE;
1907 }
1908
1909 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1910 if (alignment_support_scheme != dr_aligned
1911 && alignment_support_scheme != dr_unaligned_supported)
1912 {
1913 if (dump_enabled_p ())
1914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1915 "negative step but alignment required.\n");
1916 return VMAT_ELEMENTWISE;
1917 }
1918
1919 if (vls_type == VLS_STORE_INVARIANT)
1920 {
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_NOTE, vect_location,
1923 "negative step with invariant source;"
1924 " no permute needed.\n");
1925 return VMAT_CONTIGUOUS_DOWN;
1926 }
1927
1928 if (!perm_mask_for_reverse (vectype))
1929 {
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "negative step and reversing not supported.\n");
1933 return VMAT_ELEMENTWISE;
1934 }
1935
1936 return VMAT_CONTIGUOUS_REVERSE;
1937}
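
/* Illustrative example (not from the original source): a single-copy load
   from a[i] in a loop that walks i downwards has a step of -4 for int
   elements.  If the access is aligned (or the target handles the
   misalignment) and the target supports the reversing permutation from
   perm_mask_for_reverse, the access is classified as
   VMAT_CONTIGUOUS_REVERSE: the vector is loaded starting at the lowest
   accessed address and its elements are then reversed.  An invariant
   source being stored needs no permute and gets VMAT_CONTIGUOUS_DOWN
   instead.  */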
1938
1939/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1940 if there is a memory access type that the vectorized form can use,
1941 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1942 or scatters, fill in GS_INFO accordingly.
1943
1944 SLP says whether we're performing SLP rather than loop vectorization.
1945 VECTYPE is the vector type that the vectorized statements will use.
1946 NCOPIES is the number of vector statements that will be needed. */
1947
1948static bool
1949get_load_store_type (gimple *stmt, tree vectype, bool slp,
1950 vec_load_store_type vls_type, unsigned int ncopies,
1951 vect_memory_access_type *memory_access_type,
1952 gather_scatter_info *gs_info)
1953{
1954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1955 vec_info *vinfo = stmt_info->vinfo;
1956 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1957 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1958 {
1959 *memory_access_type = VMAT_GATHER_SCATTER;
1960 gimple *def_stmt;
1961 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1962 gcc_unreachable ();
1963 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1964 &gs_info->offset_dt,
1965 &gs_info->offset_vectype))
1966 {
1967 if (dump_enabled_p ())
1968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1969 "%s index use not simple.\n",
1970 vls_type == VLS_LOAD ? "gather" : "scatter");
1971 return false;
1972 }
1973 }
1974 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1975 {
1976 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1977 memory_access_type))
1978 return false;
1979 }
1980 else if (STMT_VINFO_STRIDED_P (stmt_info))
1981 {
1982 gcc_assert (!slp);
1983 *memory_access_type = VMAT_ELEMENTWISE;
1984 }
1985 else
1986 {
1987 int cmp = compare_step_with_zero (stmt);
1988 if (cmp < 0)
1989 *memory_access_type = get_negative_load_store_type
1990 (stmt, vectype, vls_type, ncopies);
1991 else if (cmp == 0)
1992 {
1993 gcc_assert (vls_type == VLS_LOAD);
1994 *memory_access_type = VMAT_INVARIANT;
1995 }
1996 else
1997 *memory_access_type = VMAT_CONTIGUOUS;
1998 }
1999
2000 /* FIXME: At the moment the cost model seems to underestimate the
2001 cost of using elementwise accesses. This check preserves the
2002 traditional behavior until that can be fixed. */
2003 if (*memory_access_type == VMAT_ELEMENTWISE
2004 && !STMT_VINFO_STRIDED_P (stmt_info))
2005 {
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2008 "not falling back to elementwise accesses\n");
2009 return false;
2010 }
2011 return true;
2012}
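
/* For example (illustrative): a load a[i] whose step equals the element
   size is classified as VMAT_CONTIGUOUS; a load of a[0] inside the loop
   has a zero step and becomes VMAT_INVARIANT; and a load b[c[i]] that was
   marked STMT_VINFO_GATHER_SCATTER_P becomes VMAT_GATHER_SCATTER with
   GS_INFO describing the gather.  */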
2013
2014/* Function vectorizable_mask_load_store.
2015
2016 Check if STMT performs a conditional load or store that can be vectorized.
2017 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2018 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2019 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2020
2021static bool
2022vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2023 gimple **vec_stmt, slp_tree slp_node)
2024{
2025 tree vec_dest = NULL;
2026 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2027 stmt_vec_info prev_stmt_info;
2028 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2029 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2030 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2033 tree rhs_vectype = NULL_TREE;
2034 tree mask_vectype;
2035 tree elem_type;
2036 gimple *new_stmt;
2037 tree dummy;
2038 tree dataref_ptr = NULL_TREE;
2039 gimple *ptr_incr;
2040 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2041 int ncopies;
2042 int i, j;
2043 bool inv_p;
2044 gather_scatter_info gs_info;
2045 vec_load_store_type vls_type;
2046 tree mask;
2047 gimple *def_stmt;
2048 enum vect_def_type dt;
2049
2050 if (slp_node != NULL)
2051 return false;
2052
2053 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2054 gcc_assert (ncopies >= 1);
2055
2056 mask = gimple_call_arg (stmt, 2);
2057
2058 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2059 return false;
2060
2061 /* FORNOW. This restriction should be relaxed. */
2062 if (nested_in_vect_loop && ncopies > 1)
2063 {
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2066 "multiple types in nested loop.");
2067 return false;
2068 }
2069
2070 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2071 return false;
2072
2073 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2074 && ! vec_stmt)
2075 return false;
2076
2077 if (!STMT_VINFO_DATA_REF (stmt_info))
2078 return false;
2079
2080 elem_type = TREE_TYPE (vectype);
2081
2082 if (TREE_CODE (mask) != SSA_NAME)
2083 return false;
2084
2085 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2086 return false;
2087
2088 if (!mask_vectype)
2089 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2090
2091 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2092 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2093 return false;
2094
2095 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2096 {
2097 tree rhs = gimple_call_arg (stmt, 3);
2098 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2099 return false;
2100 if (dt == vect_constant_def || dt == vect_external_def)
2101 vls_type = VLS_STORE_INVARIANT;
2102 else
2103 vls_type = VLS_STORE;
2104 }
2105 else
2106 vls_type = VLS_LOAD;
2107
2108 vect_memory_access_type memory_access_type;
2109 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2110 &memory_access_type, &gs_info))
2111 return false;
2112
2113 if (memory_access_type == VMAT_GATHER_SCATTER)
2114 {
2115 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2116 tree masktype
2117 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2118 if (TREE_CODE (masktype) == INTEGER_TYPE)
2119 {
2120 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2122 "masked gather with integer mask not supported.");
2123 return false;
2124 }
2125 }
2126 else if (memory_access_type != VMAT_CONTIGUOUS)
2127 {
2128 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130 "unsupported access type for masked %s.\n",
2131 vls_type == VLS_LOAD ? "load" : "store");
2132 return false;
2133 }
2134 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2135 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2136 TYPE_MODE (mask_vectype),
2137 vls_type == VLS_LOAD)
2138 || (rhs_vectype
2139 && !useless_type_conversion_p (vectype, rhs_vectype)))
2140 return false;
2141
2142 if (!vec_stmt) /* transformation not required. */
2143 {
2144 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2145 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2146 if (vls_type == VLS_LOAD)
2147 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2148 NULL, NULL, NULL);
2149 else
2150 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2151 dt, NULL, NULL, NULL);
2152 return true;
2153 }
2154 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2155
2156 /* Transform. */
2157
2158 if (memory_access_type == VMAT_GATHER_SCATTER)
2159 {
2160 tree vec_oprnd0 = NULL_TREE, op;
2161 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2162 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2163 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2164 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2165 tree mask_perm_mask = NULL_TREE;
2166 edge pe = loop_preheader_edge (loop);
2167 gimple_seq seq;
2168 basic_block new_bb;
2169 enum { NARROW, NONE, WIDEN } modifier;
2170 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2171
2172 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2173 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2175 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2176 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 scaletype = TREE_VALUE (arglist);
2178 gcc_checking_assert (types_compatible_p (srctype, rettype)
2179 && types_compatible_p (srctype, masktype));
2180
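/* Illustrative note (not from the original source): the data vector and
   the offset vector of the gather builtin need not have the same number of
   elements.  A V8SF data vector with a V4DI offset vector gives modifier
   == NARROW below: NCOPIES is doubled, each gather fills half a result
   vector, and pairs of results are combined using PERM_MASK.  In the WIDEN
   case the offset vector is the wider one and PERM_MASK instead selects
   its upper half on odd copies.  */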
2181 if (nunits == gather_off_nunits)
2182 modifier = NONE;
2183 else if (nunits == gather_off_nunits / 2)
2184 {
2185 modifier = WIDEN;
2186
2187 auto_vec_perm_indices sel (gather_off_nunits);
2188 for (i = 0; i < gather_off_nunits; ++i)
2189 sel.quick_push (i | nunits);
2190
2191 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2192 }
2193 else if (nunits == gather_off_nunits * 2)
2194 {
2195 modifier = NARROW;
2196
2197 auto_vec_perm_indices sel (nunits);
2198 sel.quick_grow (nunits);
2199 for (i = 0; i < nunits; ++i)
2200 sel[i] = i < gather_off_nunits
2201 ? i : i + nunits - gather_off_nunits;
2202
2203 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2204 ncopies *= 2;
2205 for (i = 0; i < nunits; ++i)
2206 sel[i] = i | gather_off_nunits;
2207 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2208 }
2209 else
2210 gcc_unreachable ();
2211
2212 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2213
2214 ptr = fold_convert (ptrtype, gs_info.base);
2215 if (!is_gimple_min_invariant (ptr))
2216 {
2217 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2218 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2219 gcc_assert (!new_bb);
2220 }
2221
2222 scale = build_int_cst (scaletype, gs_info.scale);
2223
2224 prev_stmt_info = NULL;
2225 for (j = 0; j < ncopies; ++j)
2226 {
2227 if (modifier == WIDEN && (j & 1))
2228 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2229 perm_mask, stmt, gsi);
2230 else if (j == 0)
2231 op = vec_oprnd0
2232 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2233 else
2234 op = vec_oprnd0
2235 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2236
2237 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2238 {
2239 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2240 == TYPE_VECTOR_SUBPARTS (idxtype));
2241 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2242 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2243 new_stmt
2244 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2246 op = var;
2247 }
2248
2249 if (mask_perm_mask && (j & 1))
2250 mask_op = permute_vec_elements (mask_op, mask_op,
2251 mask_perm_mask, stmt, gsi);
2252 else
2253 {
2254 if (j == 0)
2255 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2256 else
2257 {
2258 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2259 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2260 }
2261
2262 mask_op = vec_mask;
2263 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2264 {
2265 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2266 == TYPE_VECTOR_SUBPARTS (masktype));
2267 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2268 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2269 new_stmt
2270 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2271 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2272 mask_op = var;
2273 }
2274 }
2275
2276 new_stmt
2277 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2278 scale);
2279
2280 if (!useless_type_conversion_p (vectype, rettype))
2281 {
2282 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2283 == TYPE_VECTOR_SUBPARTS (rettype));
2284 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2285 gimple_call_set_lhs (new_stmt, op);
2286 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2287 var = make_ssa_name (vec_dest);
2288 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2289 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2290 }
2291 else
2292 {
2293 var = make_ssa_name (vec_dest, new_stmt);
2294 gimple_call_set_lhs (new_stmt, var);
2295 }
2296
2297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2298
2299 if (modifier == NARROW)
2300 {
2301 if ((j & 1) == 0)
2302 {
2303 prev_res = var;
2304 continue;
2305 }
2306 var = permute_vec_elements (prev_res, var,
2307 perm_mask, stmt, gsi);
2308 new_stmt = SSA_NAME_DEF_STMT (var);
2309 }
2310
2311 if (prev_stmt_info == NULL)
2312 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2313 else
2314 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2315 prev_stmt_info = vinfo_for_stmt (new_stmt);
2316 }
2317
2318 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2319 from the IL. */
2320 if (STMT_VINFO_RELATED_STMT (stmt_info))
2321 {
2322 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2323 stmt_info = vinfo_for_stmt (stmt);
2324 }
2325 tree lhs = gimple_call_lhs (stmt);
2326 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2327 set_vinfo_for_stmt (new_stmt, stmt_info);
2328 set_vinfo_for_stmt (stmt, NULL);
2329 STMT_VINFO_STMT (stmt_info) = new_stmt;
2330 gsi_replace (gsi, new_stmt, true);
2331 return true;
2332 }
2333 else if (vls_type != VLS_LOAD)
2334 {
2335 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2336 prev_stmt_info = NULL;
2337 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2338 for (i = 0; i < ncopies; i++)
2339 {
2340 unsigned align, misalign;
2341
2342 if (i == 0)
2343 {
2344 tree rhs = gimple_call_arg (stmt, 3);
2345 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2346 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2347 mask_vectype);
2348 /* We should have caught mismatched types earlier. */
2349 gcc_assert (useless_type_conversion_p (vectype,
2350 TREE_TYPE (vec_rhs)));
2351 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2352 NULL_TREE, &dummy, gsi,
2353 &ptr_incr, false, &inv_p);
2354 gcc_assert (!inv_p);
2355 }
2356 else
2357 {
2358 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2359 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2360 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2361 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2362 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2363 TYPE_SIZE_UNIT (vectype));
2364 }
2365
2366 align = DR_TARGET_ALIGNMENT (dr);
2367 if (aligned_access_p (dr))
2368 misalign = 0;
2369 else if (DR_MISALIGNMENT (dr) == -1)
2370 {
2371 align = TYPE_ALIGN_UNIT (elem_type);
2372 misalign = 0;
2373 }
2374 else
2375 misalign = DR_MISALIGNMENT (dr);
2376 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2377 misalign);
2378 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2379 misalign ? least_bit_hwi (misalign) : align);
2380 gcall *call
2381 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2382 ptr, vec_mask, vec_rhs);
2383 gimple_call_set_nothrow (call, true);
2384 new_stmt = call;
2385 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2386 if (i == 0)
2387 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2388 else
2389 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2390 prev_stmt_info = vinfo_for_stmt (new_stmt);
2391 }
2392 }
2393 else
2394 {
2395 tree vec_mask = NULL_TREE;
2396 prev_stmt_info = NULL;
2397 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2398 for (i = 0; i < ncopies; i++)
2399 {
2400 unsigned align, misalign;
2401
2402 if (i == 0)
2403 {
2404 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2405 mask_vectype);
2406 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2407 NULL_TREE, &dummy, gsi,
2408 &ptr_incr, false, &inv_p);
2409 gcc_assert (!inv_p);
2410 }
2411 else
2412 {
2413 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2414 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2415 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2416 TYPE_SIZE_UNIT (vectype));
2417 }
2418
2419 align = DR_TARGET_ALIGNMENT (dr);
2420 if (aligned_access_p (dr))
2421 misalign = 0;
2422 else if (DR_MISALIGNMENT (dr) == -1)
2423 {
2424 align = TYPE_ALIGN_UNIT (elem_type);
2425 misalign = 0;
2426 }
2427 else
2428 misalign = DR_MISALIGNMENT (dr);
2429 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2430 misalign);
2431 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2432 misalign ? least_bit_hwi (misalign) : align);
2433 gcall *call
2434 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2435 ptr, vec_mask);
2436 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2437 gimple_call_set_nothrow (call, true);
2438 vect_finish_stmt_generation (stmt, call, gsi);
2439 if (i == 0)
2440 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2441 else
2442 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2443 prev_stmt_info = vinfo_for_stmt (call);
2444 }
2445 }
2446
2447 if (vls_type == VLS_LOAD)
2448 {
2449 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2450 from the IL. */
2451 if (STMT_VINFO_RELATED_STMT (stmt_info))
2452 {
2453 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2454 stmt_info = vinfo_for_stmt (stmt);
2455 }
2456 tree lhs = gimple_call_lhs (stmt);
2457 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2458 set_vinfo_for_stmt (new_stmt, stmt_info);
2459 set_vinfo_for_stmt (stmt, NULL);
2460 STMT_VINFO_STMT (stmt_info) = new_stmt;
2461 gsi_replace (gsi, new_stmt, true);
2462 }
2463
2464 return true;
2465}
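
/* Illustrative sketch of the output (not from the original source): for a
   masked load the transform above emits one internal call per copy, roughly

     vect__1 = .MASK_LOAD (dataref_ptr, align_cst, vec_mask);

   and for a masked store

     .MASK_STORE (dataref_ptr, align_cst, vec_mask, vec_rhs);

   For loads the original scalar call is afterwards replaced by a harmless
   zero assignment so that it can be removed later.  */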
2466
2467/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2468
2469static bool
2470vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2471 gimple **vec_stmt, slp_tree slp_node,
2472 tree vectype_in, enum vect_def_type *dt)
2473{
2474 tree op, vectype;
2475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2476 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2477 unsigned ncopies, nunits;
2478
2479 op = gimple_call_arg (stmt, 0);
2480 vectype = STMT_VINFO_VECTYPE (stmt_info);
2481 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2482
2483 /* Multiple types in SLP are handled by creating the appropriate number of
2484 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2485 case of SLP. */
2486 if (slp_node)
2487 ncopies = 1;
2488 else
2489 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2490
2491 gcc_assert (ncopies >= 1);
2492
2493 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2494 if (! char_vectype)
2495 return false;
2496
2497 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2498 unsigned word_bytes = num_bytes / nunits;
2499
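/* Illustrative example (not from the original source): for
   __builtin_bswap32 on a V4SI vector, char_vectype is V16QI, num_bytes ==
   16 and word_bytes == 4, so the byte selector built below is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the bytes of each 32-bit word are reversed in place.  */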
2500 auto_vec_perm_indices elts (num_bytes);
2501 for (unsigned i = 0; i < nunits; ++i)
2502 for (unsigned j = 0; j < word_bytes; ++j)
2503 elts.quick_push ((i + 1) * word_bytes - j - 1);
2504
2505 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
2506 return false;
2507
2508 if (! vec_stmt)
2509 {
2510 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2513 "\n");
2514 if (! PURE_SLP_STMT (stmt_info))
2515 {
2516 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2517 1, vector_stmt, stmt_info, 0, vect_prologue);
2518 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2519 ncopies, vec_perm, stmt_info, 0, vect_body);
2520 }
2521 return true;
2522 }
2523
2524 tree_vector_builder telts (char_vectype, num_bytes, 1);
2525 for (unsigned i = 0; i < num_bytes; ++i)
2526 telts.quick_push (build_int_cst (char_type_node, elts[i]));
2527 tree bswap_vconst = telts.build ();
2528
2529 /* Transform. */
2530 vec<tree> vec_oprnds = vNULL;
2531 gimple *new_stmt = NULL;
2532 stmt_vec_info prev_stmt_info = NULL;
2533 for (unsigned j = 0; j < ncopies; j++)
2534 {
2535 /* Handle uses. */
2536 if (j == 0)
2537 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2538 else
2539 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2540
2541 /* Arguments are ready. Create the new vector stmt. */
2542 unsigned i;
2543 tree vop;
2544 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2545 {
2546 tree tem = make_ssa_name (char_vectype);
2547 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2548 char_vectype, vop));
2549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2550 tree tem2 = make_ssa_name (char_vectype);
2551 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2552 tem, tem, bswap_vconst);
2553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2554 tem = make_ssa_name (vectype);
2555 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2556 vectype, tem2));
2557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2558 if (slp_node)
2559 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2560 }
2561
2562 if (slp_node)
2563 continue;
2564
2565 if (j == 0)
2566 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2567 else
2568 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2569
2570 prev_stmt_info = vinfo_for_stmt (new_stmt);
2571 }
2572
2573 vec_oprnds.release ();
2574 return true;
2575}
2576
2577/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2578 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2579 in a single step. On success, store the binary pack code in
2580 *CONVERT_CODE. */
2581
2582static bool
2583simple_integer_narrowing (tree vectype_out, tree vectype_in,
2584 tree_code *convert_code)
2585{
2586 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2587 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2588 return false;
2589
2590 tree_code code;
2591 int multi_step_cvt = 0;
2592 auto_vec <tree, 8> interm_types;
2593 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2594 &code, &multi_step_cvt,
2595 &interm_types)
2596 || multi_step_cvt)
2597 return false;
2598
2599 *convert_code = code;
2600 return true;
2601}
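
/* Illustrative example (not from the original source): with VECTYPE_IN ==
   V4SI and VECTYPE_OUT == V8HI, two V4SI inputs can be narrowed to one
   V8HI result by a single VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE is set to
   that code and the function returns true.  Narrowings that need an
   intermediate type (MULTI_STEP_CVT != 0) are rejected.  */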
2602
2603/* Function vectorizable_call.
2604
2605 Check if GS performs a function call that can be vectorized.
2606 If VEC_STMT is also passed, vectorize GS: create a vectorized
2607 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2608 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2609
2610static bool
2611vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2612 slp_tree slp_node)
2613{
2614 gcall *stmt;
2615 tree vec_dest;
2616 tree scalar_dest;
2617 tree op, type;
2618 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2619 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2620 tree vectype_out, vectype_in;
2621 int nunits_in;
2622 int nunits_out;
2623 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2625 vec_info *vinfo = stmt_info->vinfo;
2626 tree fndecl, new_temp, rhs_type;
2627 gimple *def_stmt;
2628 enum vect_def_type dt[3]
2629 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2630 int ndts = 3;
2631 gimple *new_stmt = NULL;
2632 int ncopies, j;
2633 vec<tree> vargs = vNULL;
2634 enum { NARROW, NONE, WIDEN } modifier;
2635 size_t i, nargs;
2636 tree lhs;
2637
2638 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2639 return false;
2640
2641 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2642 && ! vec_stmt)
2643 return false;
2644
2645 /* Is GS a vectorizable call? */
2646 stmt = dyn_cast <gcall *> (gs);
2647 if (!stmt)
2648 return false;
2649
2650 if (gimple_call_internal_p (stmt)
2651 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2652 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2653 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2654 slp_node);
2655
2656 if (gimple_call_lhs (stmt) == NULL_TREE
2657 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2658 return false;
2659
2660 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2661
2662 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2663
2664 /* Process function arguments. */
2665 rhs_type = NULL_TREE;
2666 vectype_in = NULL_TREE;
2667 nargs = gimple_call_num_args (stmt);
2668
2669 /* Bail out if the function has more than three arguments; we do not have
2670 interesting builtin functions to vectorize with more than two arguments
2671 except for fma. A call with no arguments is not vectorizable either. */
2672 if (nargs == 0 || nargs > 3)
2673 return false;
2674
2675 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2676 if (gimple_call_internal_p (stmt)
2677 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2678 {
2679 nargs = 0;
2680 rhs_type = unsigned_type_node;
2681 }
2682
2683 for (i = 0; i < nargs; i++)
2684 {
2685 tree opvectype;
2686
2687 op = gimple_call_arg (stmt, i);
2688
2689 /* We can only handle calls with arguments of the same type. */
2690 if (rhs_type
2691 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2692 {
2693 if (dump_enabled_p ())
2694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2695 "argument types differ.\n");
2696 return false;
2697 }
2698 if (!rhs_type)
2699 rhs_type = TREE_TYPE (op);
2700
2701 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2702 {
2703 if (dump_enabled_p ())
2704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2705 "use not simple.\n");
2706 return false;
2707 }
2708
2709 if (!vectype_in)
2710 vectype_in = opvectype;
2711 else if (opvectype
2712 && opvectype != vectype_in)
2713 {
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2716 "argument vector types differ.\n");
2717 return false;
2718 }
2719 }
2720 /* If all arguments are external or constant defs, use a vector type with
2721 the same size as the output vector type. */
2722 if (!vectype_in)
2723 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2724 if (vec_stmt)
2725 gcc_assert (vectype_in);
2726 if (!vectype_in)
2727 {
2728 if (dump_enabled_p ())
2729 {
2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2731 "no vectype for scalar type ");
2732 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2733 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2734 }
2735
2736 return false;
2737 }
2738
2739 /* FORNOW */
2740 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2741 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2742 if (nunits_in == nunits_out / 2)
2743 modifier = NARROW;
2744 else if (nunits_out == nunits_in)
2745 modifier = NONE;
2746 else if (nunits_out == nunits_in / 2)
2747 modifier = WIDEN;
2748 else
2749 return false;
2750
2751 /* We only handle functions that do not read or clobber memory. */
2752 if (gimple_vuse (stmt))
2753 {
2754 if (dump_enabled_p ())
2755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2756 "function reads from or writes to memory.\n");
2757 return false;
2758 }
2759
2760 /* For now, we only vectorize functions if a target specific builtin
2761 is available. TODO -- in some cases, it might be profitable to
2762 insert the calls for pieces of the vector, in order to be able
2763 to vectorize other operations in the loop. */
2764 fndecl = NULL_TREE;
2765 internal_fn ifn = IFN_LAST;
2766 combined_fn cfn = gimple_call_combined_fn (stmt);
2767 tree callee = gimple_call_fndecl (stmt);
2768
2769 /* First try using an internal function. */
2770 tree_code convert_code = ERROR_MARK;
2771 if (cfn != CFN_LAST
2772 && (modifier == NONE
2773 || (modifier == NARROW
2774 && simple_integer_narrowing (vectype_out, vectype_in,
2775 &convert_code))))
2776 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2777 vectype_in);
2778
2779 /* If that fails, try asking for a target-specific built-in function. */
2780 if (ifn == IFN_LAST)
2781 {
2782 if (cfn != CFN_LAST)
2783 fndecl = targetm.vectorize.builtin_vectorized_function
2784 (cfn, vectype_out, vectype_in);
2785 else
2786 fndecl = targetm.vectorize.builtin_md_vectorized_function
2787 (callee, vectype_out, vectype_in);
2788 }
2789
2790 if (ifn == IFN_LAST && !fndecl)
2791 {
2792 if (cfn == CFN_GOMP_SIMD_LANE
2793 && !slp_node
2794 && loop_vinfo
2795 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2796 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2797 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2798 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2799 {
2800 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2801 { 0, 1, 2, ... vf - 1 } vector. */
2802 gcc_assert (nargs == 0);
2803 }
2804 else if (modifier == NONE
2805 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2806 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2807 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2808 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2809 vectype_in, dt);
2810 else
2811 {
2812 if (dump_enabled_p ())
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2814 "function is not vectorizable.\n");
2815 return false;
2816 }
2817 }
2818
2819 if (slp_node)
2820 ncopies = 1;
2821 else if (modifier == NARROW && ifn == IFN_LAST)
2822 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2823 else
2824 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2825
2826 /* Sanity check: make sure that at least one copy of the vectorized stmt
2827 needs to be generated. */
2828 gcc_assert (ncopies >= 1);
2829
2830 if (!vec_stmt) /* transformation not required. */
2831 {
2832 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2833 if (dump_enabled_p ())
2834 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2835 "\n");
2836 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2837 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2838 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2839 vec_promote_demote, stmt_info, 0, vect_body);
2840
2841 return true;
2842 }
2843
2844 /* Transform. */
2845
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2848
2849 /* Handle def. */
2850 scalar_dest = gimple_call_lhs (stmt);
2851 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2852
2853 prev_stmt_info = NULL;
2854 if (modifier == NONE || ifn != IFN_LAST)
2855 {
2856 tree prev_res = NULL_TREE;
2857 for (j = 0; j < ncopies; ++j)
2858 {
2859 /* Build argument list for the vectorized call. */
2860 if (j == 0)
2861 vargs.create (nargs);
2862 else
2863 vargs.truncate (0);
2864
2865 if (slp_node)
2866 {
2867 auto_vec<vec<tree> > vec_defs (nargs);
2868 vec<tree> vec_oprnds0;
2869
2870 for (i = 0; i < nargs; i++)
2871 vargs.quick_push (gimple_call_arg (stmt, i));
2872 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2873 vec_oprnds0 = vec_defs[0];
2874
2875 /* Arguments are ready. Create the new vector stmt. */
2876 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2877 {
2878 size_t k;
2879 for (k = 0; k < nargs; k++)
2880 {
2881 vec<tree> vec_oprndsk = vec_defs[k];
2882 vargs[k] = vec_oprndsk[i];
2883 }
2884 if (modifier == NARROW)
2885 {
2886 tree half_res = make_ssa_name (vectype_in);
2887 gcall *call
2888 = gimple_build_call_internal_vec (ifn, vargs);
2889 gimple_call_set_lhs (call, half_res);
2890 gimple_call_set_nothrow (call, true);
2891 new_stmt = call;
2892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2893 if ((i & 1) == 0)
2894 {
2895 prev_res = half_res;
2896 continue;
2897 }
2898 new_temp = make_ssa_name (vec_dest);
2899 new_stmt = gimple_build_assign (new_temp, convert_code,
2900 prev_res, half_res);
2901 }
2902 else
2903 {
2904 gcall *call;
2905 if (ifn != IFN_LAST)
2906 call = gimple_build_call_internal_vec (ifn, vargs);
2907 else
2908 call = gimple_build_call_vec (fndecl, vargs);
2909 new_temp = make_ssa_name (vec_dest, call);
2910 gimple_call_set_lhs (call, new_temp);
2911 gimple_call_set_nothrow (call, true);
2912 new_stmt = call;
2913 }
2914 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2916 }
2917
2918 for (i = 0; i < nargs; i++)
2919 {
2920 vec<tree> vec_oprndsi = vec_defs[i];
2921 vec_oprndsi.release ();
2922 }
2923 continue;
2924 }
2925
2926 for (i = 0; i < nargs; i++)
2927 {
2928 op = gimple_call_arg (stmt, i);
2929 if (j == 0)
2930 vec_oprnd0
2931 = vect_get_vec_def_for_operand (op, stmt);
2932 else
2933 {
2934 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2935 vec_oprnd0
2936 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2937 }
2938
2939 vargs.quick_push (vec_oprnd0);
2940 }
2941
2942 if (gimple_call_internal_p (stmt)
2943 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2944 {
2945 tree_vector_builder v (vectype_out, 1, 3);
2946 for (int k = 0; k < 3; ++k)
2947 v.quick_push (build_int_cst (unsigned_type_node,
2948 j * nunits_out + k));
2949 tree cst = v.build ();
2950 tree new_var
2951 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2952 gimple *init_stmt = gimple_build_assign (new_var, cst);
2953 vect_init_vector_1 (stmt, init_stmt, NULL);
2954 new_temp = make_ssa_name (vec_dest);
2955 new_stmt = gimple_build_assign (new_temp, new_var);
2956 }
2957 else if (modifier == NARROW)
2958 {
2959 tree half_res = make_ssa_name (vectype_in);
2960 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2961 gimple_call_set_lhs (call, half_res);
2962 gimple_call_set_nothrow (call, true);
2963 new_stmt = call;
2964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2965 if ((j & 1) == 0)
2966 {
2967 prev_res = half_res;
2968 continue;
2969 }
2970 new_temp = make_ssa_name (vec_dest);
2971 new_stmt = gimple_build_assign (new_temp, convert_code,
2972 prev_res, half_res);
2973 }
2974 else
2975 {
2976 gcall *call;
2977 if (ifn != IFN_LAST)
2978 call = gimple_build_call_internal_vec (ifn, vargs);
2979 else
2980 call = gimple_build_call_vec (fndecl, vargs);
2981 new_temp = make_ssa_name (vec_dest, call);
2982 gimple_call_set_lhs (call, new_temp);
2983 gimple_call_set_nothrow (call, true);
2984 new_stmt = call;
2985 }
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987
2988 if (j == (modifier == NARROW ? 1 : 0))
2989 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2990 else
2991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2992
2993 prev_stmt_info = vinfo_for_stmt (new_stmt);
2994 }
2995 }
2996 else if (modifier == NARROW)
2997 {
2998 for (j = 0; j < ncopies; ++j)
2999 {
3000 /* Build argument list for the vectorized call. */
3001 if (j == 0)
3002 vargs.create (nargs * 2);
3003 else
3004 vargs.truncate (0);
3005
3006 if (slp_node)
3007 {
3008 auto_vec<vec<tree> > vec_defs (nargs);
3009 vec<tree> vec_oprnds0;
3010
3011 for (i = 0; i < nargs; i++)
3012 vargs.quick_push (gimple_call_arg (stmt, i));
3013 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3014 vec_oprnds0 = vec_defs[0];
3015
3016 /* Arguments are ready. Create the new vector stmt. */
3017 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3018 {
3019 size_t k;
3020 vargs.truncate (0);
3021 for (k = 0; k < nargs; k++)
3022 {
3023 vec<tree> vec_oprndsk = vec_defs[k];
3024 vargs.quick_push (vec_oprndsk[i]);
3025 vargs.quick_push (vec_oprndsk[i + 1]);
3026 }
3027 gcall *call;
3028 if (ifn != IFN_LAST)
3029 call = gimple_build_call_internal_vec (ifn, vargs);
3030 else
3031 call = gimple_build_call_vec (fndecl, vargs);
3032 new_temp = make_ssa_name (vec_dest, call);
3033 gimple_call_set_lhs (call, new_temp);
3034 gimple_call_set_nothrow (call, true);
3035 new_stmt = call;
3036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3037 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3038 }
3039
3040 for (i = 0; i < nargs; i++)
3041 {
3042 vec<tree> vec_oprndsi = vec_defs[i];
3043 vec_oprndsi.release ();
3044 }
3045 continue;
3046 }
3047
3048 for (i = 0; i < nargs; i++)
3049 {
3050 op = gimple_call_arg (stmt, i);
3051 if (j == 0)
3052 {
3053 vec_oprnd0
3054 = vect_get_vec_def_for_operand (op, stmt);
3055 vec_oprnd1
3056 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3057 }
3058 else
3059 {
3060 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3061 vec_oprnd0
3062 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3063 vec_oprnd1
3064 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3065 }
3066
3067 vargs.quick_push (vec_oprnd0);
3068 vargs.quick_push (vec_oprnd1);
3069 }
3070
3071 new_stmt = gimple_build_call_vec (fndecl, vargs);
3072 new_temp = make_ssa_name (vec_dest, new_stmt);
3073 gimple_call_set_lhs (new_stmt, new_temp);
3074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3075
3076 if (j == 0)
3077 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3078 else
3079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3080
3081 prev_stmt_info = vinfo_for_stmt (new_stmt);
3082 }
3083
3084 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3085 }
3086 else
3087 /* No current target implements this case. */
3088 return false;
3089
3090 vargs.release ();
3091
3092 /* The call in STMT might prevent it from being removed in dce.
3093 We however cannot remove it here, due to the way the SSA name
3094 it defines is mapped to the new definition. So just replace
3095 the rhs of the statement with something harmless. */
3096
3097 if (slp_node)
3098 return true;
3099
3100 type = TREE_TYPE (scalar_dest);
3101 if (is_pattern_stmt_p (stmt_info))
3102 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3103 else
3104 lhs = gimple_call_lhs (stmt);
3105
3106 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3107 set_vinfo_for_stmt (new_stmt, stmt_info);
3108 set_vinfo_for_stmt (stmt, NULL);
3109 STMT_VINFO_STMT (stmt_info) = new_stmt;
3110 gsi_replace (gsi, new_stmt, false);
3111
3112 return true;
3113}
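
/* Illustrative sketch of the NARROW internal-function path above (not from
   the original source): when the output elements are half the width of the
   input elements, every odd copy packs the current and the previous
   half-width results into one output vector, roughly

     half_res.1 = .<ifn> (vargs ...);
     half_res.2 = .<ifn> (vargs ...);
     vect_res = VEC_PACK_TRUNC_EXPR <half_res.1, half_res.2>;

   using the pack code chosen by simple_integer_narrowing.  */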
3114
3115
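/* Per-argument information collected by vectorizable_simd_clone_call while
   choosing a simd clone: the vector type and value of the argument, its
   definition kind, its step if it is linear in the loop or within a simd
   lane, and its known alignment.  */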
3116struct simd_call_arg_info
3117{
3118 tree vectype;
3119 tree op;
3120 HOST_WIDE_INT linear_step;
3121 enum vect_def_type dt;
3122 unsigned int align;
3123 bool simd_lane_linear;
3124};
3125
3126 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3127 is linear within a simd lane (but not within the whole loop), note it
3128 in *ARGINFO. */
3129
3130static void
3131vect_simd_lane_linear (tree op, struct loop *loop,
3132 struct simd_call_arg_info *arginfo)
3133{
3134 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3135
3136 if (!is_gimple_assign (def_stmt)
3137 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3138 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3139 return;
3140
3141 tree base = gimple_assign_rhs1 (def_stmt);
3142 HOST_WIDE_INT linear_step = 0;
3143 tree v = gimple_assign_rhs2 (def_stmt);
3144 while (TREE_CODE (v) == SSA_NAME)
3145 {
3146 tree t;
3147 def_stmt = SSA_NAME_DEF_STMT (v);
3148 if (is_gimple_assign (def_stmt))
3149 switch (gimple_assign_rhs_code (def_stmt))
3150 {
3151 case PLUS_EXPR:
3152 t = gimple_assign_rhs2 (def_stmt);
3153 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3154 return;
3155 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3156 v = gimple_assign_rhs1 (def_stmt);
3157 continue;
3158 case MULT_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3161 return;
3162 linear_step = tree_to_shwi (t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 CASE_CONVERT:
3166 t = gimple_assign_rhs1 (def_stmt);
3167 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3168 || (TYPE_PRECISION (TREE_TYPE (v))
3169 < TYPE_PRECISION (TREE_TYPE (t))))
3170 return;
3171 if (!linear_step)
3172 linear_step = 1;
3173 v = t;
3174 continue;
3175 default:
3176 return;
3177 }
3178 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3179 && loop->simduid
3180 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3181 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3182 == loop->simduid))
3183 {
3184 if (!linear_step)
3185 linear_step = 1;
3186 arginfo->linear_step = linear_step;
3187 arginfo->op = base;
3188 arginfo->simd_lane_linear = true;
3189 return;
3190 }
3191 }
3192}
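
/* Illustrative example (not from the original source): given roughly

     _1 = GOMP_SIMD_LANE (simduid.0_5(D));
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &a + _3;

   the walk above starts from the POINTER_PLUS_EXPR defining p_4, records
   the invariant base &a, picks up the step 4 from the MULT_EXPR and stops
   at the GOMP_SIMD_LANE call, so the argument is marked simd-lane linear
   with linear_step == 4.  */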
3193
3194/* Function vectorizable_simd_clone_call.
3195
3196 Check if STMT performs a function call that can be vectorized
3197 by calling a simd clone of the function.
3198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3199 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3201
3202static bool
3203vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3204 gimple **vec_stmt, slp_tree slp_node)
3205{
3206 tree vec_dest;
3207 tree scalar_dest;
3208 tree op, type;
3209 tree vec_oprnd0 = NULL_TREE;
3210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3211 tree vectype;
3212 unsigned int nunits;
3213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3214 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3215 vec_info *vinfo = stmt_info->vinfo;
3216 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3217 tree fndecl, new_temp;
3218 gimple *def_stmt;
3219 gimple *new_stmt = NULL;
3220 int ncopies, j;
3221 auto_vec<simd_call_arg_info> arginfo;
3222 vec<tree> vargs = vNULL;
3223 size_t i, nargs;
3224 tree lhs, rtype, ratype;
3225 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3226
3227 /* Is STMT a vectorizable call? */
3228 if (!is_gimple_call (stmt))
3229 return false;
3230
3231 fndecl = gimple_call_fndecl (stmt);
3232 if (fndecl == NULL_TREE)
3233 return false;
3234
3235 struct cgraph_node *node = cgraph_node::get (fndecl);
3236 if (node == NULL || node->simd_clones == NULL)
3237 return false;
3238
3239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3240 return false;
3241
3242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3243 && ! vec_stmt)
3244 return false;
3245
3246 if (gimple_call_lhs (stmt)
3247 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3248 return false;
3249
3250 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3251
3252 vectype = STMT_VINFO_VECTYPE (stmt_info);
3253
3254 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3255 return false;
3256
3257 /* FORNOW */
3258 if (slp_node)
3259 return false;
3260
3261 /* Process function arguments. */
3262 nargs = gimple_call_num_args (stmt);
3263
3264 /* Bail out if the function has zero arguments. */
3265 if (nargs == 0)
3266 return false;
3267
3268 arginfo.reserve (nargs, true);
3269
3270 for (i = 0; i < nargs; i++)
3271 {
3272 simd_call_arg_info thisarginfo;
3273 affine_iv iv;
3274
3275 thisarginfo.linear_step = 0;
3276 thisarginfo.align = 0;
3277 thisarginfo.op = NULL_TREE;
3278 thisarginfo.simd_lane_linear = false;
3279
3280 op = gimple_call_arg (stmt, i);
3281 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3282 &thisarginfo.vectype)
3283 || thisarginfo.dt == vect_uninitialized_def)
3284 {
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "use not simple.\n");
3288 return false;
3289 }
3290
3291 if (thisarginfo.dt == vect_constant_def
3292 || thisarginfo.dt == vect_external_def)
3293 gcc_assert (thisarginfo.vectype == NULL_TREE);
3294 else
3295 gcc_assert (thisarginfo.vectype != NULL_TREE);
3296
3297 /* For linear arguments, the analyze phase should have saved
3298 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3299 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3300 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3301 {
3302 gcc_assert (vec_stmt);
3303 thisarginfo.linear_step
3304 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3305 thisarginfo.op
3306 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3307 thisarginfo.simd_lane_linear
3308 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3309 == boolean_true_node);
3310 /* If the loop has been peeled for alignment, adjust the recorded base accordingly. */
3311 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3312 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3313 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3314 {
3315 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3316 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3317 tree opt = TREE_TYPE (thisarginfo.op);
3318 bias = fold_convert (TREE_TYPE (step), bias);
3319 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3320 thisarginfo.op
3321 = fold_build2 (POINTER_TYPE_P (opt)
3322 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3323 thisarginfo.op, bias);
3324 }
3325 }
3326 else if (!vec_stmt
3327 && thisarginfo.dt != vect_constant_def
3328 && thisarginfo.dt != vect_external_def
3329 && loop_vinfo
3330 && TREE_CODE (op) == SSA_NAME
3331 && simple_iv (loop, loop_containing_stmt (stmt), op,
3332 &iv, false)
3333 && tree_fits_shwi_p (iv.step))
3334 {
3335 thisarginfo.linear_step = tree_to_shwi (iv.step);
3336 thisarginfo.op = iv.base;
3337 }
3338 else if ((thisarginfo.dt == vect_constant_def
3339 || thisarginfo.dt == vect_external_def)
3340 && POINTER_TYPE_P (TREE_TYPE (op)))
3341 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3342 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3343 linear too. */
3344 if (POINTER_TYPE_P (TREE_TYPE (op))
3345 && !thisarginfo.linear_step
3346 && !vec_stmt
3347 && thisarginfo.dt != vect_constant_def
3348 && thisarginfo.dt != vect_external_def
3349 && loop_vinfo
3350 && !slp_node
3351 && TREE_CODE (op) == SSA_NAME)
3352 vect_simd_lane_linear (op, loop, &thisarginfo);
3353
3354 arginfo.quick_push (thisarginfo);
3355 }
3356
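/* Illustrative note (not from the original source): each candidate clone is
   scored below; a clone whose simdlen is smaller than the vectorization
   factor pays 1024 per halving, the target's own preference is weighted by
   512, and a vector argument fed by an invariant or linear value adds 64.
   The usable clone with the smallest total badness is selected.  */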
3357 unsigned int badness = 0;
3358 struct cgraph_node *bestn = NULL;
3359 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3360 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3361 else
3362 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3363 n = n->simdclone->next_clone)
3364 {
3365 unsigned int this_badness = 0;
3366 if (n->simdclone->simdlen
3367 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3368 || n->simdclone->nargs != nargs)
3369 continue;
3370 if (n->simdclone->simdlen
3371 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3372 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3373 - exact_log2 (n->simdclone->simdlen)) * 1024;
3374 if (n->simdclone->inbranch)
3375 this_badness += 2048;
3376 int target_badness = targetm.simd_clone.usable (n);
3377 if (target_badness < 0)
3378 continue;
3379 this_badness += target_badness * 512;
3380 /* FORNOW: We still have to add code to pass the mask argument, so skip in-branch clones. */
3381 if (n->simdclone->inbranch)
3382 continue;
3383 for (i = 0; i < nargs; i++)
3384 {
3385 switch (n->simdclone->args[i].arg_type)
3386 {
3387 case SIMD_CLONE_ARG_TYPE_VECTOR:
3388 if (!useless_type_conversion_p
3389 (n->simdclone->args[i].orig_type,
3390 TREE_TYPE (gimple_call_arg (stmt, i))))
3391 i = -1;
3392 else if (arginfo[i].dt == vect_constant_def
3393 || arginfo[i].dt == vect_external_def
3394 || arginfo[i].linear_step)
3395 this_badness += 64;
3396 break;
3397 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3398 if (arginfo[i].dt != vect_constant_def
3399 && arginfo[i].dt != vect_external_def)
3400 i = -1;
3401 break;
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3403 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3404 if (arginfo[i].dt == vect_constant_def
3405 || arginfo[i].dt == vect_external_def
3406 || (arginfo[i].linear_step
3407 != n->simdclone->args[i].linear_step))
3408 i = -1;
3409 break;
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3415 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3416 /* FORNOW */
3417 i = -1;
3418 break;
3419 case SIMD_CLONE_ARG_TYPE_MASK:
3420 gcc_unreachable ();
3421 }
3422 if (i == (size_t) -1)
3423 break;
3424 if (n->simdclone->args[i].alignment > arginfo[i].align)
3425 {
3426 i = -1;
3427 break;
3428 }
3429 if (arginfo[i].align)
3430 this_badness += (exact_log2 (arginfo[i].align)
3431 - exact_log2 (n->simdclone->args[i].alignment));
3432 }
3433 if (i == (size_t) -1)
3434 continue;
3435 if (bestn == NULL || this_badness < badness)
3436 {
3437 bestn = n;
3438 badness = this_badness;
3439 }
3440 }
3441
3442 if (bestn == NULL)
3443 return false;
3444
3445 for (i = 0; i < nargs; i++)
3446 if ((arginfo[i].dt == vect_constant_def
3447 || arginfo[i].dt == vect_external_def)
3448 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3449 {
3450 arginfo[i].vectype
3451 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3452 i)));
3453 if (arginfo[i].vectype == NULL
3454 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3455 > bestn->simdclone->simdlen))
3456 return false;
3457 }
3458
3459 fndecl = bestn->decl;
3460 nunits = bestn->simdclone->simdlen;
3461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3462
3463 /* If the function isn't const, only allow it in simd loops where the
3464 user has asserted that at least nunits consecutive iterations can be
3465 performed using SIMD instructions. */
3466 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3467 && gimple_vuse (stmt))
3468 return false;
3469
3470 /* Sanity check: make sure that at least one copy of the vectorized stmt
3471 needs to be generated. */
3472 gcc_assert (ncopies >= 1);
3473
3474 if (!vec_stmt) /* transformation not required. */
3475 {
3476 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if ((bestn->simdclone->args[i].arg_type
             == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
            || (bestn->simdclone->args[i].arg_type
                == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
            tree sll = arginfo[i].simd_lane_linear
                       ? boolean_true_node : boolean_false_node;
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
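            /* The operand was analyzed with the loop's vectype, which need
               not match the clone's parameter vector type ATYPE.  A wider
               operand is split into pieces with BIT_FIELD_REFs; a narrower
               one is widened by collecting K defs into a CONSTRUCTOR.  O
               such arguments are pushed per call so that all NUNITS lanes
               are covered.  */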
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  bitsize_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
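            /* A linear argument is passed as the scalar value for lane 0
               of each call.  Arguments already proved simd-lane linear are
               passed through unchanged.  Otherwise, for the first copy an
               IV is materialized as a PHI in the loop header, bumped by
               LINEAR_STEP * NCOPIES * NUNITS per loop iteration; the
               remaining copies simply add their constant offset of
               LINEAR_STEP * J * NUNITS to it.  */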
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  if (arginfo[i].simd_lane_linear)
                    {
                      vargs.safe_push (arginfo[i].op);
                      break;
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

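      /* Bring the call result back to the loop's VECTYPE.  When a single
         call yields more lanes than VECTYPE holds, split its result with
         BIT_FIELD_REFs (or loads from the array temporary for an array
         return type); when it yields fewer, collect K consecutive call
         results into a CONSTRUCTOR before defining the vectorized stmt.
         Array temporaries are clobbered after they have been read back.  */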
      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                bitsize_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber), gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in DCE.
     We however cannot remove it here, due to the way the SSA name
     it defines is mapped to the new definition.  So just replace the
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

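  /* For example (the names are illustrative), an original scalar call
     "x_1 = foo (a_2);" is rewritten to "x_1 = 0;", or to a GIMPLE_NOP when
     it has no lhs, and its virtual operands are unlinked below.  */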
  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
